diff --git a/src/tito/compat.py b/src/tito/compat.py index 8e6cb4f..a9ec6cd 100644 --- a/src/tito/compat.py +++ b/src/tito/compat.py @@ -26,34 +26,26 @@ if PY2: from ConfigParser import RawConfigParser from StringIO import StringIO import xmlrpclib - text_type = unicode - binary_type = str else: import subprocess from configparser import NoOptionError from configparser import RawConfigParser from io import StringIO import xmlrpc.client as xmlrpclib - text_type = str - binary_type = bytes -def ensure_text(x, encoding="utf8"): - if isinstance(x, binary_type): - return x.decode(encoding) - elif isinstance(x, text_type): +def decode_bytes(x, source_encoding): + if PY2: return x else: - raise TypeError("Not expecting type '%s'" % type(x)) + return x.decode(source_encoding) -def ensure_binary(x, encoding="utf8"): - if isinstance(x, text_type): - return x.encode(encoding) - elif isinstance(x, binary_type): +def encode_bytes(x, destination_encoding): + if PY2: return x else: - raise TypeError("Not expecting type '%s'" % type(x)) + return bytes(x, destination_encoding) def getstatusoutput(cmd): diff --git a/src/tito/tar.py b/src/tito/tar.py index 268f30d..4a78410 100644 --- a/src/tito/tar.py +++ b/src/tito/tar.py @@ -14,8 +14,8 @@ import re import struct import sys -import codecs -import tito.compat + +from tito.compat import decode_bytes, encode_bytes RECORD_SIZE = 512 @@ -120,7 +120,7 @@ class TarFixer(object): def full_read(self, read_size): read = self.fh.read(read_size) amount_read = len(read) - while amount_read < read_size: + while (amount_read < read_size): left_to_read = read_size - amount_read next_read = self.fh.read(left_to_read) @@ -133,7 +133,13 @@ class TarFixer(object): return read def write(self, data): - self.out.write(tito.compat.ensure_binary(data)) + """Write the data correctly depending on the mode of the file. While binary mode + is preferred, we support text mode for streams like stdout.""" + if hasattr(self.out, 'mode') and 'b' in self.out.mode: + data = bytearray(data) + else: + data = decode_bytes(data, "utf8") + self.out.write(data) def chunk_to_hash(self, chunk): # Our struct template is only 500 bytes, but the last 12 bytes are NUL @@ -141,7 +147,7 @@ class TarFixer(object): # template as '12x'. The unpack_from method will read the bytes our # template defines from chunk and discard the rest. unpacked = struct.unpack_from(self.struct_template, chunk) - unpacked = list(map(lambda x: tito.compat.ensure_text(x), unpacked)) + unpacked = list(map(lambda x: decode_bytes(x, 'utf8'), unpacked)) # Zip what we read together with the member names and create a dictionary chunk_props = dict(zip(self.struct_members, unpacked)) @@ -187,9 +193,9 @@ class TarFixer(object): field_size = int(re.match('(\d+)', member_template).group(1)) - 1 fmt = "%0" + str(field_size) + "o\x00" as_string = fmt % chunk_props[member] - pack_values.append(tito.compat.ensure_binary(as_string)) + pack_values.append(as_string.encode("utf8")) else: - pack_values.append(tito.compat.ensure_binary(chunk_props[member])) + pack_values.append(chunk_props[member].encode("utf8")) return pack_values def process_header(self, chunk_props): @@ -212,10 +218,10 @@ class TarFixer(object): # the size of the whole string (including the %u), the first %s is the # keyword, the second one is the value. # - # Since the git ref is always 40 ASCII characters we can pre-compute the length - # to put in the extended header + # Since the git ref is always 40 characters we can + # pre-compute the length to put in the extended header comment = "52 comment=%s\n" % self.gitref - data_out = struct.pack("=52s460x", tito.compat.ensure_binary(comment, "ascii")) + data_out = struct.pack("=52s460x", encode_bytes(comment, "ascii")) self.write(data_out) self.total_length += len(data_out) @@ -235,9 +241,9 @@ class TarFixer(object): values = self.encode_header(chunk_props) new_chksum = 0 for val in values: - val_bytes = bytearray(tito.compat.ensure_binary(val)) + val_bytes = val.decode("utf8") for b in val_bytes: - new_chksum += b + new_chksum += ord(b) return "%07o\x00" % new_chksum def process_chunk(self, chunk): @@ -330,8 +336,8 @@ class TarFixer(object): if __name__ == '__main__': - if len(sys.argv) != 5: - sys.exit("Usage: %s UNIX_TIMESTAMP GIT_HASH TAR_FILE DESTINATION_FILE" % sys.argv[0]) + if len(sys.argv) != 4: + sys.exit("Usage: %s UNIX_TIMESTAMP GIT_HASH TAR_FILE" % sys.argv[0]) try: timestamp = int(sys.argv[1]) @@ -340,17 +346,11 @@ if __name__ == '__main__': gitref = sys.argv[2] tar_file = sys.argv[3] - destination_file = sys.argv[4] - - try: - dfh = open(destination_file, 'wb') - except: - print("Could not open %s" % destination_file) try: fh = open(tar_file, 'rb') except: print("Could not read %s" % tar_file) - reader = TarFixer(fh, dfh, timestamp, gitref) + reader = TarFixer(fh, sys.stdout, timestamp, gitref) reader.fix() diff --git a/test/unit/resources/archivé.tar b/test/unit/resources/archivé.tar deleted file mode 100644 index 34a6d6f..0000000 Binary files a/test/unit/resources/archivé.tar and /dev/null differ diff --git a/test/unit/resources/les_misérables.tar b/test/unit/resources/les_misérables.tar deleted file mode 100644 index 086fe59..0000000 Binary files a/test/unit/resources/les_misérables.tar and /dev/null differ diff --git a/test/unit/test_tar.py b/test/unit/test_tar.py index 135ad84..6007298 100644 --- a/test/unit/test_tar.py +++ b/test/unit/test_tar.py @@ -1,11 +1,8 @@ -# coding=utf-8 import hashlib import os -import tarfile import unittest -import io -from tito.compat import StringIO, ensure_binary +from tito.compat import StringIO, encode_bytes from tito.tar import TarFixer from mock import Mock @@ -15,10 +12,8 @@ EXPECTED_REF = "3518d720bff20db887b7a5e5dddd411d14dca1f9" class TarTest(unittest.TestCase): def setUp(self): - self.out = io.BytesIO() + self.out = StringIO() self.tarfixer = TarFixer(None, self.out, EXPECTED_TIMESTAMP, EXPECTED_REF) - self.utf8_containing_file = os.path.join(os.path.dirname(__file__), 'resources', 'les_misérables.tar') - self.utf8_file = os.path.join(os.path.dirname(__file__), 'resources', 'archivé.tar') self.test_file = os.path.join(os.path.dirname(__file__), 'resources', 'archive.tar') self.reference_file = os.path.join(os.path.dirname(__file__), 'resources', 'archive-fixed.tar') self.reference_hash = self.hash_file(self.reference_file) @@ -70,7 +65,7 @@ class TarTest(unittest.TestCase): self.fh = open(self.test_file, 'rb') self.tarfixer.fh = self.fh self.tarfixer.fix() - self.assertEqual(self.reference_hash, self.hash_buffer(self.out.getvalue())) + self.assertEqual(self.reference_hash, self.hash_buffer(encode_bytes(self.out.getvalue(), "utf8"))) def test_fix_fails_unless_file_in_binary_mode(self): self.fh = open(self.test_file, 'r') @@ -101,8 +96,8 @@ class TarTest(unittest.TestCase): self.tarfixer.create_extended_header() header = self.out.getvalue() self.assertEqual(512, len(header)) - self.assertEqual(ensure_binary("52 comment=%s\n" % EXPECTED_REF), header[:52]) - self.assertEqual(ensure_binary("\x00" * (512 - 53)), header[53:]) + self.assertEqual("52 comment=%s\n" % EXPECTED_REF, header[:52]) + self.assertEqual("\x00" * (512 - 53), header[53:]) def test_calculate_checksum(self): fields = { @@ -124,33 +119,5 @@ class TarTest(unittest.TestCase): } result = self.tarfixer.encode_header(chunk, ['mode', 'name']) expected_result = ["%07o\x00" % mode, "hello"] - expected_result = list(map(lambda x: ensure_binary(x), expected_result)) + expected_result = list(map(lambda x: encode_bytes(x, "utf8"), expected_result)) self.assertEqual(expected_result, result) - - def test_utf8_file(self): - # The goal of this test is to *not* throw a UnicodeDecodeError - self.fh = open(self.utf8_file, 'rb') - self.tarfixer.fh = self.fh - self.tarfixer.fix() - - self.assertEqual(self.reference_hash, self.hash_buffer(self.out.getvalue())) - - # rewind the buffer - self.out.seek(0) - try: - tarball = tarfile.open(fileobj=self.out, mode="r") - except tarfile.TarError: - self.fail("Unable to open generated tarball") - - def test_utf8_containing_file(self): - # # The goal of this test is to *not* blow up due to a corrupted tarball - self.fh = open(self.utf8_containing_file, 'rb') - self.tarfixer.fh = self.fh - self.tarfixer.fix() - - # rewind the buffer - self.out.seek(0) - try: - tarball = tarfile.open(fileobj=self.out, mode="r") - except tarfile.TarError as e: - self.fail("Unable to open generated tarball: %s" % e)