mirror of
https://github.com/rpm-software-management/tito.git
synced 2025-02-23 12:12:47 +00:00
Fix Python 3 issues with binary versus string types.
Python 3 is very picky about not mixing binary and string data. This patch gets TarFixer running on both Python 2.6+ and Python 3.x.
This commit is contained in:
parent
07d62cf24e
commit
a5b43b6b96
3 changed files with 75 additions and 30 deletions
|
@ -34,6 +34,20 @@ else:
|
||||||
import xmlrpc.client as xmlrpclib
|
import xmlrpc.client as xmlrpclib
|
||||||
|
|
||||||
|
|
||||||
|
def decode_bytes(x, source_encoding):
|
||||||
|
if PY2:
|
||||||
|
return x
|
||||||
|
else:
|
||||||
|
return x.decode(source_encoding)
|
||||||
|
|
||||||
|
|
||||||
|
def encode_bytes(x, destination_encoding):
|
||||||
|
if PY2:
|
||||||
|
return x
|
||||||
|
else:
|
||||||
|
return bytes(x, destination_encoding)
|
||||||
|
|
||||||
|
|
||||||
def getstatusoutput(cmd):
|
def getstatusoutput(cmd):
|
||||||
"""
|
"""
|
||||||
Returns (status, output) of executing cmd in a shell.
|
Returns (status, output) of executing cmd in a shell.
|
||||||
|
|
|
@ -15,6 +15,8 @@ import re
|
||||||
import struct
|
import struct
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
from tito.compat import decode_bytes, encode_bytes
|
||||||
|
|
||||||
RECORD_SIZE = 512
|
RECORD_SIZE = 512
|
||||||
|
|
||||||
# Git writes its tarballs to be a multiple of 10240. I'm not sure why: the
|
# Git writes its tarballs to be a multiple of 10240. I'm not sure why: the
|
||||||
|
@ -53,6 +55,8 @@ class TarFixer(object):
|
||||||
pax extended header records have the format "%u %s=%s\n". %u contains
|
pax extended header records have the format "%u %s=%s\n". %u contains
|
||||||
the size of the whole string (including the %u), the first %s is the
|
the size of the whole string (including the %u), the first %s is the
|
||||||
keyword, the second one is the value.
|
keyword, the second one is the value.
|
||||||
|
|
||||||
|
PAX (also known as POSIX.1-2001) always encodes everything in UTF-8.
|
||||||
"""
|
"""
|
||||||
def __init__(self, fh, out, timestamp, gitref, maven_built=False):
|
def __init__(self, fh, out, timestamp, gitref, maven_built=False):
|
||||||
self.maven_built = maven_built
|
self.maven_built = maven_built
|
||||||
|
@ -96,7 +100,7 @@ class TarFixer(object):
|
||||||
|
|
||||||
# Add an '=' to use native byte order with standard sizes
|
# Add an '=' to use native byte order with standard sizes
|
||||||
self.struct_template = "=" + "".join(map(lambda x: x[1], self.tar_struct))
|
self.struct_template = "=" + "".join(map(lambda x: x[1], self.tar_struct))
|
||||||
self.struct_members = map(lambda x: x[0], self.tar_struct)
|
self.struct_members = list(map(lambda x: x[0], self.tar_struct))
|
||||||
self.struct_hash = dict(self.tar_struct)
|
self.struct_hash = dict(self.tar_struct)
|
||||||
|
|
||||||
# The tarballs created by git archive from tree IDs don't have a global
|
# The tarballs created by git archive from tree IDs don't have a global
|
||||||
|
@ -128,13 +132,22 @@ class TarFixer(object):
|
||||||
|
|
||||||
return read
|
return read
|
||||||
|
|
||||||
|
def write(self, data):
|
||||||
|
"""Write the data correctly depending on the mode of the file. While binary mode
|
||||||
|
is preferred, we support text mode for streams like stdout."""
|
||||||
|
if hasattr(self.out, 'mode') and 'b' in self.out.mode:
|
||||||
|
data = bytearray(data)
|
||||||
|
else:
|
||||||
|
data = decode_bytes(data, "utf8")
|
||||||
|
self.out.write(data)
|
||||||
|
|
||||||
def chunk_to_hash(self, chunk):
|
def chunk_to_hash(self, chunk):
|
||||||
# Our struct template is only 500 bytes, but the last 12 bytes are NUL
|
# Our struct template is only 500 bytes, but the last 12 bytes are NUL
|
||||||
# I elected to ignore them completely instead of including them in the
|
# I elected to ignore them completely instead of including them in the
|
||||||
# template as '12x'. The unpack_from method will read the bytes our
|
# template as '12x'. The unpack_from method will read the bytes our
|
||||||
# template defines from chunk and discard the rest.
|
# template defines from chunk and discard the rest.
|
||||||
unpacked = struct.unpack_from(self.struct_template, chunk)
|
unpacked = struct.unpack_from(self.struct_template, chunk)
|
||||||
|
unpacked = list(map(lambda x: decode_bytes(x, 'utf8'), unpacked))
|
||||||
# Zip what we read together with the member names and create a dictionary
|
# Zip what we read together with the member names and create a dictionary
|
||||||
chunk_props = dict(zip(self.struct_members, unpacked))
|
chunk_props = dict(zip(self.struct_members, unpacked))
|
||||||
|
|
||||||
|
@ -143,28 +156,28 @@ class TarFixer(object):
|
||||||
def padded_size(self, length, pad_size=RECORD_SIZE):
|
def padded_size(self, length, pad_size=RECORD_SIZE):
|
||||||
"""Function to pad out a length to the nearest multiple of pad_size
|
"""Function to pad out a length to the nearest multiple of pad_size
|
||||||
that can contain it."""
|
that can contain it."""
|
||||||
blocks = length / pad_size
|
blocks = length // pad_size
|
||||||
if length % pad_size != 0:
|
if length % pad_size != 0:
|
||||||
blocks += 1
|
blocks += 1
|
||||||
return blocks * pad_size
|
return blocks * pad_size
|
||||||
|
|
||||||
def create_global_header(self):
|
def create_global_header(self):
|
||||||
header_props = {
|
header_props = {
|
||||||
'name': 'pax_global_header',
|
'name': u'pax_global_header',
|
||||||
'mode': 0o666,
|
'mode': 0o666,
|
||||||
'uid': 0,
|
'uid': 0,
|
||||||
'gid': 0,
|
'gid': 0,
|
||||||
'size': 52, # The size of the extended header with the gitref
|
'size': 52, # The size of the extended header with the gitref
|
||||||
'mtime': self.timestamp,
|
'mtime': self.timestamp,
|
||||||
'typeflag': 'g',
|
'typeflag': u'g',
|
||||||
'linkname': '',
|
'linkname': u'',
|
||||||
'magic': 'ustar',
|
'magic': u'ustar',
|
||||||
'version': '00',
|
'version': u'00',
|
||||||
'uname': 'root',
|
'uname': u'root',
|
||||||
'gname': 'root',
|
'gname': u'root',
|
||||||
'devmajor': 0,
|
'devmajor': 0,
|
||||||
'devminor': 0,
|
'devminor': 0,
|
||||||
'prefix': '',
|
'prefix': u'',
|
||||||
}
|
}
|
||||||
self.process_header(header_props)
|
self.process_header(header_props)
|
||||||
|
|
||||||
|
@ -179,9 +192,10 @@ class TarFixer(object):
|
||||||
member_template = self.struct_hash[member]
|
member_template = self.struct_hash[member]
|
||||||
field_size = int(re.match('(\d+)', member_template).group(1)) - 1
|
field_size = int(re.match('(\d+)', member_template).group(1)) - 1
|
||||||
fmt = "%0" + str(field_size) + "o\x00"
|
fmt = "%0" + str(field_size) + "o\x00"
|
||||||
pack_values.append(fmt % chunk_props[member])
|
as_string = fmt % chunk_props[member]
|
||||||
|
pack_values.append(as_string.encode("utf8"))
|
||||||
else:
|
else:
|
||||||
pack_values.append(chunk_props[member])
|
pack_values.append(chunk_props[member].encode("utf8"))
|
||||||
return pack_values
|
return pack_values
|
||||||
|
|
||||||
def process_header(self, chunk_props):
|
def process_header(self, chunk_props):
|
||||||
|
@ -191,7 +205,7 @@ class TarFixer(object):
|
||||||
|
|
||||||
# The struct itself is only 500 bytes so we have to pad it to 512
|
# The struct itself is only 500 bytes so we have to pad it to 512
|
||||||
data_out = struct.pack(self.struct_template + "12x", *pack_values)
|
data_out = struct.pack(self.struct_template + "12x", *pack_values)
|
||||||
self.out.write(data_out)
|
self.write(data_out)
|
||||||
self.total_length += len(data_out)
|
self.total_length += len(data_out)
|
||||||
|
|
||||||
def process_extended_header(self):
|
def process_extended_header(self):
|
||||||
|
@ -207,13 +221,13 @@ class TarFixer(object):
|
||||||
# Since the git ref is always 40 characters we can
|
# Since the git ref is always 40 characters we can
|
||||||
# pre-compute the length to put in the extended header
|
# pre-compute the length to put in the extended header
|
||||||
comment = "52 comment=%s\n" % self.gitref
|
comment = "52 comment=%s\n" % self.gitref
|
||||||
data_out = struct.pack("=512s", comment)
|
data_out = struct.pack("=52s460x", encode_bytes(comment, "ascii"))
|
||||||
self.out.write(data_out)
|
self.write(data_out)
|
||||||
self.total_length += len(data_out)
|
self.total_length += len(data_out)
|
||||||
|
|
||||||
def process_file_data(self, size):
|
def process_file_data(self, size):
|
||||||
data_out = self.full_read(self.padded_size(size))
|
data_out = self.full_read(self.padded_size(size))
|
||||||
self.out.write(data_out)
|
self.write(data_out)
|
||||||
self.total_length += len(data_out)
|
self.total_length += len(data_out)
|
||||||
|
|
||||||
def calculate_checksum(self, chunk_props):
|
def calculate_checksum(self, chunk_props):
|
||||||
|
@ -227,17 +241,19 @@ class TarFixer(object):
|
||||||
values = self.encode_header(chunk_props)
|
values = self.encode_header(chunk_props)
|
||||||
new_chksum = 0
|
new_chksum = 0
|
||||||
for val in values:
|
for val in values:
|
||||||
val_bytes = bytearray(val, 'ASCII')
|
val_bytes = val.decode("utf8")
|
||||||
new_chksum += reduce(lambda x, y: x + y, val_bytes, 0)
|
for b in val_bytes:
|
||||||
|
new_chksum += ord(b)
|
||||||
return "%07o\x00" % new_chksum
|
return "%07o\x00" % new_chksum
|
||||||
|
|
||||||
def process_chunk(self, chunk):
|
def process_chunk(self, chunk):
|
||||||
# Tar archives end with two 512 byte blocks of zeroes
|
# Tar archives end with two 512 byte blocks of zeroes
|
||||||
if chunk == "\x00" * 512:
|
if chunk == b"\x00" * 512:
|
||||||
self.out.write(chunk)
|
self.write(b"\x00" * 512)
|
||||||
self.total_length += len(chunk)
|
self.total_length += len(chunk)
|
||||||
if self.last_chunk_was_nulls:
|
if self.last_chunk_was_nulls:
|
||||||
self.out.write("\x00" * (self.padded_size(self.total_length, GIT_BLOCK_SIZE) - self.total_length))
|
final_padding = b"\x00" * (self.padded_size(self.total_length, GIT_BLOCK_SIZE) - self.total_length)
|
||||||
|
self.write(final_padding)
|
||||||
self.done = True
|
self.done = True
|
||||||
self.last_chunk_was_nulls = True
|
self.last_chunk_was_nulls = True
|
||||||
return
|
return
|
||||||
|
@ -299,6 +315,9 @@ class TarFixer(object):
|
||||||
self.process_file_data(chunk_props['size'])
|
self.process_file_data(chunk_props['size'])
|
||||||
|
|
||||||
def fix(self):
|
def fix(self):
|
||||||
|
if 'b' not in self.fh.mode:
|
||||||
|
raise IOError("The input file must be opened in binary mode!")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
chunk = self.full_read(RECORD_SIZE)
|
chunk = self.full_read(RECORD_SIZE)
|
||||||
while chunk != "" and not self.done:
|
while chunk != "" and not self.done:
|
||||||
|
|
|
@ -2,7 +2,7 @@ import hashlib
|
||||||
import os
|
import os
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from tito.compat import StringIO
|
from tito.compat import StringIO, encode_bytes
|
||||||
from tito.tar import TarFixer
|
from tito.tar import TarFixer
|
||||||
from mock import Mock
|
from mock import Mock
|
||||||
|
|
||||||
|
@ -22,7 +22,8 @@ class TarTest(unittest.TestCase):
|
||||||
self.out = None
|
self.out = None
|
||||||
|
|
||||||
def hash_file(self, filename):
|
def hash_file(self, filename):
|
||||||
return self.hash_buffer(open(filename, 'rb').read())
|
file_bytes = open(filename, 'rb').read()
|
||||||
|
return self.hash_buffer(file_bytes)
|
||||||
|
|
||||||
def hash_buffer(self, buf):
|
def hash_buffer(self, buf):
|
||||||
hasher = hashlib.sha256()
|
hasher = hashlib.sha256()
|
||||||
|
@ -61,10 +62,15 @@ class TarTest(unittest.TestCase):
|
||||||
self.assertRaises(IOError, self.tarfixer.full_read, 10)
|
self.assertRaises(IOError, self.tarfixer.full_read, 10)
|
||||||
|
|
||||||
def test_fix(self):
|
def test_fix(self):
|
||||||
self.fh = open(self.test_file)
|
self.fh = open(self.test_file, 'rb')
|
||||||
self.tarfixer.fh = self.fh
|
self.tarfixer.fh = self.fh
|
||||||
self.tarfixer.fix()
|
self.tarfixer.fix()
|
||||||
self.assertEqual(self.reference_hash, self.hash_buffer("".join(self.out.buflist)))
|
self.assertEqual(self.reference_hash, self.hash_buffer(encode_bytes(self.out.getvalue(), "utf8")))
|
||||||
|
|
||||||
|
def test_fix_fails_unless_file_in_binary_mode(self):
|
||||||
|
self.fh = open(self.test_file, 'r')
|
||||||
|
self.tarfixer.fh = self.fh
|
||||||
|
self.assertRaises(IOError, self.tarfixer.fix)
|
||||||
|
|
||||||
def test_padded_size_length_small(self):
|
def test_padded_size_length_small(self):
|
||||||
length = 10
|
length = 10
|
||||||
|
@ -81,9 +87,14 @@ class TarTest(unittest.TestCase):
|
||||||
block_size = 512
|
block_size = 512
|
||||||
self.assertEqual(1024, self.tarfixer.padded_size(length, block_size))
|
self.assertEqual(1024, self.tarfixer.padded_size(length, block_size))
|
||||||
|
|
||||||
|
def test_padded_size_length_long(self):
|
||||||
|
length = 82607
|
||||||
|
block_size = 512
|
||||||
|
self.assertEqual(82944, self.tarfixer.padded_size(length, block_size))
|
||||||
|
|
||||||
def test_create_extended_header(self):
|
def test_create_extended_header(self):
|
||||||
self.tarfixer.create_extended_header()
|
self.tarfixer.create_extended_header()
|
||||||
header = "".join(self.out.buflist)
|
header = self.out.getvalue()
|
||||||
self.assertEqual(512, len(header))
|
self.assertEqual(512, len(header))
|
||||||
self.assertEqual("52 comment=%s\n" % EXPECTED_REF, header[:52])
|
self.assertEqual("52 comment=%s\n" % EXPECTED_REF, header[:52])
|
||||||
self.assertEqual("\x00" * (512 - 53), header[53:])
|
self.assertEqual("\x00" * (512 - 53), header[53:])
|
||||||
|
@ -95,7 +106,7 @@ class TarTest(unittest.TestCase):
|
||||||
'c': '\x03',
|
'c': '\x03',
|
||||||
'd': '\x04',
|
'd': '\x04',
|
||||||
}
|
}
|
||||||
self.tarfixer.struct_members = fields.keys() + ['checksum']
|
self.tarfixer.struct_members = list(fields.keys()) + ['checksum']
|
||||||
result = self.tarfixer.calculate_checksum(fields)
|
result = self.tarfixer.calculate_checksum(fields)
|
||||||
expected_result = 10 + ord(" ") * 8
|
expected_result = 10 + ord(" ") * 8
|
||||||
self.assertEqual("%07o\x00" % expected_result, result)
|
self.assertEqual("%07o\x00" % expected_result, result)
|
||||||
|
@ -107,5 +118,6 @@ class TarTest(unittest.TestCase):
|
||||||
'name': 'hello',
|
'name': 'hello',
|
||||||
}
|
}
|
||||||
result = self.tarfixer.encode_header(chunk, ['mode', 'name'])
|
result = self.tarfixer.encode_header(chunk, ['mode', 'name'])
|
||||||
expected_result = ["%07o\x00" % mode, 'hello']
|
expected_result = ["%07o\x00" % mode, "hello"]
|
||||||
self.assertEqual(result, expected_result)
|
expected_result = list(map(lambda x: encode_bytes(x, "utf8"), expected_result))
|
||||||
|
self.assertEqual(expected_result, result)
|
||||||
|
|
Loading…
Add table
Reference in a new issue