Mirror of https://github.com/rpm-software-management/tito.git (synced 2025-02-23 12:12:47 +00:00)
Fix Python 3 issues with binary versus string types.
Python 3 is very picky about not mixing binary and string data. This patch gets TarFixer running on both Python 2.6+ and Python 3.x.
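For background, a minimal standalone sketch (not part of the patch itself) of the str/bytes split the message refers to:

    import struct

    header = struct.pack("=5s", b"hello")    # struct.pack returns bytes on Python 3
    try:
        "prefix" + header                    # mixing str and bytes raises on Python 3
    except TypeError as e:
        print("Python 3 refuses to mix types:", e)
    print("prefix" + header.decode("utf8"))  # an explicit decode is required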
parent 07d62cf24e
commit a5b43b6b96

3 changed files with 75 additions and 30 deletions
@@ -34,6 +34,20 @@ else:
     import xmlrpc.client as xmlrpclib
+
+
+def decode_bytes(x, source_encoding):
+    if PY2:
+        return x
+    else:
+        return x.decode(source_encoding)
+
+
+def encode_bytes(x, destination_encoding):
+    if PY2:
+        return x
+    else:
+        return bytes(x, destination_encoding)
 
 
 def getstatusoutput(cmd):
     """
     Returns (status, output) of executing cmd in a shell.
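Usage note: on Python 2 both helpers are identity functions, since str and bytes are the same type there; on Python 3 they perform real conversions. They live in tito.compat, which the tar module below imports. A quick round trip, assuming the PY2 flag defined earlier in that module:

    raw = encode_bytes("52 comment=abc\n", "utf8")  # bytes on Python 3, str on Python 2
    text = decode_bytes(raw, "utf8")                # back to the native string type
    assert text == "52 comment=abc\n"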
@@ -15,6 +15,8 @@ import re
 import struct
 import sys
 
+from tito.compat import decode_bytes, encode_bytes
+
 RECORD_SIZE = 512
 
 # Git writes its tarballs to be a multiple of 10240. I'm not sure why: the
@@ -53,6 +55,8 @@ class TarFixer(object):
     pax extended header records have the format "%u %s=%s\n". %u contains
     the size of the whole string (including the %u), the first %s is the
     keyword, the second one is the value.
+
+    PAX (also known as POSIX.1-2001) always encodes everything in UTF-8.
     """
     def __init__(self, fh, out, timestamp, gitref, maven_built=False):
         self.maven_built = maven_built
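The size of 52 used for the extended header elsewhere in this file follows directly from the record format quoted in this docstring: the prefix "52 comment=" is 11 characters, a git ref is always 40, and the trailing newline adds 1. A quick check:

    gitref = "0" * 40                   # git SHA-1 refs are always 40 hex characters
    record = "52 comment=%s\n" % gitref
    assert len(record) == 52            # 11 + 40 + 1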
@@ -96,7 +100,7 @@ class TarFixer(object):
 
         # Add an '=' to use native byte order with standard sizes
         self.struct_template = "=" + "".join(map(lambda x: x[1], self.tar_struct))
-        self.struct_members = map(lambda x: x[0], self.tar_struct)
+        self.struct_members = list(map(lambda x: x[0], self.tar_struct))
         self.struct_hash = dict(self.tar_struct)
 
         # The tarballs created by git archive from tree IDs don't have a global
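The list() wrapper matters on Python 3, where map() returns a one-shot iterator rather than a list; struct_members is consumed more than once (zipped against the unpacked fields in chunk_to_hash, iterated again when computing checksums), and an exhausted iterator would silently yield nothing the second time:

    members = map(lambda x: x[0], [("name", "100s"), ("mode", "8s")])
    first = list(members)    # consumes the iterator on Python 3
    second = list(members)   # already exhausted
    print(first, second)     # Python 3: ['name', 'mode'] []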
@@ -128,13 +132,22 @@ class TarFixer(object):
 
         return read
 
+    def write(self, data):
+        """Write the data correctly depending on the mode of the file. While binary mode
+        is preferred, we support text mode for streams like stdout."""
+        if hasattr(self.out, 'mode') and 'b' in self.out.mode:
+            data = bytearray(data)
+        else:
+            data = decode_bytes(data, "utf8")
+        self.out.write(data)
+
     def chunk_to_hash(self, chunk):
         # Our struct template is only 500 bytes, but the last 12 bytes are NUL
         # I elected to ignore them completely instead of including them in the
         # template as '12x'. The unpack_from method will read the bytes our
         # template defines from chunk and discard the rest.
         unpacked = struct.unpack_from(self.struct_template, chunk)
+        unpacked = list(map(lambda x: decode_bytes(x, 'utf8'), unpacked))
 
         # Zip what we read together with the member names and create a dictionary
         chunk_props = dict(zip(self.struct_members, unpacked))
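The new write() method dispatches on the output stream: real files opened with 'b' in their mode get bytes, while everything else (sys.stdout, or the StringIO the tests use, neither of which carries a binary mode) gets decoded text. A standalone sketch of the same dispatch, with hypothetical names:

    import io

    def write_compat(out, data):
        # hypothetical free-standing version of TarFixer.write
        if hasattr(out, 'mode') and 'b' in out.mode:
            out.write(bytearray(data))       # binary stream: pass bytes through
        else:
            out.write(data.decode("utf8"))   # text stream: decode first

    buf = io.StringIO()                      # no .mode attribute -> text path
    write_compat(buf, b"hello")
    assert buf.getvalue() == "hello"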
@@ -143,28 +156,28 @@ class TarFixer(object):
     def padded_size(self, length, pad_size=RECORD_SIZE):
         """Function to pad out a length to the nearest multiple of pad_size
         that can contain it."""
-        blocks = length / pad_size
+        blocks = length // pad_size
         if length % pad_size != 0:
             blocks += 1
         return blocks * pad_size
 
     def create_global_header(self):
         header_props = {
-            'name': 'pax_global_header',
+            'name': u'pax_global_header',
             'mode': 0o666,
             'uid': 0,
             'gid': 0,
             'size': 52,  # The size of the extended header with the gitref
             'mtime': self.timestamp,
-            'typeflag': 'g',
-            'linkname': '',
-            'magic': 'ustar',
-            'version': '00',
-            'uname': 'root',
-            'gname': 'root',
+            'typeflag': u'g',
+            'linkname': u'',
+            'magic': u'ustar',
+            'version': u'00',
+            'uname': u'root',
+            'gname': u'root',
             'devmajor': 0,
             'devminor': 0,
-            'prefix': '',
+            'prefix': u'',
         }
         self.process_header(header_props)
 
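The switch from / to // is needed because Python 3's true division returns a float, which would propagate into blocks * pad_size; floor division keeps the arithmetic integral on both interpreters. Using the figures from the new test_padded_size_length_long case later in this commit:

    length, pad_size = 82607, 512
    blocks = length // pad_size       # 161; plain / would give 161.341... on Python 3
    if length % pad_size != 0:
        blocks += 1
    assert blocks * pad_size == 82944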
@@ -179,9 +192,10 @@ class TarFixer(object):
                 member_template = self.struct_hash[member]
                 field_size = int(re.match('(\d+)', member_template).group(1)) - 1
                 fmt = "%0" + str(field_size) + "o\x00"
-                pack_values.append(fmt % chunk_props[member])
+                as_string = fmt % chunk_props[member]
+                pack_values.append(as_string.encode("utf8"))
             else:
-                pack_values.append(chunk_props[member])
+                pack_values.append(chunk_props[member].encode("utf8"))
         return pack_values
 
     def process_header(self, chunk_props):
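The added .encode("utf8") calls are forced by struct: on Python 3 the 's' format code only accepts bytes, and passing a str raises struct.error. A minimal demonstration:

    import struct

    struct.pack("=5s", b"hello")      # fine on both Python versions
    try:
        struct.pack("=5s", "hello")   # str works on Python 2, fails on Python 3
    except struct.error as e:
        print("bytes required here:", e)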
@@ -191,7 +205,7 @@ class TarFixer(object):
 
         # The struct itself is only 500 bytes so we have to pad it to 512
         data_out = struct.pack(self.struct_template + "12x", *pack_values)
-        self.out.write(data_out)
+        self.write(data_out)
         self.total_length += len(data_out)
 
     def process_extended_header(self):
@@ -207,13 +221,13 @@ class TarFixer(object):
         # Since the git ref is always 40 characters we can
         # pre-compute the length to put in the extended header
         comment = "52 comment=%s\n" % self.gitref
-        data_out = struct.pack("=512s", comment)
-        self.out.write(data_out)
+        data_out = struct.pack("=52s460x", encode_bytes(comment, "ascii"))
+        self.write(data_out)
         self.total_length += len(data_out)
 
     def process_file_data(self, size):
         data_out = self.full_read(self.padded_size(size))
-        self.out.write(data_out)
+        self.write(data_out)
         self.total_length += len(data_out)
 
     def calculate_checksum(self, chunk_props):
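The move from "=512s" to "=52s460x" produces the same 512-byte record: 's' NUL-pads its argument to the stated width and '460x' appends 460 NUL pad bytes, so only the argument type changes (Python 3 requires bytes). A quick check:

    import struct

    comment = "52 comment=%s\n" % ("0" * 40)                # exactly 52 characters
    record = struct.pack("=52s460x", comment.encode("ascii"))
    assert len(record) == 512                                # 52 + 460 pad bytes
    assert record[52:] == b"\x00" * 460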
@@ -227,17 +241,19 @@ class TarFixer(object):
         values = self.encode_header(chunk_props)
         new_chksum = 0
         for val in values:
-            val_bytes = bytearray(val, 'ASCII')
-            new_chksum += reduce(lambda x, y: x + y, val_bytes, 0)
+            val_bytes = val.decode("utf8")
+            for b in val_bytes:
+                new_chksum += ord(b)
         return "%07o\x00" % new_chksum
 
     def process_chunk(self, chunk):
         # Tar archives end with two 512 byte blocks of zeroes
-        if chunk == "\x00" * 512:
-            self.out.write(chunk)
+        if chunk == b"\x00" * 512:
+            self.write(b"\x00" * 512)
             self.total_length += len(chunk)
             if self.last_chunk_was_nulls:
-                self.out.write("\x00" * (self.padded_size(self.total_length, GIT_BLOCK_SIZE) - self.total_length))
+                final_padding = b"\x00" * (self.padded_size(self.total_length, GIT_BLOCK_SIZE) - self.total_length)
+                self.write(final_padding)
             self.done = True
             self.last_chunk_was_nulls = True
             return
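The rewritten checksum loop is equivalent to the old reduce over a bytearray for the ASCII data found in ustar headers: decoding and summing ord() of each character yields the same total as summing the raw byte values, as long as every byte is a single-byte character. A quick equivalence check:

    val = b"0000644\x00"                                  # a typical octal header field
    by_bytes = sum(bytearray(val))                        # old approach
    by_chars = sum(ord(c) for c in val.decode("utf8"))    # new approach
    assert by_bytes == by_chars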
@@ -299,6 +315,9 @@ class TarFixer(object):
             self.process_file_data(chunk_props['size'])
 
     def fix(self):
+        if 'b' not in self.fh.mode:
+            raise IOError("The input file must be opened in binary mode!")
+
         try:
             chunk = self.full_read(RECORD_SIZE)
             while chunk != "" and not self.done:
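The guard relies on the mode attribute that real file objects expose; a text-mode handle simply lacks the 'b' flag. A tiny sketch using a temporary file:

    import tempfile

    with tempfile.NamedTemporaryFile(suffix=".tar") as tmp:
        fh = open(tmp.name)          # text mode by default
        assert 'b' not in fh.mode    # this is what trips the IOError above
        fh.close()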
@@ -2,7 +2,7 @@ import hashlib
 import os
 import unittest
 
-from tito.compat import StringIO
+from tito.compat import StringIO, encode_bytes
 from tito.tar import TarFixer
 from mock import Mock
 
@@ -22,7 +22,8 @@ class TarTest(unittest.TestCase):
         self.out = None
 
     def hash_file(self, filename):
-        return self.hash_buffer(open(filename, 'rb').read())
+        file_bytes = open(filename, 'rb').read()
+        return self.hash_buffer(file_bytes)
 
     def hash_buffer(self, buf):
         hasher = hashlib.sha256()
@@ -61,10 +62,15 @@ class TarTest(unittest.TestCase):
         self.assertRaises(IOError, self.tarfixer.full_read, 10)
 
     def test_fix(self):
-        self.fh = open(self.test_file)
+        self.fh = open(self.test_file, 'rb')
         self.tarfixer.fh = self.fh
         self.tarfixer.fix()
-        self.assertEqual(self.reference_hash, self.hash_buffer("".join(self.out.buflist)))
+        self.assertEqual(self.reference_hash, self.hash_buffer(encode_bytes(self.out.getvalue(), "utf8")))
+
+    def test_fix_fails_unless_file_in_binary_mode(self):
+        self.fh = open(self.test_file, 'r')
+        self.tarfixer.fh = self.fh
+        self.assertRaises(IOError, self.tarfixer.fix)
 
     def test_padded_size_length_small(self):
         length = 10
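Worth noting on the assertion change above: buflist was an undocumented internal of the old Python 2 StringIO module and does not exist on io.StringIO, while getvalue() is the portable accessor available on both, hence the switch to encode_bytes(self.out.getvalue(), "utf8").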
@@ -81,9 +87,14 @@ class TarTest(unittest.TestCase):
         block_size = 512
         self.assertEqual(1024, self.tarfixer.padded_size(length, block_size))
 
+    def test_padded_size_length_long(self):
+        length = 82607
+        block_size = 512
+        self.assertEqual(82944, self.tarfixer.padded_size(length, block_size))
+
     def test_create_extended_header(self):
         self.tarfixer.create_extended_header()
-        header = "".join(self.out.buflist)
+        header = self.out.getvalue()
         self.assertEqual(512, len(header))
         self.assertEqual("52 comment=%s\n" % EXPECTED_REF, header[:52])
         self.assertEqual("\x00" * (512 - 53), header[53:])
@@ -95,7 +106,7 @@ class TarTest(unittest.TestCase):
             'c': '\x03',
             'd': '\x04',
         }
-        self.tarfixer.struct_members = fields.keys() + ['checksum']
+        self.tarfixer.struct_members = list(fields.keys()) + ['checksum']
         result = self.tarfixer.calculate_checksum(fields)
         expected_result = 10 + ord(" ") * 8
         self.assertEqual("%07o\x00" % expected_result, result)
@@ -107,5 +118,6 @@ class TarTest(unittest.TestCase):
             'name': 'hello',
         }
         result = self.tarfixer.encode_header(chunk, ['mode', 'name'])
-        expected_result = ["%07o\x00" % mode, 'hello']
-        self.assertEqual(result, expected_result)
+        expected_result = ["%07o\x00" % mode, "hello"]
+        expected_result = list(map(lambda x: encode_bytes(x, "utf8"), expected_result))
+        self.assertEqual(expected_result, result)