mirror of
https://github.com/rpm-software-management/tito.git
synced 2025-02-23 12:12:47 +00:00
Merge pull request #336 from awood/master
Fix UnicodeDecodeError caused by source tarballs with utf8 names.
This commit is contained in:
commit
86dc621bf4
8 changed files with 78 additions and 37 deletions
1
requirements.txt
Normal file
1
requirements.txt
Normal file
|
@ -0,0 +1 @@
|
|||
blessings
|
3
setup.py
3
setup.py
|
@ -33,6 +33,9 @@ setup(
|
|||
},
|
||||
packages=find_packages('src'),
|
||||
include_package_data=True,
|
||||
install_requires=[
|
||||
'blessings'
|
||||
],
|
||||
|
||||
# non-python scripts go here
|
||||
scripts=[
|
||||
|
|
|
@ -26,26 +26,34 @@ if PY2:
|
|||
from ConfigParser import RawConfigParser
|
||||
from StringIO import StringIO
|
||||
import xmlrpclib
|
||||
text_type = unicode
|
||||
binary_type = str
|
||||
else:
|
||||
import subprocess
|
||||
from configparser import NoOptionError
|
||||
from configparser import RawConfigParser
|
||||
from io import StringIO
|
||||
import xmlrpc.client as xmlrpclib
|
||||
text_type = str
|
||||
binary_type = bytes
|
||||
|
||||
|
||||
def decode_bytes(x, source_encoding):
|
||||
if PY2:
|
||||
def ensure_text(x, encoding="utf8"):
|
||||
if isinstance(x, binary_type):
|
||||
return x.decode(encoding)
|
||||
elif isinstance(x, text_type):
|
||||
return x
|
||||
else:
|
||||
return x.decode(source_encoding)
|
||||
raise TypeError("Not expecting type '%s'" % type(x))
|
||||
|
||||
|
||||
def encode_bytes(x, destination_encoding):
|
||||
if PY2:
|
||||
def ensure_binary(x, encoding="utf8"):
|
||||
if isinstance(x, text_type):
|
||||
return x.encode(encoding)
|
||||
elif isinstance(x, binary_type):
|
||||
return x
|
||||
else:
|
||||
return bytes(x, destination_encoding)
|
||||
raise TypeError("Not expecting type '%s'" % type(x))
|
||||
|
||||
|
||||
def getstatusoutput(cmd):
|
||||
|
|
|
@ -14,8 +14,8 @@
|
|||
import re
|
||||
import struct
|
||||
import sys
|
||||
|
||||
from tito.compat import decode_bytes, encode_bytes
|
||||
import codecs
|
||||
import tito.compat
|
||||
|
||||
RECORD_SIZE = 512
|
||||
|
||||
|
@ -120,7 +120,7 @@ class TarFixer(object):
|
|||
def full_read(self, read_size):
|
||||
read = self.fh.read(read_size)
|
||||
amount_read = len(read)
|
||||
while (amount_read < read_size):
|
||||
while amount_read < read_size:
|
||||
left_to_read = read_size - amount_read
|
||||
next_read = self.fh.read(left_to_read)
|
||||
|
||||
|
@ -133,13 +133,7 @@ class TarFixer(object):
|
|||
return read
|
||||
|
||||
def write(self, data):
|
||||
"""Write the data correctly depending on the mode of the file. While binary mode
|
||||
is preferred, we support text mode for streams like stdout."""
|
||||
if hasattr(self.out, 'mode') and 'b' in self.out.mode:
|
||||
data = bytearray(data)
|
||||
else:
|
||||
data = decode_bytes(data, "utf8")
|
||||
self.out.write(data)
|
||||
self.out.write(tito.compat.ensure_binary(data))
|
||||
|
||||
def chunk_to_hash(self, chunk):
|
||||
# Our struct template is only 500 bytes, but the last 12 bytes are NUL
|
||||
|
@ -147,7 +141,7 @@ class TarFixer(object):
|
|||
# template as '12x'. The unpack_from method will read the bytes our
|
||||
# template defines from chunk and discard the rest.
|
||||
unpacked = struct.unpack_from(self.struct_template, chunk)
|
||||
unpacked = list(map(lambda x: decode_bytes(x, 'utf8'), unpacked))
|
||||
unpacked = list(map(lambda x: tito.compat.ensure_text(x), unpacked))
|
||||
# Zip what we read together with the member names and create a dictionary
|
||||
chunk_props = dict(zip(self.struct_members, unpacked))
|
||||
|
||||
|
@ -193,9 +187,9 @@ class TarFixer(object):
|
|||
field_size = int(re.match('(\d+)', member_template).group(1)) - 1
|
||||
fmt = "%0" + str(field_size) + "o\x00"
|
||||
as_string = fmt % chunk_props[member]
|
||||
pack_values.append(as_string.encode("utf8"))
|
||||
pack_values.append(tito.compat.ensure_binary(as_string))
|
||||
else:
|
||||
pack_values.append(chunk_props[member].encode("utf8"))
|
||||
pack_values.append(tito.compat.ensure_binary(chunk_props[member]))
|
||||
return pack_values
|
||||
|
||||
def process_header(self, chunk_props):
|
||||
|
@ -218,10 +212,10 @@ class TarFixer(object):
|
|||
# the size of the whole string (including the %u), the first %s is the
|
||||
# keyword, the second one is the value.
|
||||
#
|
||||
# Since the git ref is always 40 characters we can
|
||||
# pre-compute the length to put in the extended header
|
||||
# Since the git ref is always 40 ASCII characters we can pre-compute the length
|
||||
# to put in the extended header
|
||||
comment = "52 comment=%s\n" % self.gitref
|
||||
data_out = struct.pack("=52s460x", encode_bytes(comment, "ascii"))
|
||||
data_out = struct.pack("=52s460x", tito.compat.ensure_binary(comment, "ascii"))
|
||||
self.write(data_out)
|
||||
self.total_length += len(data_out)
|
||||
|
||||
|
@ -241,9 +235,9 @@ class TarFixer(object):
|
|||
values = self.encode_header(chunk_props)
|
||||
new_chksum = 0
|
||||
for val in values:
|
||||
val_bytes = val.decode("utf8")
|
||||
val_bytes = bytearray(tito.compat.ensure_binary(val))
|
||||
for b in val_bytes:
|
||||
new_chksum += ord(b)
|
||||
new_chksum += b
|
||||
return "%07o\x00" % new_chksum
|
||||
|
||||
def process_chunk(self, chunk):
|
||||
|
@ -336,8 +330,8 @@ class TarFixer(object):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if len(sys.argv) != 4:
|
||||
sys.exit("Usage: %s UNIX_TIMESTAMP GIT_HASH TAR_FILE" % sys.argv[0])
|
||||
if len(sys.argv) != 5:
|
||||
sys.exit("Usage: %s UNIX_TIMESTAMP GIT_HASH TAR_FILE DESTINATION_FILE" % sys.argv[0])
|
||||
|
||||
try:
|
||||
timestamp = int(sys.argv[1])
|
||||
|
@ -346,11 +340,17 @@ if __name__ == '__main__':
|
|||
|
||||
gitref = sys.argv[2]
|
||||
tar_file = sys.argv[3]
|
||||
destination_file = sys.argv[4]
|
||||
|
||||
try:
|
||||
dfh = open(destination_file, 'wb')
|
||||
except:
|
||||
print("Could not open %s" % destination_file)
|
||||
|
||||
try:
|
||||
fh = open(tar_file, 'rb')
|
||||
except:
|
||||
print("Could not read %s" % tar_file)
|
||||
|
||||
reader = TarFixer(fh, sys.stdout, timestamp, gitref)
|
||||
reader = TarFixer(fh, dfh, timestamp, gitref)
|
||||
reader.fix()
|
||||
|
|
|
@ -5,10 +5,8 @@ Version: 0.0.1
|
|||
Release: 1%{?dist}
|
||||
Summary: tito test package for the external source builder
|
||||
URL: https://example.com
|
||||
Group: Applications/Internet
|
||||
License: GPLv2
|
||||
Source0: %{name}-%{version}.tar.gz
|
||||
BuildRoot: %{_tmppath}/%{name}-root-%(%{__id_u} -n)
|
||||
BuildArch: noarch
|
||||
|
||||
%description
|
||||
|
@ -19,10 +17,8 @@ Nobody cares.
|
|||
%build
|
||||
|
||||
%install
|
||||
rm -rf %{buildroot}
|
||||
|
||||
%clean
|
||||
rm -rf %{buildroot}
|
||||
|
||||
%files
|
||||
%defattr(-,root,root)
|
||||
|
|
BIN
test/unit/resources/archivé.tar
Normal file
BIN
test/unit/resources/archivé.tar
Normal file
Binary file not shown.
BIN
test/unit/resources/les_misérables.tar
Normal file
BIN
test/unit/resources/les_misérables.tar
Normal file
Binary file not shown.
|
@ -1,8 +1,11 @@
|
|||
# coding=utf-8
|
||||
import hashlib
|
||||
import os
|
||||
import tarfile
|
||||
import unittest
|
||||
import io
|
||||
|
||||
from tito.compat import StringIO, encode_bytes
|
||||
from tito.compat import StringIO, ensure_binary
|
||||
from tito.tar import TarFixer
|
||||
from mock import Mock
|
||||
|
||||
|
@ -12,8 +15,10 @@ EXPECTED_REF = "3518d720bff20db887b7a5e5dddd411d14dca1f9"
|
|||
|
||||
class TarTest(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.out = StringIO()
|
||||
self.out = io.BytesIO()
|
||||
self.tarfixer = TarFixer(None, self.out, EXPECTED_TIMESTAMP, EXPECTED_REF)
|
||||
self.utf8_containing_file = os.path.join(os.path.dirname(__file__), 'resources', 'les_misérables.tar')
|
||||
self.utf8_file = os.path.join(os.path.dirname(__file__), 'resources', 'archivé.tar')
|
||||
self.test_file = os.path.join(os.path.dirname(__file__), 'resources', 'archive.tar')
|
||||
self.reference_file = os.path.join(os.path.dirname(__file__), 'resources', 'archive-fixed.tar')
|
||||
self.reference_hash = self.hash_file(self.reference_file)
|
||||
|
@ -65,7 +70,7 @@ class TarTest(unittest.TestCase):
|
|||
self.fh = open(self.test_file, 'rb')
|
||||
self.tarfixer.fh = self.fh
|
||||
self.tarfixer.fix()
|
||||
self.assertEqual(self.reference_hash, self.hash_buffer(encode_bytes(self.out.getvalue(), "utf8")))
|
||||
self.assertEqual(self.reference_hash, self.hash_buffer(self.out.getvalue()))
|
||||
|
||||
def test_fix_fails_unless_file_in_binary_mode(self):
|
||||
self.fh = open(self.test_file, 'r')
|
||||
|
@ -96,8 +101,8 @@ class TarTest(unittest.TestCase):
|
|||
self.tarfixer.create_extended_header()
|
||||
header = self.out.getvalue()
|
||||
self.assertEqual(512, len(header))
|
||||
self.assertEqual("52 comment=%s\n" % EXPECTED_REF, header[:52])
|
||||
self.assertEqual("\x00" * (512 - 53), header[53:])
|
||||
self.assertEqual(ensure_binary("52 comment=%s\n" % EXPECTED_REF), header[:52])
|
||||
self.assertEqual(ensure_binary("\x00" * (512 - 53)), header[53:])
|
||||
|
||||
def test_calculate_checksum(self):
|
||||
fields = {
|
||||
|
@ -119,5 +124,33 @@ class TarTest(unittest.TestCase):
|
|||
}
|
||||
result = self.tarfixer.encode_header(chunk, ['mode', 'name'])
|
||||
expected_result = ["%07o\x00" % mode, "hello"]
|
||||
expected_result = list(map(lambda x: encode_bytes(x, "utf8"), expected_result))
|
||||
expected_result = list(map(lambda x: ensure_binary(x), expected_result))
|
||||
self.assertEqual(expected_result, result)
|
||||
|
||||
def test_utf8_file(self):
|
||||
# The goal of this test is to *not* throw a UnicodeDecodeError
|
||||
self.fh = open(self.utf8_file, 'rb')
|
||||
self.tarfixer.fh = self.fh
|
||||
self.tarfixer.fix()
|
||||
|
||||
self.assertEqual(self.reference_hash, self.hash_buffer(self.out.getvalue()))
|
||||
|
||||
# rewind the buffer
|
||||
self.out.seek(0)
|
||||
try:
|
||||
tarball = tarfile.open(fileobj=self.out, mode="r")
|
||||
except tarfile.TarError:
|
||||
self.fail("Unable to open generated tarball")
|
||||
|
||||
def test_utf8_containing_file(self):
|
||||
# # The goal of this test is to *not* blow up due to a corrupted tarball
|
||||
self.fh = open(self.utf8_containing_file, 'rb')
|
||||
self.tarfixer.fh = self.fh
|
||||
self.tarfixer.fix()
|
||||
|
||||
# rewind the buffer
|
||||
self.out.seek(0)
|
||||
try:
|
||||
tarball = tarfile.open(fileobj=self.out, mode="r")
|
||||
except tarfile.TarError as e:
|
||||
self.fail("Unable to open generated tarball: %s" % e)
|
||||
|
|
Loading…
Add table
Reference in a new issue