mirror of
https://github.com/rpm-software-management/tito.git
synced 2025-02-23 12:12:47 +00:00
Revert "Fix #335. Handle source tarballs with UTF8 characters in the name."
This reverts commit 03509b36d5.
This commit is contained in:
parent
d6c7824177
commit
c2c4c5308f
5 changed files with 33 additions and 74 deletions
|
@ -26,34 +26,26 @@ if PY2:
|
|||
from ConfigParser import RawConfigParser
|
||||
from StringIO import StringIO
|
||||
import xmlrpclib
|
||||
text_type = unicode
|
||||
binary_type = str
|
||||
else:
|
||||
import subprocess
|
||||
from configparser import NoOptionError
|
||||
from configparser import RawConfigParser
|
||||
from io import StringIO
|
||||
import xmlrpc.client as xmlrpclib
|
||||
text_type = str
|
||||
binary_type = bytes
|
||||
|
||||
|
||||
def ensure_text(x, encoding="utf8"):
|
||||
if isinstance(x, binary_type):
|
||||
return x.decode(encoding)
|
||||
elif isinstance(x, text_type):
|
||||
def decode_bytes(x, source_encoding):
|
||||
if PY2:
|
||||
return x
|
||||
else:
|
||||
raise TypeError("Not expecting type '%s'" % type(x))
|
||||
return x.decode(source_encoding)
|
||||
|
||||
|
||||
def ensure_binary(x, encoding="utf8"):
|
||||
if isinstance(x, text_type):
|
||||
return x.encode(encoding)
|
||||
elif isinstance(x, binary_type):
|
||||
def encode_bytes(x, destination_encoding):
|
||||
if PY2:
|
||||
return x
|
||||
else:
|
||||
raise TypeError("Not expecting type '%s'" % type(x))
|
||||
return bytes(x, destination_encoding)
|
||||
|
||||
|
||||
def getstatusoutput(cmd):
|
||||
|
|
|
@ -14,8 +14,8 @@
|
|||
import re
|
||||
import struct
|
||||
import sys
|
||||
import codecs
|
||||
import tito.compat
|
||||
|
||||
from tito.compat import decode_bytes, encode_bytes
|
||||
|
||||
RECORD_SIZE = 512
|
||||
|
||||
|
@ -120,7 +120,7 @@ class TarFixer(object):
|
|||
def full_read(self, read_size):
|
||||
read = self.fh.read(read_size)
|
||||
amount_read = len(read)
|
||||
while amount_read < read_size:
|
||||
while (amount_read < read_size):
|
||||
left_to_read = read_size - amount_read
|
||||
next_read = self.fh.read(left_to_read)
|
||||
|
||||
|
@ -133,7 +133,13 @@ class TarFixer(object):
|
|||
return read
|
||||
|
||||
def write(self, data):
|
||||
self.out.write(tito.compat.ensure_binary(data))
|
||||
"""Write the data correctly depending on the mode of the file. While binary mode
|
||||
is preferred, we support text mode for streams like stdout."""
|
||||
if hasattr(self.out, 'mode') and 'b' in self.out.mode:
|
||||
data = bytearray(data)
|
||||
else:
|
||||
data = decode_bytes(data, "utf8")
|
||||
self.out.write(data)
|
||||
|
||||
def chunk_to_hash(self, chunk):
|
||||
# Our struct template is only 500 bytes, but the last 12 bytes are NUL
|
||||
|
@ -141,7 +147,7 @@ class TarFixer(object):
|
|||
# template as '12x'. The unpack_from method will read the bytes our
|
||||
# template defines from chunk and discard the rest.
|
||||
unpacked = struct.unpack_from(self.struct_template, chunk)
|
||||
unpacked = list(map(lambda x: tito.compat.ensure_text(x), unpacked))
|
||||
unpacked = list(map(lambda x: decode_bytes(x, 'utf8'), unpacked))
|
||||
# Zip what we read together with the member names and create a dictionary
|
||||
chunk_props = dict(zip(self.struct_members, unpacked))
|
||||
|
||||
|
@ -187,9 +193,9 @@ class TarFixer(object):
|
|||
field_size = int(re.match('(\d+)', member_template).group(1)) - 1
|
||||
fmt = "%0" + str(field_size) + "o\x00"
|
||||
as_string = fmt % chunk_props[member]
|
||||
pack_values.append(tito.compat.ensure_binary(as_string))
|
||||
pack_values.append(as_string.encode("utf8"))
|
||||
else:
|
||||
pack_values.append(tito.compat.ensure_binary(chunk_props[member]))
|
||||
pack_values.append(chunk_props[member].encode("utf8"))
|
||||
return pack_values
|
||||
|
||||
def process_header(self, chunk_props):
|
||||
|
@ -212,10 +218,10 @@ class TarFixer(object):
|
|||
# the size of the whole string (including the %u), the first %s is the
|
||||
# keyword, the second one is the value.
|
||||
#
|
||||
# Since the git ref is always 40 ASCII characters we can pre-compute the length
|
||||
# to put in the extended header
|
||||
# Since the git ref is always 40 characters we can
|
||||
# pre-compute the length to put in the extended header
|
||||
comment = "52 comment=%s\n" % self.gitref
|
||||
data_out = struct.pack("=52s460x", tito.compat.ensure_binary(comment, "ascii"))
|
||||
data_out = struct.pack("=52s460x", encode_bytes(comment, "ascii"))
|
||||
self.write(data_out)
|
||||
self.total_length += len(data_out)
|
||||
|
||||
|
@ -235,9 +241,9 @@ class TarFixer(object):
|
|||
values = self.encode_header(chunk_props)
|
||||
new_chksum = 0
|
||||
for val in values:
|
||||
val_bytes = bytearray(tito.compat.ensure_binary(val))
|
||||
val_bytes = val.decode("utf8")
|
||||
for b in val_bytes:
|
||||
new_chksum += b
|
||||
new_chksum += ord(b)
|
||||
return "%07o\x00" % new_chksum
|
||||
|
||||
def process_chunk(self, chunk):
|
||||
|
@ -330,8 +336,8 @@ class TarFixer(object):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if len(sys.argv) != 5:
|
||||
sys.exit("Usage: %s UNIX_TIMESTAMP GIT_HASH TAR_FILE DESTINATION_FILE" % sys.argv[0])
|
||||
if len(sys.argv) != 4:
|
||||
sys.exit("Usage: %s UNIX_TIMESTAMP GIT_HASH TAR_FILE" % sys.argv[0])
|
||||
|
||||
try:
|
||||
timestamp = int(sys.argv[1])
|
||||
|
@ -340,17 +346,11 @@ if __name__ == '__main__':
|
|||
|
||||
gitref = sys.argv[2]
|
||||
tar_file = sys.argv[3]
|
||||
destination_file = sys.argv[4]
|
||||
|
||||
try:
|
||||
dfh = open(destination_file, 'wb')
|
||||
except:
|
||||
print("Could not open %s" % destination_file)
|
||||
|
||||
try:
|
||||
fh = open(tar_file, 'rb')
|
||||
except:
|
||||
print("Could not read %s" % tar_file)
|
||||
|
||||
reader = TarFixer(fh, dfh, timestamp, gitref)
|
||||
reader = TarFixer(fh, sys.stdout, timestamp, gitref)
|
||||
reader.fix()
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -1,11 +1,8 @@
|
|||
# coding=utf-8
|
||||
import hashlib
|
||||
import os
|
||||
import tarfile
|
||||
import unittest
|
||||
import io
|
||||
|
||||
from tito.compat import StringIO, ensure_binary
|
||||
from tito.compat import StringIO, encode_bytes
|
||||
from tito.tar import TarFixer
|
||||
from mock import Mock
|
||||
|
||||
|
@ -15,10 +12,8 @@ EXPECTED_REF = "3518d720bff20db887b7a5e5dddd411d14dca1f9"
|
|||
|
||||
class TarTest(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.out = io.BytesIO()
|
||||
self.out = StringIO()
|
||||
self.tarfixer = TarFixer(None, self.out, EXPECTED_TIMESTAMP, EXPECTED_REF)
|
||||
self.utf8_containing_file = os.path.join(os.path.dirname(__file__), 'resources', 'les_misérables.tar')
|
||||
self.utf8_file = os.path.join(os.path.dirname(__file__), 'resources', 'archivé.tar')
|
||||
self.test_file = os.path.join(os.path.dirname(__file__), 'resources', 'archive.tar')
|
||||
self.reference_file = os.path.join(os.path.dirname(__file__), 'resources', 'archive-fixed.tar')
|
||||
self.reference_hash = self.hash_file(self.reference_file)
|
||||
|
@ -70,7 +65,7 @@ class TarTest(unittest.TestCase):
|
|||
self.fh = open(self.test_file, 'rb')
|
||||
self.tarfixer.fh = self.fh
|
||||
self.tarfixer.fix()
|
||||
self.assertEqual(self.reference_hash, self.hash_buffer(self.out.getvalue()))
|
||||
self.assertEqual(self.reference_hash, self.hash_buffer(encode_bytes(self.out.getvalue(), "utf8")))
|
||||
|
||||
def test_fix_fails_unless_file_in_binary_mode(self):
|
||||
self.fh = open(self.test_file, 'r')
|
||||
|
@ -101,8 +96,8 @@ class TarTest(unittest.TestCase):
|
|||
self.tarfixer.create_extended_header()
|
||||
header = self.out.getvalue()
|
||||
self.assertEqual(512, len(header))
|
||||
self.assertEqual(ensure_binary("52 comment=%s\n" % EXPECTED_REF), header[:52])
|
||||
self.assertEqual(ensure_binary("\x00" * (512 - 53)), header[53:])
|
||||
self.assertEqual("52 comment=%s\n" % EXPECTED_REF, header[:52])
|
||||
self.assertEqual("\x00" * (512 - 53), header[53:])
|
||||
|
||||
def test_calculate_checksum(self):
|
||||
fields = {
|
||||
|
@ -124,33 +119,5 @@ class TarTest(unittest.TestCase):
|
|||
}
|
||||
result = self.tarfixer.encode_header(chunk, ['mode', 'name'])
|
||||
expected_result = ["%07o\x00" % mode, "hello"]
|
||||
expected_result = list(map(lambda x: ensure_binary(x), expected_result))
|
||||
expected_result = list(map(lambda x: encode_bytes(x, "utf8"), expected_result))
|
||||
self.assertEqual(expected_result, result)
|
||||
|
||||
def test_utf8_file(self):
|
||||
# The goal of this test is to *not* throw a UnicodeDecodeError
|
||||
self.fh = open(self.utf8_file, 'rb')
|
||||
self.tarfixer.fh = self.fh
|
||||
self.tarfixer.fix()
|
||||
|
||||
self.assertEqual(self.reference_hash, self.hash_buffer(self.out.getvalue()))
|
||||
|
||||
# rewind the buffer
|
||||
self.out.seek(0)
|
||||
try:
|
||||
tarball = tarfile.open(fileobj=self.out, mode="r")
|
||||
except tarfile.TarError:
|
||||
self.fail("Unable to open generated tarball")
|
||||
|
||||
def test_utf8_containing_file(self):
|
||||
# # The goal of this test is to *not* blow up due to a corrupted tarball
|
||||
self.fh = open(self.utf8_containing_file, 'rb')
|
||||
self.tarfixer.fh = self.fh
|
||||
self.tarfixer.fix()
|
||||
|
||||
# rewind the buffer
|
||||
self.out.seek(0)
|
||||
try:
|
||||
tarball = tarfile.open(fileobj=self.out, mode="r")
|
||||
except tarfile.TarError as e:
|
||||
self.fail("Unable to open generated tarball: %s" % e)
|
||||
|
|
Loading…
Add table
Reference in a new issue