Clean up fill-repodb

This commit is contained in:
Alexander Lakhin 2014-02-07 15:02:11 +04:00
parent 3712dda39c
commit 3d3f873e57

View file

@ -6,10 +6,8 @@ import sys
import gettext import gettext
import argparse import argparse
import sqlite3 import sqlite3
import string
import rpm import rpm
import re import re
import tempfile
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import subprocess import subprocess
import shutil import shutil
@ -34,12 +32,12 @@ def getFileList(path, ext, filelist):
if os.path.isdir(path + '/' + d): if os.path.isdir(path + '/' + d):
filelist = getFileList(path + '/' + d, ext, filelist) filelist = getFileList(path + '/' + d, ext, filelist)
else: else:
if string.lower(d[-extlen:]) == '%s' % (ext): if d[-extlen:].lower() == ext:
newpath = os.path.normpath(path + '/' + d) newpath = os.path.normpath(path + '/' + d)
filelist.append(newpath) filelist.append(newpath)
return filelist return filelist
def parseargs(args): def parseargs():
parser = argparse.ArgumentParser(description=_('extract packages metadata' parser = argparse.ArgumentParser(description=_('extract packages metadata'
' from RPM repositories')) ' from RPM repositories'))
parser.add_argument('config', metavar='config', parser.add_argument('config', metavar='config',
@ -113,12 +111,12 @@ def index_database(conn):
conn.commit() conn.commit()
def add_repodir(xrepodir, conn): def add_repodir(xrepodir, conn):
c = conn.cursor() dbc = conn.cursor()
c.execute("""INSERT INTO repodirs (name, path, sources) VALUES (?, ?, ?)""", dbc.execute("""INSERT INTO repodirs (name, path, sources) VALUES (?, ?, ?)""",
[xrepodir.get('name'), xrepodir.get('path'), xrepodir.get('sources')]) [xrepodir.get('name'), xrepodir.get('path'), xrepodir.get('sources')])
repodir_id = c.lastrowid repodir_id = dbc.lastrowid
for depend in xrepodir.findall('dependency'): for depend in xrepodir.findall('dependency'):
c.execute("""INSERT INTO repodir_depends(repodir_id, depend_repodir_name) dbc.execute("""INSERT INTO repodir_depends(repodir_id, depend_repodir_name)
VALUES (?, ?)""", VALUES (?, ?)""",
[repodir_id, depend.text.strip()]) [repodir_id, depend.text.strip()])
conn.commit() conn.commit()
@ -151,12 +149,12 @@ def get_rpm_header(rpm_ts, pkg):
hdr = None hdr = None
try: try:
fdno = os.open(pkg, os.O_RDONLY) fdno = os.open(pkg, os.O_RDONLY)
except OSError, e: except OSError as exc:
raise Exception('Unable to open file %s.' % pkg) raise Exception('Unable to open file %s.\n%s' % (pkg, exc))
try: try:
hdr = rpm_ts.hdrFromFdno(fdno) hdr = rpm_ts.hdrFromFdno(fdno)
except rpm.error, e: except rpm.error as exc:
raise Exception('Unable to read RPM header for %s.' % pkg) raise Exception('Unable to read RPM header for %s\n%s.' % (pkg, exc))
finally: finally:
os.close(fdno) os.close(fdno)
return hdr return hdr
@ -174,12 +172,11 @@ FILE_REC_PATH_IDX = 3
FILE_REC_LINK_IDX = 6 FILE_REC_LINK_IDX = 6
FILE_REC_MARK_IDX = 7 FILE_REC_MARK_IDX = 7
def register_object(data, pkg_id, pkg, object_file_record, temp_dir, no_so_symbols): def register_object(data, object_file_record, temp_dir, no_so_symbols):
so_needed = data['so_needed'] so_needed = data['so_needed']
obj_symbols = data['obj_symbols'] obj_symbols = data['obj_symbols']
obj_id = object_file_record[0] obj_id = object_file_record[0]
obj_file_path = object_file_record[3] obj_file_path = object_file_record[3]
obj_file_basename = os.path.basename(obj_file_path)
temp_obj_file = os.path.join(temp_dir, obj_file_path.lstrip('/')) temp_obj_file = os.path.join(temp_dir, obj_file_path.lstrip('/'))
target_file = None target_file = None
@ -222,7 +219,6 @@ def register_object(data, pkg_id, pkg, object_file_record, temp_dir, no_so_symbo
object_file_record[FILE_REC_LINK_IDX] = target_file object_file_record[FILE_REC_LINK_IDX] = target_file
object_file_record[FILE_REC_MARK_IDX] = file_mark object_file_record[FILE_REC_MARK_IDX] = file_mark
needed_list = []
dynsection = False dynsection = False
for odline in od_out.split('\n'): for odline in od_out.split('\n'):
odls = odline.strip() odls = odline.strip()
@ -235,9 +231,8 @@ def register_object(data, pkg_id, pkg, object_file_record, temp_dir, no_so_symbo
if needrem: if needrem:
so_needed.append([obj_id, needrem.group(1)]) so_needed.append([obj_id, needrem.group(1)])
symbols_list = []
for symline in nmundef_out.split('\n'): for symline in nmundef_out.split('\n'):
smre = re.match('^.([\S]*)\s+(\w)\s(.*)$', symline) smre = re.match(r'^.([\S]*)\s+(\w)\s(.*)$', symline)
if smre: if smre:
if smre.group(2) in ['v', 'w']: if smre.group(2) in ['v', 'w']:
continue continue
@ -245,7 +240,7 @@ def register_object(data, pkg_id, pkg, object_file_record, temp_dir, no_so_symbo
obj_symbols.append([obj_id, symname, 0]) obj_symbols.append([obj_id, symname, 0])
for symline in nmdef_out.split('\n'): for symline in nmdef_out.split('\n'):
smre = re.match('^.([\S]*)\s+(\w)\s(.*)$', symline) smre = re.match(r'^.([\S]*)\s+(\w)\s(.*)$', symline)
if smre: if smre:
symname = smre.group(3) symname = smre.group(3)
obj_symbols.append([obj_id, symname, 1]) obj_symbols.append([obj_id, symname, 1])
@ -260,7 +255,7 @@ def extract_files(pkg, files_list, obj_so_files_idx, temp_dir):
for i in obj_so_files_idx: for i in obj_so_files_idx:
f.write('.' + files_list[i][FILE_REC_PATH_IDX] + '\n') f.write('.' + files_list[i][FILE_REC_PATH_IDX] + '\n')
rpm_cpio_cmd = 'rpm2cpio ' + pkg + ' | cpio -ivdu -E ' + filelist rpm_cpio_cmd = 'rpm2cpio ' + local_pkg + ' | cpio -ivdu -E ' + filelist
p = subprocess.Popen(rpm_cpio_cmd, p = subprocess.Popen(rpm_cpio_cmd,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT, stderr=subprocess.STDOUT,
@ -379,8 +374,8 @@ def process_package_worker(num, queue_in, generator, gen_lock, db_struct,
os.makedirs(pkg_temp_dir) os.makedirs(pkg_temp_dir)
if extract_files(pkg, files_list, obj_so_files_idx, pkg_temp_dir): if extract_files(pkg, files_list, obj_so_files_idx, pkg_temp_dir):
for i in obj_so_files_idx: for i in obj_so_files_idx:
register_object(data, pkg_id, pkg, files_list[i], register_object(data, files_list[i], pkg_temp_dir,
pkg_temp_dir, no_so_symbols) no_so_symbols)
shutil.rmtree(pkg_temp_dir, True) shutil.rmtree(pkg_temp_dir, True)
@ -434,10 +429,10 @@ def process_repodir(repodir_path, repodir_id, build_archs, conn, db_struct,
'rpm_filepath': 'TEXT', 'rpm_filepath': 'TEXT',
'sourcerpm_package': 'TEXT'} 'sourcerpm_package': 'TEXT'}
file_tags_re = '^RPMTAG_(BASENAMES|FILE[\w\d]+)' file_tags_re = r'^RPMTAG_(BASENAMES|FILE[\w\d]+)'
dir_tags_re = '^RPMTAG_DIR(INDEXES|NAMES)' dir_tags_re = r'^RPMTAG_DIR(INDEXES|NAMES)'
changelog_tags_re = '^RPMTAG_CHANGELOG\w+' changelog_tags_re = r'^RPMTAG_CHANGELOG\w+'
trigger_tags_re = '^RPMTAG_TRIGGER\w+' trigger_tags_re = r'^RPMTAG_TRIGGER\w+'
datetime_tags = ['RPMTAG_PACKAGETIME', 'RPMTAG_RPMLIBTIMESTAMP', ] datetime_tags = ['RPMTAG_PACKAGETIME', 'RPMTAG_RPMLIBTIMESTAMP', ]
db_struct['blob_tags'] = ['RPMTAG_DSAHEADER', 'RPMTAG_HEADERIMMUTABLE', db_struct['blob_tags'] = ['RPMTAG_DSAHEADER', 'RPMTAG_HEADERIMMUTABLE',
@ -451,7 +446,7 @@ def process_repodir(repodir_path, repodir_id, build_archs, conn, db_struct,
types = {"<type 'str'>" : "TEXT", "<type 'int'>": "INTEGER", types = {"<type 'str'>" : "TEXT", "<type 'int'>": "INTEGER",
"<type 'NoneType'>": "TEXT", "<type 'list'>": "TEXT"} "<type 'NoneType'>": "TEXT", "<type 'list'>": "TEXT"}
dep_tags_re = '^RPMTAG_(CONFLICT|OBSOLETE|PROVIDE|REQUIRE)\w+' dep_tags_re = r'^RPMTAG_(CONFLICT|OBSOLETE|PROVIDE|REQUIRE)\w+'
db_struct['dep_tables'] = ['requires', 'provides', db_struct['dep_tables'] = ['requires', 'provides',
'conflicts', 'obsoletes'] 'conflicts', 'obsoletes']
@ -472,7 +467,7 @@ def process_repodir(repodir_path, repodir_id, build_archs, conn, db_struct,
"BLOB" if tag in db_struct['blob_tags'] else \ "BLOB" if tag in db_struct['blob_tags'] else \
types[str(type(hdr[tag]))] types[str(type(hdr[tag]))]
fieldname = tag.replace('RPMTAG_', '').lower() fieldname = tag.replace('RPMTAG_', '').lower()
if fieldname in ['id', 'group']: if fieldname in reserved_field_names:
fieldname = 'rpm_' + fieldname fieldname = 'rpm_' + fieldname
packages_tags.append(tag) packages_tags.append(tag)
packages_field_names += fieldname + ', ' packages_field_names += fieldname + ', '
@ -482,7 +477,8 @@ def process_repodir(repodir_path, repodir_id, build_archs, conn, db_struct,
for extra_field in sorted(packages_extra_fields.keys()): for extra_field in sorted(packages_extra_fields.keys()):
packages_field_names += (', ' if nef > 0 else '') + extra_field packages_field_names += (', ' if nef > 0 else '') + extra_field
packages_values_template += (', ' if nef > 0 else '') + '?' packages_values_template += (', ' if nef > 0 else '') + '?'
packages_fields += (', ' if nef > 0 else '') + extra_field + ' ' + packages_extra_fields[extra_field] packages_fields += (', ' if nef > 0 else '') + extra_field + ' ' + \
packages_extra_fields[extra_field]
nef += 1 nef += 1
conn.execute(""" conn.execute("""
CREATE TABLE IF NOT EXISTS packages(id INTEGER PRIMARY KEY NOT NULL, %s) CREATE TABLE IF NOT EXISTS packages(id INTEGER PRIMARY KEY NOT NULL, %s)
@ -516,7 +512,8 @@ CREATE TABLE IF NOT EXISTS %s (id INTEGER PRIMARY KEY NOT NULL,
workers = [] workers = []
for i in xrange(NUM_PROCESSES): for i in xrange(NUM_PROCESSES):
worker = mp.Process(target = process_package_worker, worker = mp.Process(target = process_package_worker,
args = (i, queue_in, id_generator, generator_lock, db_struct, args = (i, queue_in, id_generator,
generator_lock, db_struct,
repodir_id, build_archs, tempdir, repodir_id, build_archs, tempdir,
no_shared_objects, no_so_symbols)) no_shared_objects, no_so_symbols))
workers.append(worker) workers.append(worker)
@ -533,16 +530,15 @@ def main(args):
if hasattr(os, "sysconf"): if hasattr(os, "sysconf"):
if os.sysconf_names.has_key("SC_NPROCESSORS_ONLN"): if os.sysconf_names.has_key("SC_NPROCESSORS_ONLN"):
n = os.sysconf("SC_NPROCESSORS_ONLN") nproc = os.sysconf("SC_NPROCESSORS_ONLN")
if isinstance(n, int) and n > 0: if isinstance(nproc, int) and nproc > 0:
NUM_PROCESSES = n NUM_PROCESSES = nproc
conn = sqlite3.connect(DB) conn = sqlite3.connect(DB)
init_database(conn) init_database(conn)
conn.commit() conn.commit()
c = conn.cursor()
options = parseargs(args) options = parseargs()
parser = ET.XMLParser() parser = ET.XMLParser()
tree = ET.parse(options.config, parser=parser) tree = ET.parse(options.config, parser=parser)
config_root = tree.getroot() config_root = tree.getroot()