#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import sys
import gettext
import argparse
import sqlite3
import rpm
import re
import xml.etree.ElementTree as ET
import subprocess
import shutil
import time
import multiprocessing as mp
import gc
import urllib
from urlparse import urlparse, urlunparse
gettext.install('urpm-tools')
DB = 'repo.db'
NUM_PROCESSES = 1 # number of CPUs (detected automatically in main())
RPMSENSE_LESS = 0x02
RPMSENSE_GREATER = 0x04
RPMSENSE_EQUAL = 0x08
RPMSENSE_SENSEMASK = 0x0f
RPMSENSE_SCRIPT_POST = 0x400
RPMSENSE_SCRIPT_PREUN = 0x800
RPMSENSE_SCRIPT_POSTUN = 0x1000
RPMSENSE_FIND_PROVIDES = 0x8000
RPMSENSE_MISSINGOK = 0x80000
RPMFILEMODE_DIRECTORY = 0x4000
RPMFILEMODE_EXECUTE = 0111
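# The RPMSENSE_* dependency flag bits above mirror rpmlib (rpmds.h); the
# file mode bits are standard POSIX: 0x4000 is S_IFDIR, 0111 the execute bits.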
def parse_args():
parser = argparse.ArgumentParser(description=_('extract packages metadata'
' from RPM repositories'))
parser.add_argument('config', metavar='config',
help=_('path to repo-analyze-config.xml'))
parser.add_argument('-c', '--cache-dir',
help=_('path to cache directory'))
parser.add_argument('-O', '--no-shared-objects', action='store_true',
help=_('don\'t process shared objects'))
parser.add_argument('-S', '--no-so-symbols', action='store_true',
help=_('don\'t process shared object symbols'))
opts = parser.parse_args()
return opts
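# Example invocation (hypothetical paths):
#   ./prepare-repodb.py -c /var/cache/repo-analyzer repo-analyze-config.xml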
#################################################
# Fill the database with the repositories' data
#################################################
def init_database(conn):
conn.executescript("""
CREATE TABLE repodirs(id INTEGER PRIMARY KEY NOT NULL,
name TEXT UNIQUE, url TEXT, arch TEXT, sources TEXT);
CREATE TABLE repodir_depends(id INTEGER PRIMARY KEY NOT NULL,
repodir_id INTEGER, depend_repodir_name TEXT);
CREATE TABLE IF NOT EXISTS package_files(id INTEGER PRIMARY KEY NOT NULL,
package_id INTEGER NOT NULL, basename TEXT, path TEXT,
size INTEGER, mode INTEGER,
link_to_file_id INTEGER, link_to_path TEXT, mark TEXT);
CREATE TABLE package_requires_res(id INTEGER PRIMARY KEY NOT NULL,
package_id INTEGER, requires_id INTEGER,
provides_id INTEGER, dep_package_id INTEGER);
CREATE TABLE package_conflicts_res(id INTEGER PRIMARY KEY NOT NULL,
package_id INTEGER, conflicts_id INTEGER,
provides_id INTEGER, dep_package_id INTEGER);
CREATE TABLE package_obsoletes_res(id INTEGER PRIMARY KEY NOT NULL,
package_id INTEGER, obsoletes_id INTEGER,
provides_id INTEGER, dep_package_id INTEGER);
CREATE TABLE so_needed(id INTEGER PRIMARY KEY NOT NULL,
obj_file_id INTEGER, name TEXT);
CREATE TABLE so_needed_res(id INTEGER PRIMARY KEY NOT NULL,
so_needed_id INTEGER, dep_obj_file_id INTEGER, res_type INTEGER);
CREATE TABLE obj_symbols(id INTEGER PRIMARY KEY NOT NULL,
obj_file_id INTEGER, name TEXT, sym_type INTEGER);
CREATE TABLE obj_symbols_res(id INTEGER PRIMARY KEY NOT NULL,
obj_sym_id INTEGER, dep_obj_sym_id INTEGER, res_type INTEGER);
PRAGMA synchronous = OFF;
PRAGMA journal_mode = OFF;
PRAGMA cache_size = -1048576;
""")
conn.commit()
def index_database(conn):
print 'Indexing the database...'
conn.executescript("""
CREATE INDEX rd_name ON repodirs(name);
CREATE INDEX pkg_name ON packages(name);
CREATE INDEX pkg_nvra ON packages(nvra);
CREATE INDEX pkg_arch ON packages(arch);
CREATE INDEX pkg_group ON packages(rpm_group);
CREATE INDEX pkg_repodir ON packages(repodir_id);
CREATE INDEX pkg_rq_pkg_req ON package_requires_res(package_id, requires_id);
CREATE INDEX pkg_rq_pkg_prov ON package_requires_res(dep_package_id, provides_id);
CREATE INDEX pkg_cf_pkg_conf ON package_conflicts_res(package_id, conflicts_id);
CREATE INDEX pkg_cf_pkg_prov ON package_conflicts_res(dep_package_id, provides_id);
CREATE INDEX pkg_ob_pkg_obs ON package_obsoletes_res(package_id, obsoletes_id);
CREATE INDEX pkg_ob_pkg_prov ON package_obsoletes_res(dep_package_id, provides_id);
CREATE INDEX pkg_file_pkg_id ON package_files(package_id);
CREATE INDEX pkg_file_name ON package_files(basename);
CREATE INDEX pkg_file_path ON package_files(path);
CREATE INDEX pkg_file_mark ON package_files(mark);
CREATE INDEX so_needed_obj_id ON so_needed(obj_file_id);
CREATE INDEX so_needed_res_sn ON so_needed_res(so_needed_id);
CREATE INDEX symbols_obj_name_type ON obj_symbols(obj_file_id, name, sym_type);
CREATE INDEX symbols_name_type ON obj_symbols(name, sym_type);
CREATE INDEX symbols_res_sym ON obj_symbols_res(obj_sym_id);
""")
dep_tables = ['rpm_requires', 'rpm_provides',
'rpm_conflicts', 'rpm_obsoletes']
for table in dep_tables:
conn.execute('CREATE INDEX %(tbl)s_pkg ON %(tbl)s(package_id)' %
{'tbl': table})
conn.execute('CREATE INDEX %(tbl)s_name ON %(tbl)s(name)' %
{'tbl': table})
conn.commit()
def get_rpm_header(rpm_ts, pkg):
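    """Read the RPM header from a package file via the given rpm
    transaction set; raises on unreadable files or headers."""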
hdr = None
try:
fdno = os.open(pkg, os.O_RDONLY)
except OSError as exc:
raise Exception('Unable to open file %s.\n%s' % (pkg, exc))
try:
hdr = rpm_ts.hdrFromFdno(fdno)
except rpm.error as exc:
        raise Exception('Unable to read RPM header from %s.\n%s' % (pkg, exc))
finally:
os.close(fdno)
return hdr
def generate_new_id(generator, gen_lock):
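    """Return the next unique row id from the shared multiprocessing
    counter, guarded by gen_lock so parallel workers never collide."""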
gen_lock.acquire()
last_id = generator.value
last_id += 1
generator.value = last_id
gen_lock.release()
return last_id
FILE_REC_ID_IDX = 0
FILE_REC_PATH_IDX = 3
FILE_REC_LINK_IDX = 6
FILE_REC_MARK_IDX = 7
def register_object(data, object_file_record, temp_dir, no_so_symbols):
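    """Inspect one extracted object file with objdump/nm: collect its
    NEEDED entries into data['so_needed'], its undefined (sym_type 0) and
    defined (sym_type 1) dynamic symbols into data['obj_symbols'], and set
    the file record mark to one of 'link', 'not-found', 'invalid-format',
    'no-symbols' or 'so'."""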
so_needed = data['so_needed']
obj_symbols = data['obj_symbols']
    obj_id = object_file_record[FILE_REC_ID_IDX]
    obj_file_path = object_file_record[FILE_REC_PATH_IDX]
temp_obj_file = os.path.join(temp_dir, obj_file_path.lstrip('/'))
target_file = None
file_mark = None
od_out = ''
nmundef_out = ''
nmdef_out = ''
if os.path.islink(temp_obj_file):
target_file = os.path.join(os.path.dirname(obj_file_path),
os.readlink(temp_obj_file))
file_mark = 'link'
elif not os.path.exists(temp_obj_file):
file_mark = 'not-found'
else:
p = subprocess.Popen(['objdump', '-p', temp_obj_file],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
od_out = p.communicate()[0]
if p.returncode != 0:
file_mark = 'invalid-format'
        elif not no_so_symbols:
p = subprocess.Popen(['nm', '-p', '-D', '--undefined-only',
temp_obj_file],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
nmundef_out = p.communicate()[0]
if p.returncode != 0:
file_mark = 'no-symbols'
else:
p = subprocess.Popen(['nm', '-p', '-D', '--defined-only',
temp_obj_file],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
nmdef_out = p.communicate()[0]
if p.returncode != 0:
file_mark = 'no-symbols'
else:
file_mark = 'so'
object_file_record[FILE_REC_LINK_IDX] = target_file
object_file_record[FILE_REC_MARK_IDX] = file_mark
dynsection = False
for odline in od_out.split('\n'):
odls = odline.strip()
if odls == '':
dynsection = False
        # objdump prints a localized "Dynamic section:" header (Russian/English)
        elif odls == 'Динамический раздел:' or odls == 'Dynamic section:':
dynsection = True
elif dynsection:
needrem = re.match(r'\s+NEEDED\s+(.*)', odline)
if needrem:
so_needed.append([obj_id, needrem.group(1)])
for symline in nmundef_out.split('\n'):
smre = re.match(r'^.([\S]*)\s+(\w)\s(.*)$', symline)
if smre:
if smre.group(2) in ['v', 'w']:
continue
symname = smre.group(3)
obj_symbols.append([obj_id, symname, 0])
for symline in nmdef_out.split('\n'):
smre = re.match(r'^.([\S]*)\s+(\w)\s(.*)$', symline)
if smre:
symname = smre.group(3)
obj_symbols.append([obj_id, symname, 1])
return obj_id
def extract_files(local_pkg, files_list, obj_so_files_idx, temp_dir):
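    """Extract just the shared-object candidates listed in obj_so_files_idx
    from the package into temp_dir via rpm2cpio | cpio; returns False (and
    reports to stderr) if the extraction fails."""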
filelist = os.path.join(temp_dir, 'files.lst')
with open(filelist, 'w') as f:
for i in obj_so_files_idx:
f.write('.' + files_list[i][FILE_REC_PATH_IDX] + '\n')
rpm_cpio_cmd = 'rpm2cpio ' + local_pkg + ' | cpio -ivdu -E ' + filelist
p = subprocess.Popen(rpm_cpio_cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
cwd=temp_dir,
shell=True)
output = p.communicate()[0]
if p.returncode != 0:
print >> sys.stderr, 'Couldn\'t extract files from package %s.' \
'\n\t%s' % (local_pkg, output.decode('utf-8'))
return False
return True
def process_package_worker(num, queue_in, generator, gen_lock, db_struct,
repodir_id, build_archs, temp_dir,
no_shared_objects, no_so_symbols):
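    """Worker process: take package URLs from queue_in, read headers, file
    lists and dependencies, optionally extract and analyze shared objects,
    then flush the accumulated rows into the SQLite database in one batch."""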
def to_string(pkg, tag, val):
        if isinstance(val, list):
            if not val:
                return None
try:
return str(val).decode('utf-8')
        except UnicodeError:
print >> sys.stderr, ('Invalid UTF-8 string (%s: %s):\n' %
(pkg, tag))
print >> sys.stderr, val
return str(val).decode('utf-8', 'replace')
rpm_ts = rpm.TransactionSet()
rpm_ts.setVSFlags(~(rpm.RPMVSF_NEEDPAYLOAD))
data = {}
data['packages'] = []
for table in db_struct['dep_tables']:
data[table] = []
data['package_files'] = []
data['so_needed'] = []
data['obj_symbols'] = []
while True:
job = queue_in.get()
if job is None:
break
(pkg, ) = job
pkg_id = generate_new_id(generator, gen_lock)
local_pkg = get_local_file(pkg, temp_dir)
hdr = get_rpm_header(rpm_ts, local_pkg)
package_values = []
package_values.append(pkg_id)
for tag in db_struct['packages_tags']:
hval = hdr[tag]
package_values.append(
(sqlite3.Binary(hval) if len(hval)>0 else None)
if tag in db_struct['blob_tags'] else \
to_string(pkg, tag, hval) if \
type(hval) in [type([]), type('')] else hval
)
package_values.append(repodir_id)
package_values.append(pkg)
package_values.append(None)
data['packages'].append(package_values)
for table in db_struct['dep_tables']:
table_data = data[table]
            rpref = 'RPMTAG_' + table[4 : -1].upper() # 'rpm_requires' -> 'RPMTAG_REQUIRE'
(dep_name, dep_flags, dep_version) = \
(hdr[rpref + 'NAME'], hdr[rpref + 'FLAGS'], hdr[rpref + 'VERSION'])
for i in xrange(0, len(hdr[rpref + 'NAME'])):
for build_arch in build_archs:
table_data.append([dep_name[i].decode('utf-8'),
dep_flags[i],
dep_version[i],
pkg_id, build_arch])
(pkg_file_paths, pkg_file_names, pkg_file_sizes, pkg_file_modes) = \
(hdr['RPMTAG_FILEPATHS'], hdr['RPMTAG_BASENAMES'],
hdr['RPMTAG_FILESIZES'], hdr['RPMTAG_FILEMODES'])
files_list = data['package_files']
files_dirs = {}
obj_so_files_idx = []
for i in xrange(0, len(pkg_file_paths)):
file_name = pkg_file_names[i]
file_path = pkg_file_paths[i]
pkg_file_id = generate_new_id(generator, gen_lock)
files_list.append([pkg_file_id, #FILE_REC_ID_IDX = 0
pkg_id,
file_name.decode('utf-8'),
file_path.decode('utf-8'), #FILE_REC_PATH_IDX = 3
pkg_file_sizes[i],
pkg_file_modes[i],
None, #link_to_path FILE_REC_LINK_IDX = 6
                               None #mark FILE_REC_MARK_IDX = 7
])
if pkg_file_modes[i] & RPMFILEMODE_DIRECTORY != 0:
files_dirs[file_path] = False
continue
dir_name = os.path.dirname(file_path)
if dir_name != '' and dir_name not in files_dirs:
files_dirs[dir_name] = True
if no_shared_objects:
continue
if os.path.splitext(file_name)[1] in \
['.debug', '.xz', '.conf', '.py', '.c', '.h', '.hpp', '.png',
'.cc', '.cpp', '.sh', '.java', '.pl', '.patch', '.desktop']:
continue
if file_path.startswith('/usr/lib/debug/.build-id') or \
file_path.endswith('/ld.so.cache'):
continue
if re.search(r'\.so($|\.)', file_name) or \
(pkg_file_modes[i] & RPMFILEMODE_EXECUTE) != 0:
obj_so_files_idx.append(len(files_list) - 1)
for fdir in sorted(files_dirs.keys()):
if files_dirs[fdir]:
# Add parent directories as implicit files
# TODO: recursive processing?
pkg_file_id = generate_new_id(generator, gen_lock)
files_list.append([pkg_file_id, #FILE_REC_ID_IDX = 0
pkg_id,
os.path.basename(fdir),
fdir, #FILE_REC_PATH_IDX = 3
0,
-1, # special mode
None, #link_to_path FILE_REC_LINK_IDX = 6
                                   None #mark FILE_REC_MARK_IDX = 7
])
if obj_so_files_idx:
pkg_temp_dir = os.path.join(temp_dir, os.path.basename(local_pkg))
os.makedirs(pkg_temp_dir)
if extract_files(local_pkg, files_list,
obj_so_files_idx, pkg_temp_dir):
for i in obj_so_files_idx:
register_object(data, files_list[i], pkg_temp_dir,
no_so_symbols)
shutil.rmtree(pkg_temp_dir, True)
remove_cached_file(pkg)
queue_in.task_done()
conn = sqlite3.connect(DB, timeout=30)
conn.executemany("""
INSERT INTO packages (%s) VALUES (%s)""" %
(db_struct['packages_field_names'],
db_struct['packages_values_template']),
data['packages'])
for table in db_struct['dep_tables']:
conn.executemany("""
INSERT INTO %s (name, flags, version, package_id, build_arch)
VALUES (?, ?, ?, ?, ?)""" % table, data[table])
conn.executemany("""
INSERT INTO package_files (id, package_id, basename, path,
size, mode, link_to_path, mark)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)""", data['package_files'])
conn.executemany("""
INSERT INTO so_needed(obj_file_id, name) VALUES(?, ?)
""", data['so_needed'])
conn.executemany("""
INSERT INTO obj_symbols(obj_file_id, name, sym_type) VALUES(?, ?, ?)
""", data['obj_symbols'])
conn.commit()
queue_in.task_done()
def get_files(url, ext):
filelist = []
urlp = urlparse(url)
if urlp.scheme in ['ftp', 'http', 'https']:
        return parse_index_html(wget_url(url, None), url, ext)
dir_list = os.listdir(url)
for d in dir_list:
if d.endswith(ext):
filepath = os.path.normpath(os.path.join(url, d))
filelist.append(filepath)
return filelist
local_cache = {}
def get_local_file(url, temp_dir):
urlp = urlparse(url)
if urlp.scheme in ['ftp', 'http', 'https']:
cached_file_name = local_cache.get(url)
if cached_file_name and os.path.isfile(cached_file_name):
return cached_file_name
cache_dir = os.path.join(temp_dir, 'cache')
if not os.path.isdir(cache_dir):
os.makedirs(cache_dir)
temp_file = os.path.join(cache_dir, os.path.basename(url))
wget_url(url, temp_file)
local_cache[url] = temp_file
return temp_file
return url
def remove_cached_file(url):
cached_file_name = local_cache.get(url)
if cached_file_name:
os.unlink(cached_file_name)
del local_cache[url]
def wget_url(url, target_file):
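    """Download url with wget; any credentials embedded in the URL are
    stripped from the address and passed through --http-user/--http-password
    instead. Returns the downloaded body when target_file is None."""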
urlp = urlparse(url)
wget_params = []
site = urlp.netloc
if urlp.username:
wget_params = wget_params + ['--auth-no-challenge',
'--http-user=%s' % urlp.username,
'--http-password=%s' %
('""' if not urlp.password else urlp.password)]
site = site[site.find('@') + 1:]
url = urlunparse((urlp.scheme, site, urlp.path, urlp.params,
urlp.query, urlp.fragment))
print 'Downloading %s...' % url
if target_file is None:
wget_params += ['-nv', '-O-', url]
else:
wget_params += ['-nv', '-O', target_file, url]
p = subprocess.Popen(['wget'] + wget_params,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
wget_out, wget_err = p.communicate()
if p.returncode != 0:
print >> sys.stderr, ('Unable to get data from the url: %s '
'(error: %d).\n%s\n%s') % \
(url, p.returncode, wget_out, wget_err)
raise Exception('Unable to download data (%d).' % p.returncode)
if target_file is None:
return wget_out
def parse_index_html(index_html, base_url, filter_ext):
file_list = []
for match in re.finditer(r'href="([^"]+)"', index_html, re.M):
filename = match.group(1)
if filename.endswith(filter_ext):
if '://' in filename[:8]:
file_list.append(filename)
continue
filepath = os.path.join(base_url, filename)
if os.path.dirname(filepath) == base_url.rstrip('/') and \
os.path.basename(filepath) == filename:
file_list.append(filepath)
return file_list
def download_repodir(source_urlp, cache_dir):
site = source_urlp.netloc
site = site[site.find('@') + 1:]
target_dir = os.path.join(cache_dir,
site,
source_urlp.path.lstrip('/'))
if not os.path.isdir(target_dir):
os.makedirs(target_dir)
remote_files = {}
if source_urlp.scheme in ['ftp', 'http', 'https']:
source_url = source_urlp.geturl()
remote_dir_contents = parse_index_html(wget_url(source_url, None),
source_url, '.rpm')
for remote_file in remote_dir_contents:
remote_filename = urllib.unquote(os.path.basename(remote_file))
remote_files[remote_filename] = True
target_file = os.path.join(target_dir, remote_filename)
if os.path.isfile(target_file):
continue
wget_url(remote_file, target_file)
for local_filename in os.listdir(target_dir):
if local_filename not in remote_files and \
local_filename.endswith('.rpm'):
print 'Removing local file: %s.' % local_filename
os.unlink(os.path.join(target_dir, local_filename))
return target_dir
def urpm_get_packages(media):
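    """Return the media's packages as urpm:// pseudo-URLs combining parallel
    'urpmq -r' (name-version-release) and 'urpmq -f' (full name) listings,
    e.g. urpm://Main/foo-1.0-1.x86_64.rpm#foo-1.0-1 (illustrative,
    hypothetical media and package)."""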
extra_params = []
if not media.endswith(' update'):
extra_params = ['--exclude-media', media + ' update']
p = subprocess.Popen(['urpmq', '-r', '--ignorearch',
'--list', '--media', media] +
extra_params,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
urpmqr_out, urpmqr_err = p.communicate()
if p.returncode != 0 or len(urpmqr_err) > 0:
print >> sys.stderr, ('Unable to get a list of packages '
'from the media: %s.\n'
                             '%s\n%s') % (media, urpmqr_out, urpmqr_err)
raise Exception('Unable to get a list of packages (%d).' % p.returncode)
# urpmi --no-install --allow-nodeps --force
# --download-all=/tmp/ xine-wavpack-1.2.4-1plf --media Desktop2012.1-8
p = subprocess.Popen(['urpmq', '-f', '--ignorearch',
'--list', '--media', media] +
extra_params,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
urpmqf_out, urpmqf_err = p.communicate()
if p.returncode != 0 or len(urpmqf_err) > 0:
print >> sys.stderr, ('Unable to get a list of packages '
'from the media: %s.\n'
'%s\n%s') % (media, urpmqf_out, urpmqf_err)
raise Exception('Unable to get a list of packages (%d).' % p.returncode)
rpm_list = []
qr_lines = urpmqr_out.split('\n')
qf_lines = urpmqf_out.split('\n')
if len(qr_lines) != len(qf_lines):
        raise Exception('Inconsistent urpmq -r and urpmq -f outputs '
                        '(%d and %d lines).' % (len(qr_lines), len(qf_lines)))
for i in xrange(0, len(qf_lines)):
qf_line = qf_lines[i]
if qf_line.strip() == '':
continue
if not qf_line.startswith(qr_lines[i]):
            raise Exception('Inconsistent urpmq -r and urpmq -f outputs: '
                            '%s and %s' % (qr_lines[i], qf_line))
rpm_list.append('urpm://%s/%s.rpm#%s' % (urllib.quote(media),
urllib.quote(qf_line),
urllib.quote(qr_lines[i])))
return rpm_list
def get_urpmi(urpm_package, target_dir):
urlp = urlparse(urpm_package)
package_name = urllib.unquote(urlp.fragment)
print package_name
p = subprocess.Popen(['urpmi', '--no-install',
'--force', '--no-suggests',
'--allow-nodeps',
'--no-download-all',
'--media', urlp.netloc,
package_name])
                         #stdout=subprocess.PIPE,
                         #stderr=subprocess.PIPE)
    # NOTE: with the pipes above commented out, communicate() returns (None, None)
    urpmi_out, urpmi_err = p.communicate()
if p.returncode != 0:
print >> sys.stderr, ('Unable to get the package %s '
'from the media %s.\n'
'%s\n%s') % (
package_name, urlp.netloc,
urpmi_out, urpmi_err)
raise Exception('Unable to get the package %s (%d).' %
(package_name, p.returncode))
def urpm_get_repodir(repodir_name, cache_dir):
target_dir = os.path.join(cache_dir,
repodir_name,
'rpms')
if not os.path.isdir(target_dir):
os.makedirs(target_dir)
urpm_files = {}
urpm_media_contents = urpm_get_packages(repodir_name)
for urpm_package in urpm_media_contents:
remote_filename = urllib.unquote(os.path.basename(urpm_package))
target_file = os.path.join(target_dir, remote_filename)
get_urpmi(urpm_package, os.path.join(cache_dir,
repodir_name))
print target_file
raise Exception('Not implemented.')
generator_value = 0
def import_repodir(repodir_id, cache_dir, build_archs, conn,
db_struct, temp_dir, no_shared_objects, no_so_symbols):
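    """Import all RPMs of one repository directory: materialize the package
    list locally, derive the packages table schema from the first RPM header
    (only on the first call), then fan the packages out to NUM_PROCESSES
    worker processes."""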
rd_rec = conn.execute("""SELECT name, url FROM repodirs WHERE id = ?""",
[repodir_id]).fetchone()
(repodir_name, repodir_url) = (rd_rec[0], rd_rec[1])
urlp = urlparse(repodir_url)
working_url = repodir_url
if cache_dir is not None:
if urlp.scheme in ['ftp', 'http', 'https']:
working_url = download_repodir(urlp, cache_dir)
elif urlp.scheme == 'urpm':
working_url = urpm_get_repodir(repodir_name, cache_dir)
elif urlp.scheme not in ['', 'file']:
raise Exception('Invalid scheme in the repository url: %s' %
repodir_url)
rpm_list = []
rpm_list = get_files(working_url, '.rpm')
if not rpm_list:
return
print urlp.netloc[urlp.netloc.find('@') + 1:] + urlp.path, ': ', \
len(rpm_list)
if not db_struct.get('defined'):
rpm_ts = rpm.TransactionSet()
rpm_ts.setVSFlags(~(rpm.RPMVSF_NEEDPAYLOAD))
# ts.setVSFlags(~(rpm.RPMVSF_NOMD5|rpm.RPMVSF_NEEDPAYLOAD))
hdr = get_rpm_header(rpm_ts, get_local_file(rpm_list[0], temp_dir))
# Retain sort order!
packages_extra_fields = {'repodir_id': 'INTEGER',
'rpm_url': 'TEXT',
'sourcerpm_package': 'TEXT'}
file_tags_re = r'^RPMTAG_(BASENAMES|FILE[\w\d]+)'
dir_tags_re = r'^RPMTAG_DIR(INDEXES|NAMES)'
changelog_tags_re = r'^RPMTAG_CHANGELOG\w+'
trigger_tags_re = r'^RPMTAG_TRIGGER\w+'
datetime_tags = ['RPMTAG_PACKAGETIME', 'RPMTAG_RPMLIBTIMESTAMP', ]
db_struct['blob_tags'] = ['RPMTAG_RSAHEADER', 'RPMTAG_DSAHEADER',
'RPMTAG_HEADERIMMUTABLE', 'RPMTAG_SIGMD5',
'RPMTAG_PKGID', 'RPMTAG_SOURCEPKGID']
reserved_field_names = ['id', 'group']
skip_tags_re = '^RPMTAG_(C|D|E|N|P|R|V|HEADERIMMUTABLE)$'
        # single-letter rpm tag aliases: C - CONFLICTNAME, D - DISTEPOCH,
        # E - EPOCH, N - NAME, P - PROVIDENAME, R - RELEASE, V - VERSION
types = {"<type 'str'>" : "TEXT", "<type 'int'>": "INTEGER",
"<type 'NoneType'>": "TEXT", "<type 'list'>": "TEXT"}
dep_tags_re = r'^RPMTAG_(CONFLICT|OBSOLETE|PROVIDE|REQUIRE)\w+'
db_struct['dep_tables'] = ['rpm_requires', 'rpm_provides',
'rpm_conflicts', 'rpm_obsoletes']
packages_field_names = 'id, '
packages_values_template = '?,'
packages_tags = []
packages_fields = ''
rpmtags = [str(t) for t in dir(rpm) if t.startswith('RPMTAG_') ]
for tag in rpmtags:
if (re.match(file_tags_re, tag) or re.match(dir_tags_re, tag) or
re.match(changelog_tags_re, tag) or
re.match(skip_tags_re, tag) or
re.match(trigger_tags_re, tag) or
re.match(dep_tags_re, tag)):
continue
sqltype = "TIMESTAMP" if tag in datetime_tags else \
"BLOB" if tag in db_struct['blob_tags'] else \
types[str(type(hdr[tag]))]
fieldname = tag.replace('RPMTAG_', '').lower()
if fieldname in reserved_field_names:
fieldname = 'rpm_' + fieldname
packages_tags.append(tag)
packages_field_names += fieldname + ', '
packages_values_template += '?, '
packages_fields += fieldname + ' ' + sqltype + ', '
nef = 0
for extra_field in sorted(packages_extra_fields.keys()):
packages_field_names += (', ' if nef > 0 else '') + extra_field
packages_values_template += (', ' if nef > 0 else '') + '?'
packages_fields += (', ' if nef > 0 else '') + extra_field + ' ' + \
packages_extra_fields[extra_field]
nef += 1
conn.execute("""
CREATE TABLE IF NOT EXISTS packages(id INTEGER PRIMARY KEY NOT NULL, %s)
""" % (packages_fields))
for table in db_struct['dep_tables']:
conn.execute("""
CREATE TABLE IF NOT EXISTS %s (id INTEGER PRIMARY KEY NOT NULL,
name TEXT, flags INTEGER, version TEXT, build_arch TEXT,
package_id INTEGER NOT NULL)""" % (table))
conn.commit()
db_struct['packages_tags'] = packages_tags
db_struct['packages_field_names'] = packages_field_names
db_struct['packages_values_template'] = packages_values_template
db_struct['defined'] = True
queue_in = mp.JoinableQueue()
for pkg in rpm_list:
queue_in.put((pkg, ))
for i in xrange(NUM_PROCESSES):
queue_in.put(None)
# Trying to prevent Exception AssertionError: AssertionError() in
# <Finalize object, dead> ignored
gc.collect()
time.sleep(1)
gc.disable()
global generator_value
id_generator = mp.Value('i', generator_value)
generator_lock = mp.Lock()
# run workers
workers = []
for i in xrange(NUM_PROCESSES):
worker = mp.Process(target = process_package_worker,
args = (i, queue_in, id_generator,
generator_lock, db_struct,
repodir_id, build_archs, temp_dir,
no_shared_objects, no_so_symbols))
workers.append(worker)
worker.start()
queue_in.join()
gc.enable()
generator_value = id_generator.value
def add_repodir(xrepodir, conn):
dbc = conn.cursor()
dbc.execute("""
INSERT INTO repodirs (name, url, sources) VALUES (?, ?, ?)
""", [xrepodir.get('name'), xrepodir.get('url'), xrepodir.get('sources')])
repodir_id = dbc.lastrowid
for depend in xrepodir.findall('dependency'):
dbc.execute("""
INSERT INTO repodir_depends(repodir_id, depend_repodir_name) VALUES (?, ?)
""", [repodir_id, depend.text.strip()])
conn.commit()
return repodir_id
def get_build_archs(xrepodir, xrepodirs):
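    """Expand the '$arch' placeholder in a repodir's dependency names against
    the configured repodirs and return the matching build architectures
    (SRPMS excluded), or [None] when there is nothing to expand."""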
build_archs = []
for depend in xrepodir.findall('dependency'):
arch_sign = '$arch'
depend_repo = depend.text.strip()
spos = depend_repo.find(arch_sign)
if spos >= 0:
drepo_prefix = depend_repo[:spos]
drepo_postfix = depend_repo[spos + len(arch_sign):]
for xrepodir in xrepodirs.findall('dir'):
repodir_name = xrepodir.get('name')
if repodir_name.startswith(drepo_prefix) and \
repodir_name.endswith(drepo_postfix):
repo_arch = repodir_name[len(drepo_prefix) :
len(repodir_name) - len(drepo_postfix)]
if repo_arch == 'SRPMS':
continue
if repo_arch not in build_archs:
build_archs.append(repo_arch)
if build_archs:
return build_archs
return [None]
def import_repositories(options, conn):
init_database(conn)
rpm_db_struct = {}
tree = ET.parse(options.config, parser=ET.XMLParser())
config_root = tree.getroot()
temp_dir = '/dev/shm/rt-tmp/'
shutil.rmtree(temp_dir, True)
os.mkdir(temp_dir)
for xrepodir in config_root.find('repositories').findall('dir'):
repodir_id = add_repodir(xrepodir, conn)
build_archs = [None] if xrepodir.get('sources') != '.' else \
get_build_archs(xrepodir,
config_root.find('repositories'))
import_repodir(repodir_id, options.cache_dir,
build_archs, conn, rpm_db_struct, temp_dir,
options.no_shared_objects, options.no_so_symbols)
shutil.rmtree(temp_dir, True)
if not rpm_db_struct.get('defined'):
print 'Database was not initialized ' \
'(check whether repositories are empty).'
return False
index_database(conn)
return True
################################################
# Post-process repo.db after data import
################################################
def version_ok(required_version, compare_flag, candidate_version):
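    """Check whether candidate_version satisfies required_version under the
    given RPMSENSE comparison flags, using urpm-repoclosure-style version
    normalization. For example, version_ok('1.2', RPMSENSE_GREATER |
    RPMSENSE_EQUAL, '1.10') is True, since 1.10 >= 1.2 in rpm ordering."""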
def sep_version(version):
vrem = re.match(r'\A(.+)(\-[^\-\:]+)(\:[^\:]+|)\Z', version)
if vrem:
return (vrem.group(1), vrem.group(2), vrem.group(3))
return (version, '', '')
def simple_version(version):
version = re.sub(r'[\-:]', '.', version)
version = re.sub(r'[a-z]+', '.', version, flags=re.I)
version = re.sub(r'\.\Z', '', version)
return version
def format_versions(ver1, ver2):
#see urpm-repoclosure, formatVersions
# v1 - provided
# v2 - required
(e1, e2) = (None, None)
e1_m = re.match(r'\A([^\-\:]+)\:(.*)', ver1)
if e1_m:
(e1, ver1) = (e1_m.group(1), e1_m.group(2))
e2_m = re.match(r'\A([^\-\:]+)\:(.*)', ver2)
if e2_m:
(e2, ver2) = (e2_m.group(1), e2_m.group(2))
(ver1_m, ver1_r, ver1_rr) = sep_version(ver1)
(ver2_m, ver2_r, ver2_rr) = sep_version(ver2)
if not ver2_rr:
ver1_rr = ''
if not ver2_r:
ver1_r = ''
ver1 = ver1_m + ver1_r + ver1_rr
ver2 = ver2_m + ver2_r + ver2_rr
if e1_m and e2_m:
ver1 = e1 + '.' + ver1
ver2 = e2 + '.' + ver2
return (simple_version(ver1), simple_version(ver2))
def cmp_nums(num1, num2):
        # perl-style segments: '00503' vs '12' becomes 503 vs 1200,
        # so e.g. 5.00503 < 5.12 (see the NOTE in cmp_versions)
if num1 == num2:
return 0
lzeros1 = re.match(r'\A([0]+)([1-9].*)', num1)
if lzeros1:
(num1, num2) = (lzeros1.group(2), num2 + lzeros1.group(1))
lzeros2 = re.match(r'\A([0]+)([1-9].*)', num2)
if lzeros2:
(num2, num1) = (lzeros2.group(2), num1 + lzeros2.group(1))
diff = int(num1, 10) - int(num2, 10)
return 0 if diff == 0 else \
(1 if diff > 0 else -1)
def cmp_versions(version1, version2):
#see urpm-repoclosure, cmpVersions
# 3.2.5-5:2011.0
# NOTE: perl 5.00503 and 5.12
(v1, v2) = format_versions(version1, version2)
if v1 == v2:
return 0
v1parts = v1.split('.')
v2parts = v2.split('.')
for i in xrange(0, min(len(v1parts), len(v2parts))):
(num1, num2)= (v1parts[i], v2parts[i])
if (len(num1) > 0 and len(num2) == 0):
return 1
if (len(num1) == 0 and len(num2) > 0):
return -1
num_diff = cmp_nums(num1, num2)
if num_diff != 0:
return num_diff
if len(v1parts) < len(v2parts):
return -1
if len(v1parts) > len(v2parts):
return 1
return 0
def rpm_cmp_versions(version1, version2):
def stringToVersion(verstring):
# from rpmUtils
if verstring in [None, '']:
return (None, None, None)
e1_m = re.match(r'\A([^\-\:]+)\:(.*)', verstring)
epoch = None
if e1_m:
(epoch, verstring) = (e1_m.group(1), e1_m.group(2))
j = verstring.find('-')
if j != -1:
if verstring[:j] == '':
version = None
else:
version = verstring[:j]
release = verstring[j + 1:]
else:
if verstring == '':
version = None
else:
version = verstring
release = None
return (epoch, version, release)
(e1, v1, r1) = stringToVersion(version1)
(e2, v2, r2) = stringToVersion(version2)
if e1 is None or e2 is None:
e1 = '0'
e2 = '0'
result = rpm.labelCompare((e1, v1, r1), (e2, v2, r2))
return result
# print '===', required_version, compare_flag, candidate_version
if compare_flag == 0:
return True
if candidate_version == '*':
return True
#see urpm-repoclosure, checkDeps
if compare_flag == RPMSENSE_EQUAL and \
candidate_version == required_version:
return True
cmp_res = None
try:
cmp_res = cmp_versions(candidate_version, required_version)
except ValueError as ex:
print ('Error when comparing versions: "%s" and "%s"\n%s' %
(candidate_version, required_version, str(ex)))
return False
rpm_cmp_res = rpm_cmp_versions(candidate_version, required_version)
#if (cmp_res != rpm_cmp_res):
# print >> sys.stderr, (('Invalid compare: "%s" vs "%s"! '
# 'Results: rc: %d, rpm: %d.') %
# (candidate_version, required_version,
# cmp_res, rpm_cmp_res))
if compare_flag == RPMSENSE_EQUAL:
return cmp_res == 0
elif compare_flag == RPMSENSE_LESS | RPMSENSE_EQUAL:
return cmp_res <= 0
elif compare_flag == RPMSENSE_GREATER | RPMSENSE_EQUAL:
return cmp_res >= 0
elif compare_flag == RPMSENSE_LESS:
return cmp_res < 0
elif compare_flag == RPMSENSE_GREATER:
return cmp_res > 0
return False
def process_repodir_dependencies(conn, repodir_id, repodir_name,
repodir_depends, dep_type):
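    """Match every 'conflicts' or 'obsoletes' entry (dep_type) of the
    repodir's packages against provides from the repodir and its dependency
    repodirs, recording hits in package_<dep_type>_res."""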
package_depends = conn.execute("""
SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
dep.id, dep.name, flags, dep.version
FROM packages, rpm_%s dep
WHERE repodir_id = ? AND dep.package_id = packages.id
ORDER BY packages.name, dep.name
""" % dep_type, [repodir_id]).fetchall()
search_repodirs = [repodir_id]
search_repodirs.extend(repodir_depends)
in_repodirs = ','.join(str(id) for id in search_repodirs)
dependency_cache = {}
for packdep in package_depends:
(cpackage_id, package_nvra, dep_id, dep_name, dep_flags, dep_version) = \
(packdep[0], packdep[2], packdep[3], packdep[4], packdep[5], packdep[6])
dep_res = []
depend_candidates = conn.execute("""
SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
prov.id, prov.name, flags, prov.version
FROM packages, rpm_provides AS prov
WHERE prov.package_id = packages.id AND repodir_id IN (%s) AND prov.name = ?
    ORDER BY packages.name, packages.nvra
""" % in_repodirs, [dep_name]).fetchall()
for dep_cand in depend_candidates:
(pkg_id, provides_id, provides_flags, provides_version) = \
(dep_cand[0], dep_cand[3], dep_cand[5], dep_cand[6])
if provides_flags & RPMSENSE_SENSEMASK == 0:
if not provides_version:
provides_version = '*'
else:
raise Exception('Invalid provides version '
'(flags = %d, version = %s)!' %
(provides_flags, provides_version))
if version_ok(dep_version, dep_flags & RPMSENSE_SENSEMASK,
provides_version):
dep_res.append({'prov_id': provides_id, 'pkg_id': pkg_id})
if len(dep_res) > 0:
for res_rec in dep_res:
conn.execute("""
INSERT INTO package_%(dep)s_res(package_id, %(dep)s_id,
provides_id, dep_package_id)
VALUES (?, ?, ?, ?)""" % {'dep': dep_type}, [cpackage_id, dep_id,
res_rec.get('prov_id'), res_rec.get('pkg_id')])
def process_repodir_requires(conn, repodir_id, repodir_name,
repodir_depends, requires_build_arch):
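    """Resolve package requires against provides: rpmlib()/executable()
    dependencies are taken as satisfied, a package's own provides win over
    other candidates, otherwise the highest satisfying version is preferred;
    path requires fall back to package_files. Unresolved requires are
    reported and counted as broken deps."""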
print 'Processing repo %d: %s (with depends: %s)' % (repodir_id, repodir_name, str(repodir_depends))
package_requires = conn.execute("""
SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
req.id, req.name, flags, req.version
FROM packages, rpm_requires req
WHERE repodir_id = ? AND req.package_id = packages.id %s
ORDER BY packages.name, req.name
""" % ((" AND build_arch = '%s'" % requires_build_arch)
if requires_build_arch is not None else ""),
[repodir_id]).fetchall()
search_repodirs = [repodir_id]
search_repodirs.extend(repodir_depends)
in_repodirs = ','.join(str(id) for id in search_repodirs)
# print 'package requires count: ', len(package_requires)
broken_dep = 0
for packreq in package_requires:
(cpackage_id, package_nvra, requires_id, requires_name, requires_flags, requires_version) = \
(packreq[0], packreq[2], packreq[3], packreq[4], packreq[5], packreq[6])
req_res = []
if (re.match(r'\A(rpmlib|executable)\(.+\)\Z', requires_name)):
# see if($N=~/\A(rpmlib|executable)\(.+\)\Z/) in urpm_repoclosure.pl
req_res.append({})
else:
depend_candidates = conn.execute("""
SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
prov.id, prov.name, flags, prov.version
FROM packages, rpm_provides AS prov
WHERE prov.package_id = packages.id AND repodir_id IN (%s) AND prov.name = ?
            ORDER BY packages.name, packages.nvra
""" % in_repodirs, [requires_name]).fetchall()
preferred_version = None
for dep_cand in depend_candidates:
(pkg_id, provides_id,
provides_flags, provides_version) = \
(dep_cand[0], dep_cand[3],
dep_cand[5], dep_cand[6])
if provides_flags & RPMSENSE_SENSEMASK == 0:
if not provides_version:
provides_version = '*'
else:
raise Exception('Invalid provides version '
'(flags = %d, version = %s)!' %
(provides_flags, provides_version))
if version_ok(requires_version,
requires_flags & RPMSENSE_SENSEMASK,
provides_version):
if pkg_id == cpackage_id:
# the same package is preferred over any other
req_res.append({'prov_id': provides_id,
'pkg_id': pkg_id})
preferred_version = None
break
better_version = preferred_version is None or \
provides_version == '*'
if not better_version:
better_version = version_ok(provides_version,
RPMSENSE_GREATER,
preferred_version)
if better_version:
preferred_version = provides_version
if preferred_version is not None:
for dep_cand in depend_candidates:
(pkg_id, provides_id, provides_version) = \
(dep_cand[0], dep_cand[3], dep_cand[6])
if provides_version == preferred_version or \
version_ok(provides_version, RPMSENSE_EQUAL,
preferred_version):
req_res.append({'prov_id': provides_id,
'pkg_id': pkg_id})
if len(req_res) == 0 and requires_name.startswith('/'): # file dependency
if (requires_flags & (RPMSENSE_SCRIPT_POST |
RPMSENSE_SCRIPT_PREUN |
RPMSENSE_SCRIPT_POSTUN)) != 0:
int_files_cnt = conn.execute("""
SELECT COUNT(1) FROM package_files WHERE package_id = ? AND path = ?
""", [cpackage_id, requires_name]).fetchone()
if int_files_cnt[0] > 0:
req_res.append({})
else:
files_deps = conn.execute("""
SELECT package_id FROM package_files
WHERE path = ? AND
package_id in (SELECT id FROM packages WHERE repodir_id IN (%s))
""" % in_repodirs, [requires_name]).fetchall()
for file_dep in files_deps:
req_res.append({'pkg_id': file_dep[0]})
if len(req_res) == 0 and (requires_flags & RPMSENSE_MISSINGOK) != 0:
req_res.append({})
if len(req_res) > 0:
for res_rec in req_res:
conn.execute("""
INSERT INTO package_requires_res(package_id, requires_id,
provides_id, dep_package_id)
VALUES (?, ?, ?, ?)
""", [cpackage_id, requires_id, res_rec.get('prov_id'), res_rec.get('pkg_id')])
else:
print requires_name, ' ', requires_version, ' (required by %s)' % package_nvra, ' not found!!!'
broken_dep += 1
print 'broken_deps: ', broken_dep
print ''
def process_repodir_file_links(conn, repodir_id, repodir_name, repodir_depends):
package_files_links = conn.execute("""
SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
package_files.id AS object_id, package_files.path, package_files.link_to_path
FROM packages, package_files
WHERE repodir_id = ? AND package_files.package_id = packages.id AND
link_to_path IS NOT NULL
ORDER BY packages.name, link_to_path
""", [repodir_id]).fetchall()
for file_link in package_files_links:
pkg_id = file_link[0]
pkg_nvra = file_link[2]
object_id = file_link[3]
target_obj_id = None
target_path = os.path.normpath(file_link[5])
target_paths = {}
target_paths[target_path] = True
while target_path != '':
new_target_path = None
tofile = conn.execute("""
SELECT id, link_to_path FROM package_files WHERE path = ? AND package_id = ?
""", [target_path, pkg_id]).fetchone()
if tofile:
target_obj_id = tofile[0]
new_target_path = tofile[1]
if not target_obj_id:
# Just two levels of dependency recursion - TODO: Full depth recursion?
tofile = conn.execute("""
SELECT id, link_to_path FROM package_files WHERE path = ? AND package_id IN (
SELECT dep_package_id FROM package_requires_res WHERE package_id = ?
UNION
SELECT dep_package_id FROM package_requires_res WHERE package_id IN
(SELECT dep_package_id FROM package_requires_res WHERE package_id = ?)
)
""", [target_path, pkg_id, pkg_id]).fetchone()
if tofile:
target_obj_id = tofile[0]
new_target_path = tofile[1]
if new_target_path is None:
break
target_path = os.path.normpath(new_target_path)
if target_path in target_paths:
print 'Link loop detected! %s: %s -> %s' % (pkg_nvra, file_link[5], target_path)
target_obj_id = None
break
target_paths[target_path] = True
if target_obj_id:
conn.execute("""
UPDATE package_files SET link_to_file_id = ? WHERE id = ?
""", [target_obj_id, object_id])
else:
# print 'target %s not found (%d: %s)' % (target_path, pkg_id, pkg_name)
pass
def process_repodir_so_needed(conn, repodir_id, repodir_name, repodir_depends):
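    """Resolve DT_NEEDED entries of analyzed objects: pass 1 (res_type 1)
    follows already-resolved rpm requires to a file with the matching
    basename, pass 2 (res_type 2) falls back to any matching basename in
    the searched repodirs."""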
    print 'Searching for object file resolutions (1)...'
conn.execute("""
INSERT INTO so_needed_res(so_needed_id, dep_obj_file_id, res_type)
SELECT so_needed.id, tpf.id, 1 FROM packages
CROSS JOIN package_files spf CROSS JOIN so_needed CROSS JOIN rpm_requires
CROSS JOIN package_requires_res req_res CROSS JOIN package_files tpf
WHERE so_needed.obj_file_id = spf.id AND spf.package_id = packages.id AND
packages.repodir_id = ? AND spf.package_id = rpm_requires.package_id AND
(so_needed.name = rpm_requires.name OR
so_needed.name || '()(64bit)' = rpm_requires.name) AND
packages.id = req_res.package_id AND
rpm_requires.id = req_res.requires_id AND
req_res.dep_package_id = tpf.package_id AND
so_needed.name = tpf.basename
""", [repodir_id])
search_repodirs = [repodir_id]
search_repodirs.extend(repodir_depends)
in_repodirs = ','.join(str(id) for id in search_repodirs)
objects_not_resolved1 = conn.execute("""
SELECT packages.id AS package_id, packages.nvra,
package_files.id AS object_id, package_files.basename AS object_name,
so_needed.id AS so_needed_id, so_needed.name AS so_needed_name
FROM packages CROSS JOIN package_files CROSS JOIN so_needed
WHERE repodir_id = ? AND package_files.package_id = packages.id AND
so_needed.obj_file_id = package_files.id AND
NOT EXISTS (SELECT 1 FROM so_needed_res
WHERE so_needed_res.so_needed_id = so_needed.id)
ORDER BY packages.nvra, package_files.basename, so_needed.name
""", [repodir_id]).fetchall()
print 'Object files not resolved by rpm requires-provides: ', len(objects_not_resolved1)
if objects_not_resolved1:
        print 'Searching for object file resolutions (2)...'
in_so_needed = ','.join(str(obj_rec[4]) for obj_rec in objects_not_resolved1)
conn.execute("""
INSERT INTO so_needed_res(so_needed_id, dep_obj_file_id, res_type)
SELECT so_needed.id, tpf.id, 2 FROM packages, package_files tpf, so_needed
WHERE packages.repodir_id IN (%s) AND packages.id = tpf.package_id AND
so_needed.id IN (%s) AND tpf.basename = so_needed.name
""" % (in_repodirs, in_so_needed))
objects_not_resolved2 = conn.execute("""
SELECT packages.id AS package_id, packages.nvra,
package_files.id AS object_id, package_files.basename AS object_name,
so_needed.id AS so_needed_id, so_needed.name AS so_needed_name
FROM packages, package_files, so_needed
WHERE repodir_id = ? AND package_files.package_id = packages.id AND
so_needed.obj_file_id = package_files.id AND
NOT EXISTS (SELECT 1 FROM so_needed_res WHERE so_needed_res.so_needed_id = so_needed.id)
ORDER BY packages.nvra, package_files.basename, so_needed.name
""", [repodir_id]).fetchall()
print 'Object files not resolved: ', len(objects_not_resolved2)
def process_repodir_obj_symbols(conn, repodir_id, repodir_name, repodir_depends):
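    """Resolve undefined dynamic symbols against defined ones: first through
    so_needed resolutions of types 1 and 2, then (res_type 3) by a global
    search over all packages of the searched repodirs."""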
    print 'Searching for symbol resolutions (1)...'
# EXPLAIN QUERY PLAN
conn.execute("""
INSERT INTO obj_symbols_res(obj_sym_id, dep_obj_sym_id, res_type)
SELECT sos.id, tos.id, 1 FROM packages CROSS JOIN package_files spf CROSS JOIN obj_symbols sos CROSS JOIN
so_needed CROSS JOIN so_needed_res CROSS JOIN package_files tpf CROSS JOIN obj_symbols tos
WHERE packages.repodir_id = ? AND packages.id = spf.package_id AND spf.id = sos.obj_file_id AND
sos.sym_type = 0 AND sos.obj_file_id = so_needed.obj_file_id AND so_needed.id = so_needed_res.so_needed_id AND
so_needed_res.res_type = 1 AND so_needed_res.dep_obj_file_id = tpf.id AND
(tos.obj_file_id = tpf.id OR tos.obj_file_id = tpf.link_to_file_id) AND
tos.sym_type = 1 AND tos.name = sos.name
""", [repodir_id])
    print 'Searching for symbol resolutions (2)...'
conn.execute("""
INSERT INTO obj_symbols_res(obj_sym_id, dep_obj_sym_id, res_type)
SELECT sos.id, tos.id, 2 FROM packages CROSS JOIN package_files CROSS JOIN obj_symbols sos CROSS JOIN
so_needed CROSS JOIN so_needed_res CROSS JOIN obj_symbols tos
WHERE packages.repodir_id = ? AND packages.id = package_files.package_id AND package_files.id = sos.obj_file_id AND
sos.sym_type = 0 AND sos.obj_file_id = so_needed.obj_file_id AND so_needed.id = so_needed_res.so_needed_id AND
so_needed_res.res_type = 2 AND so_needed_res.dep_obj_file_id = tos.obj_file_id AND
tos.sym_type = 1 AND tos.name = sos.name
""", [repodir_id])
    print 'Searching for symbol resolutions (3)...'
search_repodirs = [repodir_id]
search_repodirs.extend(repodir_depends)
in_repodirs = ','.join(str(id) for id in search_repodirs)
conn.execute("""
INSERT INTO obj_symbols_res(obj_sym_id, dep_obj_sym_id, res_type)
SELECT sos.id, tos.id, 3 FROM packages CROSS JOIN package_files CROSS JOIN obj_symbols sos CROSS JOIN
obj_symbols tos CROSS JOIN package_files tpf
WHERE repodir_id = ? AND packages.id = package_files.package_id AND package_files.id = sos.obj_file_id AND
sos.sym_type = 0 AND NOT EXISTS (SELECT 1 FROM obj_symbols_res WHERE obj_sym_id = sos.id) AND
sos.name = tos.name AND tos.sym_type = 1 AND tos.obj_file_id = tpf.id AND
tpf.package_id IN (SELECT id FROM packages WHERE repodir_id IN (%s))
""" % in_repodirs, [repodir_id])
def process_repodir_sources(conn, repodir_id, repo_sources):
if not repo_sources:
return
print 'Searching source rpms...'
conn.execute("""
UPDATE packages SET sourcerpm_package =
(SELECT id FROM packages ps
WHERE repodir_id IN (SELECT id FROM repodirs WHERE name = ?) AND
ps.nvra = substr(packages.sourcerpm, 1, length(packages.sourcerpm) - 4)
)
WHERE repodir_id = ? AND sourcerpm LIKE '%.rpm'
""", [repo_sources, repodir_id])
def process_repodir(conn, repo_id, repo_name, repo_sources,
depend_repodir_list, repodirs_processed, dep_arch):
all_depends_ready = True
repodir_depends = []
in_repodirs = ','.join(str(id) for id in repodirs_processed)
for dr_name in depend_repodir_list:
repodir_depend_found = conn.execute("""
SELECT id, name FROM repodirs WHERE id IN (%s) AND name = ?
""" % in_repodirs, [dr_name]).fetchall()
if len(repodir_depend_found) == 0:
all_depends_ready = False
break
else:
for rdf in repodir_depend_found:
repodir_depends.append(rdf[0])
if not all_depends_ready:
return False
print repo_name, ' ', depend_repodir_list, ' ', dep_arch
process_repodir_dependencies(conn, repo_id, repo_name, repodir_depends, 'conflicts')
process_repodir_dependencies(conn, repo_id, repo_name, repodir_depends, 'obsoletes')
process_repodir_requires(conn, repo_id, repo_name, repodir_depends, dep_arch)
process_repodir_file_links(conn, repo_id, repo_name, repodir_depends)
process_repodir_so_needed(conn, repo_id, repo_name, repodir_depends)
process_repodir_obj_symbols(conn, repo_id, repo_name, repodir_depends)
process_repodir_sources(conn, repo_id, repo_sources)
return True
def process_repodb(conn):
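    """Post-import stage: wipe previous resolution tables, then resolve
    binary repodirs in dependency order and finally SRPMS repodirs once per
    build architecture."""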
def extract_arch(arch_template, repo_name):
arch_sign = '$arch'
spos = arch_template.find(arch_sign)
if spos >= 0:
repo_prefix = arch_template[:spos]
repo_postfix = arch_template[spos + len(arch_sign):]
if repo_name.startswith(repo_prefix) and \
repo_name.endswith(repo_postfix):
return repo_name[len(repo_prefix) :
len(repo_name) - len(repo_postfix)]
return None
conn.executescript("""
DELETE FROM package_requires_res;
DELETE FROM package_conflicts_res;
DELETE FROM package_obsoletes_res;
DELETE FROM so_needed_res;
DELETE FROM obj_symbols_res;
UPDATE package_files SET link_to_file_id = NULL;
UPDATE packages SET sourcerpm_package = NULL;
ANALYZE;
""")
repodirs_processed = []
#Process binary rpms
repodirs_processed_cnt = -1
while repodirs_processed_cnt < len(repodirs_processed):
in_repodirs = ','.join(str(id) for id in repodirs_processed)
repodirs = conn.execute("""
SELECT id, name, sources FROM repodirs WHERE sources <> '.' AND id NOT IN (%s)
""" % in_repodirs).fetchall()
for repodir in repodirs:
(repodir_id, repodir_name, repodir_sources) = \
(repodir[0], repodir[1], repodir[2])
depend_repodir_names = conn.execute(
"""
SELECT depend_repodir_name FROM repodir_depends WHERE repodir_id = ?
""", [repodir_id]).fetchall()
depend_repodir_list = [drn[0] for drn in depend_repodir_names]
if process_repodir(conn, repodir_id, repodir_name, repodir_sources,
depend_repodir_list, repodirs_processed, None):
repodirs_processed.append(repodir_id)
repodirs_processed_cnt = len(repodirs_processed)
#Process SRPMS
repodirs_processed_cnt = -1
while repodirs_processed_cnt < len(repodirs_processed):
repodirs = conn.execute("""
SELECT id, name, sources FROM repodirs WHERE sources = '.'
""").fetchall()
for repodir in repodirs:
(repodir_id, repodir_name, repodir_sources) = \
(repodir[0], repodir[1], repodir[2])
src_build_archs = []
depend_repodir_names = conn.execute(
"""
SELECT depend_repodir_name FROM repodir_depends WHERE repodir_id = ?
""", [repodir_id]).fetchall()
for drn in depend_repodir_names:
dr_name = drn[0]
if '$arch' in dr_name:
depend_repodir_found = conn.execute(
"""
SELECT id, name FROM repodirs WHERE name LIKE ?
""", [dr_name.replace('$arch', '%')]).fetchall()
if len(depend_repodir_found) == 0:
                        raise Exception('Dependency repositories not found!')
for drf in depend_repodir_found:
arch = extract_arch(dr_name, drf[1])
if arch:
if arch == 'SRPMS':
continue
src_build_archs.append(arch)
else:
raise Exception('Source repository should depend '
'on */$arch/* repo.')
for arch in src_build_archs:
depend_repodir_list = [drn[0].replace('$arch', arch)
for drn in depend_repodir_names]
if not process_repodir(conn, repodir_id, repodir_name, None,
depend_repodir_list, repodirs_processed,
arch):
raise Exception('Couldn\'t process SRPMS repository!')
repodirs_processed.append(repodir_id)
repodirs_processed_cnt = len(repodirs_processed)
in_repodirs = ','.join(str(id) for id in repodirs_processed)
repodirs_not_processed = conn.execute("""
SELECT id, name, sources FROM repodirs rd WHERE id NOT IN (%s)
""" % in_repodirs).fetchall()
if len(repodirs_not_processed) > 0:
print 'Repodirs not processed due to dependencies:'
for rdna in repodirs_not_processed:
print rdna[1]
conn.execute("""
ANALYZE""")
conn.commit()
def main(args):
global NUM_PROCESSES
if hasattr(os, "sysconf"):
        if "SC_NPROCESSORS_ONLN" in os.sysconf_names:
nproc = os.sysconf("SC_NPROCESSORS_ONLN")
if isinstance(nproc, int) and nproc > 0:
NUM_PROCESSES = nproc
options = parse_args()
if os.path.exists(DB):
os.unlink(DB)
conn = sqlite3.connect(DB)
if import_repositories(options, conn):
process_repodb(conn)
else:
os.unlink(DB)
if __name__ == "__main__":
main(sys.argv)