#!/usr/bin/python
# -*- coding: utf-8 -*-
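"""prepare-repodb.py -- extract packages metadata from RPM repositories
into an SQLite database (repo.db) and resolve dependencies between the
imported packages.

A sketch of an invocation (paths are hypothetical):

    ./prepare-repodb.py -c /var/cache/repo-analyzer repo-analyze-config.xml
"""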
import os
import sys
import gettext
import argparse
import sqlite3
import rpm
import re
import xml.etree.ElementTree as ET
import subprocess
import shutil
import time
import multiprocessing as mp
import gc
import urllib
from urlparse import urlparse, urlunparse
gettext.install('urpm-tools')
DB = 'repo.db'
NUM_PROCESSES = 1 # number of worker processes; set to the CPU count in main()
RPMSENSE_LESS = 0x02
RPMSENSE_GREATER = 0x04
RPMSENSE_EQUAL = 0x08
RPMSENSE_SENSEMASK = 0x0f
RPMSENSE_SCRIPT_POST = 0x400
RPMSENSE_SCRIPT_PREUN = 0x800
RPMSENSE_SCRIPT_POSTUN = 0x1000
RPMSENSE_FIND_PROVIDES = 0x8000
RPMSENSE_MISSINGOK = 0x80000
RPMFILEMODE_DIRECTORY = 0x4000
RPMFILEMODE_EXECUTE = 0111
def parse_args():
parser = argparse.ArgumentParser(description=_('extract packages metadata'
' from RPM repositories'))
parser.add_argument('config', metavar='config',
help=_('path to repo-analyze-config.xml'))
parser.add_argument('-c', '--cache-dir',
help=_('path to cache directory'))
parser.add_argument('-O', '--no-shared-objects', action='store_true',
help=_('don\'t process shared objects'))
parser.add_argument('-S', '--no-so-symbols', action='store_true',
help=_('don\'t process shared object symbols'))
opts = parser.parse_args()
return opts
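# A minimal sketch of repo-analyze-config.xml, inferred from the tags read by
# import_repositories()/add_repodir() below; the root element name and all
# names/URLs here are made up, the code only requires a <repositories>
# element holding <dir> entries:
#
#   <config>
#     <repositories>
#       <dir name="main.x86_64" url="http://mirror.example.com/main/x86_64/"
#            sources="main.SRPMS"/>
#       <dir name="main.SRPMS" url="http://mirror.example.com/main/SRPMS/"
#            sources=".">
#         <dependency>main.$arch</dependency>
#       </dir>
#     </repositories>
#   </config>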
################################################
# Fill database with the repositories data
################################################
def init_database(conn):
conn.executescript("""
CREATE TABLE repodirs(id INTEGER PRIMARY KEY NOT NULL,
name TEXT UNIQUE, url TEXT, arch TEXT, sources TEXT);
CREATE TABLE repodir_depends(id INTEGER PRIMARY KEY NOT NULL,
repodir_id INTEGER, depend_repodir_name TEXT);
CREATE TABLE IF NOT EXISTS package_files(id INTEGER PRIMARY KEY NOT NULL,
package_id INTEGER NOT NULL, basename TEXT, path TEXT,
size INTEGER, mode INTEGER,
link_to_file_id INTEGER, link_to_path TEXT, mark TEXT);
CREATE TABLE package_requires_res(id INTEGER PRIMARY KEY NOT NULL,
package_id INTEGER, requires_id INTEGER,
provides_id INTEGER, dep_package_id INTEGER);
CREATE TABLE package_conflicts_res(id INTEGER PRIMARY KEY NOT NULL,
package_id INTEGER, conflicts_id INTEGER,
provides_id INTEGER, dep_package_id INTEGER);
CREATE TABLE package_obsoletes_res(id INTEGER PRIMARY KEY NOT NULL,
package_id INTEGER, obsoletes_id INTEGER,
provides_id INTEGER, dep_package_id INTEGER);
CREATE TABLE so_needed(id INTEGER PRIMARY KEY NOT NULL,
obj_file_id INTEGER, name TEXT);
CREATE TABLE so_needed_res(id INTEGER PRIMARY KEY NOT NULL,
so_needed_id INTEGER, dep_obj_file_id INTEGER, res_type INTEGER);
CREATE TABLE obj_symbols(id INTEGER PRIMARY KEY NOT NULL,
obj_file_id INTEGER, name TEXT, sym_type INTEGER);
CREATE TABLE obj_symbols_res(id INTEGER PRIMARY KEY NOT NULL,
obj_sym_id INTEGER, dep_obj_sym_id INTEGER, res_type INTEGER);
PRAGMA synchronous = OFF;
PRAGMA journal_mode = OFF;
PRAGMA cache_size = -1048576;
""")
conn.commit()
def index_database(conn):
print 'Indexing the database...'
conn.executescript("""
CREATE INDEX rd_name ON repodirs(name);
CREATE INDEX pkg_name ON packages(name);
CREATE INDEX pkg_nvra ON packages(nvra);
CREATE INDEX pkg_arch ON packages(arch);
CREATE INDEX pkg_group ON packages(rpm_group);
CREATE INDEX pkg_repodir ON packages(repodir_id);
CREATE INDEX pkg_rq_pkg_req ON package_requires_res(package_id, requires_id);
CREATE INDEX pkg_rq_pkg_prov ON package_requires_res(dep_package_id, provides_id);
CREATE INDEX pkg_cf_pkg_conf ON package_conflicts_res(package_id, conflicts_id);
CREATE INDEX pkg_cf_pkg_prov ON package_conflicts_res(dep_package_id, provides_id);
CREATE INDEX pkg_ob_pkg_obs ON package_obsoletes_res(package_id, obsoletes_id);
CREATE INDEX pkg_ob_pkg_prov ON package_obsoletes_res(dep_package_id, provides_id);
CREATE INDEX pkg_file_pkg_id ON package_files(package_id);
CREATE INDEX pkg_file_name ON package_files(basename);
CREATE INDEX pkg_file_path ON package_files(path);
CREATE INDEX pkg_file_mark ON package_files(mark);
CREATE INDEX so_needed_obj_id ON so_needed(obj_file_id);
CREATE INDEX so_needed_res_sn ON so_needed_res(so_needed_id);
CREATE INDEX symbols_obj_name_type ON obj_symbols(obj_file_id, name, sym_type);
CREATE INDEX symbols_name_type ON obj_symbols(name, sym_type);
CREATE INDEX symbols_res_sym ON obj_symbols_res(obj_sym_id);
""")
dep_tables = ['rpm_requires', 'rpm_provides',
'rpm_conflicts', 'rpm_obsoletes']
for table in dep_tables:
conn.execute('CREATE INDEX %(tbl)s_pkg ON %(tbl)s(package_id)' %
{'tbl': table})
conn.execute('CREATE INDEX %(tbl)s_name ON %(tbl)s(name)' %
{'tbl': table})
conn.commit()
def get_rpm_header(rpm_ts, pkg):
hdr = None
try:
fdno = os.open(pkg, os.O_RDONLY)
except OSError as exc:
raise Exception('Unable to open file %s.\n%s' % (pkg, exc))
try:
hdr = rpm_ts.hdrFromFdno(fdno)
except rpm.error as exc:
raise Exception('Unable to read RPM header for %s\n%s.' % (pkg, exc))
finally:
os.close(fdno)
return hdr
def generate_new_id(generator, gen_lock):
gen_lock.acquire()
last_id = generator.value
last_id += 1
generator.value = last_id
gen_lock.release()
return last_id
FILE_REC_ID_IDX = 0
FILE_REC_PATH_IDX = 3
FILE_REC_LINK_IDX = 6
FILE_REC_MARK_IDX = 7
def register_object(data, object_file_record, temp_dir, no_so_symbols):
so_needed = data['so_needed']
obj_symbols = data['obj_symbols']
obj_id = object_file_record[0]
obj_file_path = object_file_record[3]
temp_obj_file = os.path.join(temp_dir, obj_file_path.lstrip('/'))
target_file = None
file_mark = None
od_out = ''
nmundef_out = ''
nmdef_out = ''
if os.path.islink(temp_obj_file):
target_file = os.path.join(os.path.dirname(obj_file_path),
os.readlink(temp_obj_file))
file_mark = 'link'
elif not os.path.exists(temp_obj_file):
file_mark = 'not-found'
else:
p = subprocess.Popen(['objdump', '-p', temp_obj_file],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
od_out = p.communicate()[0]
if p.returncode != 0:
file_mark = 'invalid-format'
elif not(no_so_symbols):
p = subprocess.Popen(['nm', '-p', '-D', '--undefined-only',
temp_obj_file],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
nmundef_out = p.communicate()[0]
if p.returncode != 0:
file_mark = 'no-symbols'
else:
p = subprocess.Popen(['nm', '-p', '-D', '--defined-only',
temp_obj_file],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
nmdef_out = p.communicate()[0]
if p.returncode != 0:
file_mark = 'no-symbols'
else:
file_mark = 'so'
object_file_record[FILE_REC_LINK_IDX] = target_file
object_file_record[FILE_REC_MARK_IDX] = file_mark
dynsection = False
for odline in od_out.split('\n'):
odls = odline.strip()
if odls == '':
dynsection = False
        # objdump output may be localized; match both the Russian and
        # the English section header
        elif odls == 'Динамический раздел:' or odls == 'Dynamic section:':
dynsection = True
elif dynsection:
needrem = re.match(r'\s+NEEDED\s+(.*)', odline)
if needrem:
so_needed.append([obj_id, needrem.group(1)])
for symline in nmundef_out.split('\n'):
smre = re.match(r'^.([\S]*)\s+(\w)\s(.*)$', symline)
if smre:
if smre.group(2) in ['v', 'w']:
continue
symname = smre.group(3)
obj_symbols.append([obj_id, symname, 0])
for symline in nmdef_out.split('\n'):
smre = re.match(r'^.([\S]*)\s+(\w)\s(.*)$', symline)
if smre:
symname = smre.group(3)
obj_symbols.append([obj_id, symname, 1])
return obj_id
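# register_object() above parses tool output of roughly this shape
# (illustrative lines, not taken from a real package):
#   objdump -p:  "  NEEDED               libc.so.6"   -> so_needed row
#   nm -p -D:    "0000000000001040 T some_symbol"     -> obj_symbols row
# Weak and vague undefined symbols (nm types 'v'/'w') are skipped.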
def extract_files(local_pkg, files_list, obj_so_files_idx, temp_dir):
filelist = os.path.join(temp_dir, 'files.lst')
with open(filelist, 'w') as f:
for i in obj_so_files_idx:
f.write('.' + files_list[i][FILE_REC_PATH_IDX] + '\n')
rpm_cpio_cmd = 'rpm2cpio ' + local_pkg + ' | cpio -ivdu -E ' + filelist
p = subprocess.Popen(rpm_cpio_cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
cwd=temp_dir,
shell=True)
output = p.communicate()[0]
if p.returncode != 0:
print >> sys.stderr, 'Couldn\'t extract files from package %s.' \
'\n\t%s' % (local_pkg, output.decode('utf-8'))
return False
return True
def process_package_worker(num, queue_in, generator, gen_lock, db_struct,
repodir_id, build_archs, temp_dir,
no_shared_objects, no_so_symbols):
def to_string(pkg, tag, val):
if type(val) == type([]):
if not(val):
return None
try:
return str(val).decode('utf-8')
        except UnicodeDecodeError:
print >> sys.stderr, ('Invalid UTF-8 string (%s: %s):\n' %
(pkg, tag))
print >> sys.stderr, val
return str(val).decode('utf-8', 'replace')
rpm_ts = rpm.TransactionSet()
rpm_ts.setVSFlags(~(rpm.RPMVSF_NEEDPAYLOAD))
data = {}
data['packages'] = []
for table in db_struct['dep_tables']:
data[table] = []
data['package_files'] = []
data['so_needed'] = []
data['obj_symbols'] = []
while True:
job = queue_in.get()
if job is None:
break
(pkg, ) = job
pkg_id = generate_new_id(generator, gen_lock)
local_pkg = get_local_file(pkg, temp_dir)
hdr = get_rpm_header(rpm_ts, local_pkg)
package_values = []
package_values.append(pkg_id)
for tag in db_struct['packages_tags']:
hval = hdr[tag]
package_values.append(
(sqlite3.Binary(hval) if len(hval)>0 else None)
if tag in db_struct['blob_tags'] else \
to_string(pkg, tag, hval) if \
type(hval) in [type([]), type('')] else hval
)
package_values.append(repodir_id)
package_values.append(pkg)
package_values.append(None)
data['packages'].append(package_values)
for table in db_struct['dep_tables']:
table_data = data[table]
            rpref = 'RPMTAG_' + table[4 : -1].upper() # 'rpm_requires' -> 'RPMTAG_REQUIRE'
(dep_name, dep_flags, dep_version) = \
(hdr[rpref + 'NAME'], hdr[rpref + 'FLAGS'], hdr[rpref + 'VERSION'])
for i in xrange(0, len(hdr[rpref + 'NAME'])):
for build_arch in build_archs:
table_data.append([dep_name[i].decode('utf-8'),
dep_flags[i],
dep_version[i],
pkg_id, build_arch])
(pkg_file_paths, pkg_file_names, pkg_file_sizes, pkg_file_modes) = \
(hdr['RPMTAG_FILEPATHS'], hdr['RPMTAG_BASENAMES'],
hdr['RPMTAG_FILESIZES'], hdr['RPMTAG_FILEMODES'])
files_list = data['package_files']
files_dirs = {}
obj_so_files_idx = []
for i in xrange(0, len(pkg_file_paths)):
file_name = pkg_file_names[i]
file_path = pkg_file_paths[i]
pkg_file_id = generate_new_id(generator, gen_lock)
files_list.append([pkg_file_id, #FILE_REC_ID_IDX = 0
pkg_id,
file_name.decode('utf-8'),
file_path.decode('utf-8'), #FILE_REC_PATH_IDX = 3
pkg_file_sizes[i],
pkg_file_modes[i],
None, #link_to_path FILE_REC_LINK_IDX = 6
                               None #mark FILE_REC_MARK_IDX = 7
])
if pkg_file_modes[i] & RPMFILEMODE_DIRECTORY != 0:
files_dirs[file_path] = False
continue
dir_name = os.path.dirname(file_path)
if dir_name != '' and dir_name not in files_dirs:
files_dirs[dir_name] = True
if no_shared_objects:
continue
if os.path.splitext(file_name)[1] in \
['.debug', '.xz', '.conf', '.py', '.c', '.h', '.hpp', '.png',
'.cc', '.cpp', '.sh', '.java', '.pl', '.patch', '.desktop']:
continue
if file_path.startswith('/usr/lib/debug/.build-id') or \
file_path.endswith('/ld.so.cache'):
continue
if re.search(r'\.so($|\.)', file_name) or \
(pkg_file_modes[i] & RPMFILEMODE_EXECUTE) != 0:
obj_so_files_idx.append(len(files_list) - 1)
for fdir in sorted(files_dirs.keys()):
if files_dirs[fdir]:
# Add parent directories as implicit files
# TODO: recursive processing?
pkg_file_id = generate_new_id(generator, gen_lock)
files_list.append([pkg_file_id, #FILE_REC_ID_IDX = 0
pkg_id,
os.path.basename(fdir),
fdir, #FILE_REC_PATH_IDX = 3
0,
-1, # special mode
None, #link_to_path FILE_REC_LINK_IDX = 6
                                   None #mark FILE_REC_MARK_IDX = 7
])
if obj_so_files_idx:
pkg_temp_dir = os.path.join(temp_dir, os.path.basename(local_pkg))
os.makedirs(pkg_temp_dir)
if extract_files(local_pkg, files_list,
obj_so_files_idx, pkg_temp_dir):
for i in obj_so_files_idx:
register_object(data, files_list[i], pkg_temp_dir,
no_so_symbols)
shutil.rmtree(pkg_temp_dir, True)
remove_cached_file(pkg)
queue_in.task_done()
conn = None
retryCnt = 0
retry = True
while retry:
retry = False
try:
conn = sqlite3.connect(DB, timeout=30)
conn.executemany("""
INSERT INTO packages (%s) VALUES (%s)""" %
(db_struct['packages_field_names'],
db_struct['packages_values_template']),
data['packages'])
except sqlite3.OperationalError as ex:
if (str(ex) == 'database is locked'):
print 'Database is locked. Retrying...'
conn.close()
time.sleep(10)
retryCnt += 1
if retryCnt < 3:
retry = True
else:
raise ex
else:
raise ex
for table in db_struct['dep_tables']:
conn.executemany("""
INSERT INTO %s (name, flags, version, package_id, build_arch)
VALUES (?, ?, ?, ?, ?)""" % table, data[table])
conn.executemany("""
INSERT INTO package_files (id, package_id, basename, path,
size, mode, link_to_path, mark)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)""", data['package_files'])
conn.executemany("""
INSERT INTO so_needed(obj_file_id, name) VALUES(?, ?)
""", data['so_needed'])
conn.executemany("""
INSERT INTO obj_symbols(obj_file_id, name, sym_type) VALUES(?, ?, ?)
""", data['obj_symbols'])
conn.commit()
queue_in.task_done()
def get_files(url, ext):
filelist = []
urlp = urlparse(url)
if urlp.scheme in ['ftp', 'http', 'https']:
        return parse_index_html(wget_url(url, None), url, ext)
dir_list = os.listdir(url)
for d in dir_list:
if d.endswith(ext):
filepath = os.path.normpath(os.path.join(url, d))
filelist.append(filepath)
return filelist
local_cache = {}
def get_local_file(url, temp_dir):
urlp = urlparse(url)
if urlp.scheme in ['ftp', 'http', 'https']:
cached_file_name = local_cache.get(url)
if cached_file_name and os.path.isfile(cached_file_name):
return cached_file_name
cache_dir = os.path.join(temp_dir, 'cache')
if not os.path.isdir(cache_dir):
os.makedirs(cache_dir)
temp_file = os.path.join(cache_dir, os.path.basename(url))
wget_url(url, temp_file)
local_cache[url] = temp_file
return temp_file
return url
def remove_cached_file(url):
cached_file_name = local_cache.get(url)
if cached_file_name:
os.unlink(cached_file_name)
del local_cache[url]
def wget_url(url, target_file):
urlp = urlparse(url)
wget_params = []
site = urlp.netloc
if urlp.username:
wget_params = wget_params + ['--auth-no-challenge',
'--http-user=%s' % urlp.username,
'--http-password=%s' %
('""' if not urlp.password else urlp.password)]
site = site[site.find('@') + 1:]
url = urlunparse((urlp.scheme, site, urlp.path, urlp.params,
urlp.query, urlp.fragment))
print 'Downloading %s...' % url
if target_file is None:
wget_params += ['-nv', '-O-', url]
else:
wget_params += ['-nv', '-O', target_file, url]
p = subprocess.Popen(['wget'] + wget_params,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
wget_out, wget_err = p.communicate()
if p.returncode != 0:
print >> sys.stderr, ('Unable to get data from the url: %s '
'(error: %d).\n%s\n%s') % \
(url, p.returncode, wget_out.decode('utf-8'), wget_err.decode('utf-8'))
raise Exception('Unable to download data (%d).' % p.returncode)
if target_file is None:
return wget_out
def parse_index_html(index_html, base_url, filter_ext):
file_list = []
for match in re.finditer(r'href="([^"]+)"', index_html, re.M):
filename = match.group(1)
if filename.endswith(filter_ext):
if '://' in filename[:8]:
file_list.append(filename)
continue
filepath = os.path.join(base_url, filename)
if os.path.dirname(filepath) == base_url.rstrip('/') and \
os.path.basename(filepath) == filename:
file_list.append(filepath)
return file_list
def download_repodir(source_urlp, cache_dir):
site = source_urlp.netloc
site = site[site.find('@') + 1:]
target_dir = os.path.join(cache_dir,
site,
source_urlp.path.lstrip('/'))
if not os.path.isdir(target_dir):
os.makedirs(target_dir)
remote_files = {}
if source_urlp.scheme in ['ftp', 'http', 'https']:
source_url = source_urlp.geturl()
remote_dir_contents = parse_index_html(wget_url(source_url, None),
source_url, '.rpm')
for remote_file in remote_dir_contents:
remote_filename = urllib.unquote(os.path.basename(remote_file))
remote_files[remote_filename] = True
target_file = os.path.join(target_dir, remote_filename)
if os.path.isfile(target_file):
continue
wget_url(remote_file, target_file)
for local_filename in os.listdir(target_dir):
if local_filename not in remote_files and \
local_filename.endswith('.rpm'):
print 'Removing local file: %s.' % local_filename
os.unlink(os.path.join(target_dir, local_filename))
return target_dir
def urpm_get_packages(media):
extra_params = []
if not media.endswith(' update'):
extra_params = ['--exclude-media', media + ' update']
p = subprocess.Popen(['urpmq', '-r', '--ignorearch',
'--list', '--media', media] +
extra_params,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
urpmqr_out, urpmqr_err = p.communicate()
if p.returncode != 0 or len(urpmqr_err) > 0:
print >> sys.stderr, ('Unable to get a list of packages '
'from the media: %s.\n'
                              '%s\n%s') % (media, urpmqr_out.decode('utf-8'), urpmqr_err.decode('utf-8'))
raise Exception('Unable to get a list of packages (%d).' % p.returncode)
# urpmi --no-install --allow-nodeps --force
# --download-all=/tmp/ xine-wavpack-1.2.4-1plf --media Desktop2012.1-8
p = subprocess.Popen(['urpmq', '-f', '--ignorearch',
'--list', '--media', media] +
extra_params,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
urpmqf_out, urpmqf_err = p.communicate()
if p.returncode != 0 or len(urpmqf_err) > 0:
print >> sys.stderr, ('Unable to get a list of packages '
'from the media: %s.\n'
'%s\n%s') % (media, urpmqf_out.decode('utf-8'), urpmqf_err.decode('utf-8'))
raise Exception('Unable to get a list of packages (%d).' % p.returncode)
rpm_list = []
qr_lines = urpmqr_out.split('\n')
qf_lines = urpmqf_out.split('\n')
if len(qr_lines) != len(qf_lines):
raise Exception('Not consistent urpmq -r and urpmq -f outputs '
'(%d and %d lines).' % (len(qr_lines), len(qf_lines)))
for i in xrange(0, len(qf_lines)):
qf_line = qf_lines[i]
if qf_line.strip() == '':
continue
if not qf_line.startswith(qr_lines[i]):
raise Exception('Not consistent urpmq -r and urpmq -f outputs: '
'%s and %s' % (qr_lines[i], qf_line))
rpm_list.append('urpm://%s/%s.rpm#%s' % (urllib.quote(media),
urllib.quote(qf_line),
urllib.quote(qr_lines[i])))
return rpm_list
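# The pseudo-URLs built above look roughly like this (hypothetical package):
#   urpm://Main/firefox-33.0-1.x86_64.rpm#firefox-33.0-1
# i.e. urpm://<media>/<urpmq -f line>.rpm#<urpmq -r line>, percent-encoded.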
def get_urpmi(urpm_package, target_dir):
urlp = urlparse(urpm_package)
package_name = urllib.unquote(urlp.fragment)
print package_name
p = subprocess.Popen(['urpmi', '--no-install',
'--force', '--no-suggests',
'--allow-nodeps',
'--no-download-all',
'--media', urlp.netloc,
package_name])
                         #stdout=subprocess.PIPE,
                         #stderr=subprocess.PIPE)
    urpmi_out, urpmi_err = p.communicate()
    # stdout/stderr are not piped above, so communicate() returns (None, None);
    # substitute empty strings to keep the error report below from crashing
    urpmi_out, urpmi_err = (urpmi_out or '', urpmi_err or '')
if p.returncode != 0:
print >> sys.stderr, ('Unable to get the package %s '
'from the media %s.\n'
'%s\n%s') % (
package_name, urlp.netloc,
urpmi_out.decode('utf-8'), urpmi_err.decode('utf-8'))
raise Exception('Unable to get the package %s (%d).' %
(package_name, p.returncode))
def urpm_get_repodir(repodir_name, cache_dir):
target_dir = os.path.join(cache_dir,
repodir_name,
'rpms')
if not os.path.isdir(target_dir):
os.makedirs(target_dir)
urpm_files = {}
urpm_media_contents = urpm_get_packages(repodir_name)
for urpm_package in urpm_media_contents:
remote_filename = urllib.unquote(os.path.basename(urpm_package))
target_file = os.path.join(target_dir, remote_filename)
get_urpmi(urpm_package, os.path.join(cache_dir,
repodir_name))
print target_file
raise Exception('Not implemented.')
generator_value = 0
def import_repodir(repodir_id, cache_dir, build_archs, conn,
db_struct, temp_dir, no_shared_objects, no_so_symbols):
rd_rec = conn.execute("""SELECT name, url FROM repodirs WHERE id = ?""",
[repodir_id]).fetchone()
(repodir_name, repodir_url) = (rd_rec[0], rd_rec[1])
urlp = urlparse(repodir_url)
working_url = repodir_url
if cache_dir is not None:
if urlp.scheme in ['ftp', 'http', 'https']:
working_url = download_repodir(urlp, cache_dir)
elif urlp.scheme == 'urpm':
working_url = urpm_get_repodir(repodir_name, cache_dir)
elif urlp.scheme not in ['', 'file']:
raise Exception('Invalid scheme in the repository url: %s' %
repodir_url)
rpm_list = []
rpm_list = get_files(working_url, '.rpm')
if not rpm_list:
return
print urlp.netloc[urlp.netloc.find('@') + 1:] + urlp.path, ': ', \
len(rpm_list)
if not db_struct.get('defined'):
rpm_ts = rpm.TransactionSet()
rpm_ts.setVSFlags(~(rpm.RPMVSF_NEEDPAYLOAD))
# ts.setVSFlags(~(rpm.RPMVSF_NOMD5|rpm.RPMVSF_NEEDPAYLOAD))
hdr = get_rpm_header(rpm_ts, get_local_file(rpm_list[0], temp_dir))
# Retain sort order!
packages_extra_fields = {'repodir_id': 'INTEGER',
'rpm_url': 'TEXT',
'sourcerpm_package': 'TEXT'}
file_tags_re = r'^RPMTAG_(BASENAMES|FILE[\w\d]+)'
dir_tags_re = r'^RPMTAG_DIR(INDEXES|NAMES)'
changelog_tags_re = r'^RPMTAG_CHANGELOG\w+'
trigger_tags_re = r'^RPMTAG_TRIGGER\w+'
datetime_tags = ['RPMTAG_PACKAGETIME', 'RPMTAG_RPMLIBTIMESTAMP', ]
db_struct['blob_tags'] = ['RPMTAG_RSAHEADER', 'RPMTAG_DSAHEADER',
'RPMTAG_HEADERIMMUTABLE', 'RPMTAG_SIGMD5',
'RPMTAG_PKGID', 'RPMTAG_SOURCEPKGID']
reserved_field_names = ['id', 'group']
skip_tags_re = '^RPMTAG_(C|D|E|N|P|R|V|HEADERIMMUTABLE)$'
        # single-letter rpm tag aliases skipped above: C - CONFLICTNAME,
        # D - DISTEPOCH, E - EPOCH, N - NAME, P - PROVIDENAME,
        # R - RELEASE, V - VERSION
types = {"<type 'str'>" : "TEXT", "<type 'int'>": "INTEGER",
"<type 'NoneType'>": "TEXT", "<type 'list'>": "TEXT"}
dep_tags_re = r'^RPMTAG_(CONFLICT|OBSOLETE|PROVIDE|REQUIRE)\w+'
db_struct['dep_tables'] = ['rpm_requires', 'rpm_provides',
'rpm_conflicts', 'rpm_obsoletes']
packages_field_names = 'id, '
packages_values_template = '?,'
packages_tags = []
packages_fields = ''
rpmtags = [str(t) for t in dir(rpm) if t.startswith('RPMTAG_') ]
for tag in rpmtags:
if (re.match(file_tags_re, tag) or re.match(dir_tags_re, tag) or
re.match(changelog_tags_re, tag) or
re.match(skip_tags_re, tag) or
re.match(trigger_tags_re, tag) or
re.match(dep_tags_re, tag)):
continue
sqltype = "TIMESTAMP" if tag in datetime_tags else \
"BLOB" if tag in db_struct['blob_tags'] else \
types[str(type(hdr[tag]))]
fieldname = tag.replace('RPMTAG_', '').lower()
if fieldname in reserved_field_names:
fieldname = 'rpm_' + fieldname
packages_tags.append(tag)
packages_field_names += fieldname + ', '
packages_values_template += '?, '
packages_fields += fieldname + ' ' + sqltype + ', '
nef = 0
for extra_field in sorted(packages_extra_fields.keys()):
packages_field_names += (', ' if nef > 0 else '') + extra_field
packages_values_template += (', ' if nef > 0 else '') + '?'
packages_fields += (', ' if nef > 0 else '') + extra_field + ' ' + \
packages_extra_fields[extra_field]
nef += 1
conn.execute("""
CREATE TABLE IF NOT EXISTS packages(id INTEGER PRIMARY KEY NOT NULL, %s)
""" % (packages_fields))
for table in db_struct['dep_tables']:
conn.execute("""
CREATE TABLE IF NOT EXISTS %s (id INTEGER PRIMARY KEY NOT NULL,
name TEXT, flags INTEGER, version TEXT, build_arch TEXT,
package_id INTEGER NOT NULL)""" % (table))
conn.commit()
db_struct['packages_tags'] = packages_tags
db_struct['packages_field_names'] = packages_field_names
db_struct['packages_values_template'] = packages_values_template
db_struct['defined'] = True
queue_in = mp.JoinableQueue()
for pkg in rpm_list:
queue_in.put((pkg, ))
for i in xrange(NUM_PROCESSES):
queue_in.put(None)
# Trying to prevent Exception AssertionError: AssertionError() in
# <Finalize object, dead> ignored
gc.collect()
time.sleep(1)
gc.disable()
global generator_value
id_generator = mp.Value('i', generator_value)
generator_lock = mp.Lock()
# run workers
workers = []
for i in xrange(NUM_PROCESSES):
worker = mp.Process(target = process_package_worker,
args = (i, queue_in, id_generator,
generator_lock, db_struct,
repodir_id, build_archs, temp_dir,
no_shared_objects, no_so_symbols))
workers.append(worker)
worker.start()
queue_in.join()
gc.enable()
generator_value = id_generator.value
def add_repodir(xrepodir, conn):
dbc = conn.cursor()
dbc.execute("""
INSERT INTO repodirs (name, url, sources) VALUES (?, ?, ?)
""", [xrepodir.get('name'), xrepodir.get('url'), xrepodir.get('sources')])
repodir_id = dbc.lastrowid
for depend in xrepodir.findall('dependency'):
dbc.execute("""
INSERT INTO repodir_depends(repodir_id, depend_repodir_name) VALUES (?, ?)
""", [repodir_id, depend.text.strip()])
conn.commit()
return repodir_id
def get_build_archs(xrepodir, xrepodirs):
build_archs = []
for depend in xrepodir.findall('dependency'):
arch_sign = '$arch'
depend_repo = depend.text.strip()
spos = depend_repo.find(arch_sign)
if spos >= 0:
drepo_prefix = depend_repo[:spos]
drepo_postfix = depend_repo[spos + len(arch_sign):]
            for xdir in xrepodirs.findall('dir'):
                repodir_name = xdir.get('name')
if repodir_name.startswith(drepo_prefix) and \
repodir_name.endswith(drepo_postfix):
repo_arch = repodir_name[len(drepo_prefix) :
len(repodir_name) - len(drepo_postfix)]
if repo_arch == 'SRPMS':
continue
if repo_arch not in build_archs:
build_archs.append(repo_arch)
if build_archs:
return build_archs
return [None]
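# Example with made-up repodir names: a dependency on 'main/$arch/release'
# matches 'main/i586/release' and 'main/x86_64/release', yielding
# build_archs = ['i586', 'x86_64']; the 'main/SRPMS/release' match is skipped.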
def import_repositories(options, conn):
init_database(conn)
rpm_db_struct = {}
tree = ET.parse(options.config, parser=ET.XMLParser())
config_root = tree.getroot()
temp_dir = '/dev/shm/rt-tmp/'
shutil.rmtree(temp_dir, True)
os.mkdir(temp_dir)
for xrepodir in config_root.find('repositories').findall('dir'):
repodir_id = add_repodir(xrepodir, conn)
build_archs = [None] if xrepodir.get('sources') != '.' else \
get_build_archs(xrepodir,
config_root.find('repositories'))
import_repodir(repodir_id, options.cache_dir,
build_archs, conn, rpm_db_struct, temp_dir,
options.no_shared_objects, options.no_so_symbols)
shutil.rmtree(temp_dir, True)
if not rpm_db_struct.get('defined'):
print 'Database was not initialized ' \
'(check whether repositories are empty).'
return False
index_database(conn)
return True
################################################
### Post-process repo.db after data import
################################################
def version_ok(required_version, compare_flag, candidate_version, context=''):
def sep_version(version):
vrem = re.match(r'\A(.+)(\-[^\-\:]+)(\:[^\:]+|)\Z', version)
if vrem:
return (vrem.group(1), vrem.group(2), vrem.group(3))
return (version, '', '')
def simple_version(version):
version = re.sub(r'[\-:]', '.', version)
version = re.sub(r'[a-z]+', '.', version, flags=re.I)
version = re.sub(r'\.\Z', '', version)
return version
def format_versions(ver1, ver2):
#see urpm-repoclosure, formatVersions
# v1 - provided
# v2 - required
(e1, e2) = (None, None)
e1_m = re.match(r'\A([^\-\:]+)\:(.*)', ver1)
if e1_m:
(e1, ver1) = (e1_m.group(1), e1_m.group(2))
e2_m = re.match(r'\A([^\-\:]+)\:(.*)', ver2)
if e2_m:
(e2, ver2) = (e2_m.group(1), e2_m.group(2))
(ver1_m, ver1_r, ver1_rr) = sep_version(ver1)
(ver2_m, ver2_r, ver2_rr) = sep_version(ver2)
if not ver2_rr:
ver1_rr = ''
if not ver2_r:
ver1_r = ''
ver1 = ver1_m + ver1_r + ver1_rr
ver2 = ver2_m + ver2_r + ver2_rr
if e1_m and e2_m:
ver1 = e1 + '.' + ver1
ver2 = e2 + '.' + ver2
return (simple_version(ver1), simple_version(ver2))
    def cmp_nums(num1, num2):
        # compare digit strings that may carry leading zeros,
        # e.g. '00503' vs '12' (cf. perl versions 5.00503 and 5.12)
if num1 == num2:
return 0
lzeros1 = re.match(r'\A([0]+)([1-9].*)', num1)
if lzeros1:
(num1, num2) = (lzeros1.group(2), num2 + lzeros1.group(1))
lzeros2 = re.match(r'\A([0]+)([1-9].*)', num2)
if lzeros2:
(num2, num1) = (lzeros2.group(2), num1 + lzeros2.group(1))
diff = int(num1, 10) - int(num2, 10)
return 0 if diff == 0 else \
(1 if diff > 0 else -1)
def cmp_versions(version1, version2):
#see urpm-repoclosure, cmpVersions
# 3.2.5-5:2011.0
# NOTE: perl 5.00503 and 5.12
(v1, v2) = format_versions(version1, version2)
if v1 == v2:
return 0
v1parts = v1.split('.')
v2parts = v2.split('.')
for i in xrange(0, min(len(v1parts), len(v2parts))):
(num1, num2)= (v1parts[i], v2parts[i])
if (len(num1) > 0 and len(num2) == 0):
return 1
if (len(num1) == 0 and len(num2) > 0):
return -1
num_diff = cmp_nums(num1, num2)
if num_diff != 0:
return num_diff
if len(v1parts) < len(v2parts):
return -1
if len(v1parts) > len(v2parts):
return 1
return 0
def rpm_cmp_versions(version1, version2):
def stringToVersion(verstring):
# from rpmUtils
if verstring in [None, '']:
return (None, None, None)
e1_m = re.match(r'\A([^\-\:]+)\:(.*)', verstring)
epoch = None
if e1_m:
(epoch, verstring) = (e1_m.group(1), e1_m.group(2))
j = verstring.find(':')
if j != -1:
# ignore ":2012.0" in 2014-10:2012.0
verstring = verstring[:j]
j = verstring.find('-')
if j != -1:
if verstring[:j] == '':
version = None
else:
version = verstring[:j]
release = verstring[j + 1:]
else:
if verstring == '':
version = None
else:
version = verstring
release = None
return (epoch, version, release)
(e1, v1, r1) = stringToVersion(version1)
(e2, v2, r2) = stringToVersion(version2)
if e1 is None or e2 is None:
e1 = '0'
e2 = '0'
result = rpm.labelCompare((e1, v1, r1), (e2, v2, r2))
return result
# print '===', required_version, compare_flag, candidate_version
if compare_flag == 0:
return True
if candidate_version == '*':
return True
#see urpm-repoclosure, checkDeps
if compare_flag == RPMSENSE_EQUAL and \
candidate_version == required_version:
return True
#cmp_res = None
#try:
# cmp_res = cmp_versions(candidate_version, required_version)
#except ValueError as ex:
# print ('Error when comparing versions (%s): "%s" and "%s"\n%s' %
# (context, candidate_version, required_version, str(ex)))
# return False
cmp_res = rpm_cmp_versions(candidate_version, required_version)
#if (cmp_res != rpm_cmp_res):
# print >> sys.stderr, (('Invalid compare: "%s" vs "%s"! '
# 'Results: rc: %d, rpm: %d.') %
# (candidate_version, required_version,
# cmp_res, rpm_cmp_res))
if compare_flag == RPMSENSE_EQUAL:
return cmp_res == 0
elif compare_flag == RPMSENSE_LESS | RPMSENSE_EQUAL:
return cmp_res <= 0
elif compare_flag == RPMSENSE_GREATER | RPMSENSE_EQUAL:
return cmp_res >= 0
elif compare_flag == RPMSENSE_LESS:
return cmp_res < 0
elif compare_flag == RPMSENSE_GREATER:
return cmp_res > 0
return False
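# Illustrative calls (argument order: required version, sense flags,
# candidate version; comparison follows rpm.labelCompare):
#   version_ok('2.0-1', RPMSENSE_EQUAL, '2.0-1') -> True (fast path)
#   version_ok('3.0-1', RPMSENSE_LESS, '2.9-1')  -> True (candidate < required)
#   version_ok('1.0-1', 0, '0.1-1')              -> True (flag 0: no constraint)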
def process_repodir_dependencies(conn, repodir_id, repodir_name,
repodir_depends, dep_type):
package_depends = conn.execute("""
SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
dep.id, dep.name, flags, dep.version
FROM packages, rpm_%s dep
WHERE repodir_id = ? AND dep.package_id = packages.id
ORDER BY packages.name, dep.name
""" % dep_type, [repodir_id]).fetchall()
search_repodirs = [repodir_id]
search_repodirs.extend(repodir_depends)
in_repodirs = ','.join(str(id) for id in search_repodirs)
dependency_cache = {}
for packdep in package_depends:
(cpackage_id, package_nvra, dep_id, dep_name, dep_flags, dep_version) = \
(packdep[0], packdep[2], packdep[3], packdep[4], packdep[5], packdep[6])
dep_res = []
depend_candidates = conn.execute("""
SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
prov.id, prov.name, flags, prov.version
FROM packages, rpm_provides AS prov
WHERE prov.package_id = packages.id AND repodir_id IN (%s) AND prov.name = ?
ORDER by packages.name, packages.nvra
""" % in_repodirs, [dep_name]).fetchall()
for dep_cand in depend_candidates:
(pkg_id, provides_id, provides_flags, provides_version) = \
(dep_cand[0], dep_cand[3], dep_cand[5], dep_cand[6])
if provides_flags & RPMSENSE_SENSEMASK == 0:
if not provides_version:
provides_version = '*'
else:
raise Exception('Invalid provides version '
'(flags = %d, version = %s)!' %
(provides_flags, provides_version))
if version_ok(dep_version, dep_flags & RPMSENSE_SENSEMASK,
provides_version, '%s %s %s' %
(package_nvra, dep_type, dep_name)):
dep_res.append({'prov_id': provides_id, 'pkg_id': pkg_id})
if len(dep_res) > 0:
for res_rec in dep_res:
conn.execute("""
INSERT INTO package_%(dep)s_res(package_id, %(dep)s_id,
provides_id, dep_package_id)
VALUES (?, ?, ?, ?)""" % {'dep': dep_type}, [cpackage_id, dep_id,
res_rec.get('prov_id'), res_rec.get('pkg_id')])
def process_repodir_requires(conn, repodir_id, repodir_name,
repodir_depends, requires_build_arch):
print 'Processing repo %d: %s (with depends: %s)' % (repodir_id, repodir_name, str(repodir_depends))
package_requires = conn.execute("""
SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
req.id, req.name, flags, req.version
FROM packages, rpm_requires req
WHERE repodir_id = ? AND req.package_id = packages.id %s
ORDER BY packages.name, req.name
""" % ((" AND build_arch = '%s'" % requires_build_arch)
if requires_build_arch is not None else ""),
[repodir_id]).fetchall()
search_repodirs = [repodir_id]
search_repodirs.extend(repodir_depends)
in_repodirs = ','.join(str(id) for id in search_repodirs)
# print 'package requires count: ', len(package_requires)
broken_dep = 0
for packreq in package_requires:
(cpackage_id, package_nvra, requires_id, requires_name, requires_flags, requires_version) = \
(packreq[0], packreq[2], packreq[3], packreq[4], packreq[5], packreq[6])
req_res = []
if (re.match(r'\A(rpmlib|executable)\(.+\)\Z', requires_name)):
# see if($N=~/\A(rpmlib|executable)\(.+\)\Z/) in urpm_repoclosure.pl
req_res.append({})
else:
depend_candidates = conn.execute("""
SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
prov.id, prov.name, flags, prov.version
FROM packages, rpm_provides AS prov
WHERE prov.package_id = packages.id AND repodir_id IN (%s) AND prov.name = ?
ORDER by packages.name, packages.nvra
""" % in_repodirs, [requires_name]).fetchall()
preferred_version = None
for dep_cand in depend_candidates:
(pkg_id, provides_id,
provides_flags, provides_version) = \
(dep_cand[0], dep_cand[3],
dep_cand[5], dep_cand[6])
if provides_flags & RPMSENSE_SENSEMASK == 0:
if not provides_version:
provides_version = '*'
else:
raise Exception('Invalid provides version '
'(flags = %d, version = %s)!' %
(provides_flags, provides_version))
if version_ok(requires_version,
requires_flags & RPMSENSE_SENSEMASK,
provides_version,
'%s requires %s' % (package_nvra, requires_name)):
if pkg_id == cpackage_id:
# the same package is preferred over any other
req_res.append({'prov_id': provides_id,
'pkg_id': pkg_id})
preferred_version = None
break
better_version = preferred_version is None or \
provides_version == '*'
if not better_version:
better_version = version_ok(provides_version,
RPMSENSE_GREATER,
preferred_version,
'%s requires %s' % (package_nvra, requires_name))
if better_version:
preferred_version = provides_version
if preferred_version is not None:
for dep_cand in depend_candidates:
(pkg_id, provides_id, provides_version) = \
(dep_cand[0], dep_cand[3], dep_cand[6])
if provides_version == preferred_version or \
version_ok(provides_version, RPMSENSE_EQUAL,
preferred_version,
'%s requires %s' % (package_nvra, requires_name)):
req_res.append({'prov_id': provides_id,
'pkg_id': pkg_id})
if len(req_res) == 0 and requires_name.startswith('/'): # file dependency
if (requires_flags & (RPMSENSE_SCRIPT_POST |
RPMSENSE_SCRIPT_PREUN |
RPMSENSE_SCRIPT_POSTUN)) != 0:
int_files_cnt = conn.execute("""
SELECT COUNT(1) FROM package_files WHERE package_id = ? AND path = ?
""", [cpackage_id, requires_name]).fetchone()
if int_files_cnt[0] > 0:
req_res.append({})
else:
files_deps = conn.execute("""
SELECT package_id FROM package_files
WHERE path = ? AND
package_id in (SELECT id FROM packages WHERE repodir_id IN (%s))
""" % in_repodirs, [requires_name]).fetchall()
for file_dep in files_deps:
req_res.append({'pkg_id': file_dep[0]})
if len(req_res) == 0 and (requires_flags & RPMSENSE_MISSINGOK) != 0:
req_res.append({})
if len(req_res) > 0:
for res_rec in req_res:
conn.execute("""
INSERT INTO package_requires_res(package_id, requires_id,
provides_id, dep_package_id)
VALUES (?, ?, ?, ?)
""", [cpackage_id, requires_id, res_rec.get('prov_id'), res_rec.get('pkg_id')])
else:
print requires_name, requires_version, '(required by %s)' % package_nvra, 'not found!!!'
broken_dep += 1
print 'broken_deps: ', broken_dep
print ''
def process_repodir_file_links(conn, repodir_id, repodir_name, repodir_depends):
package_files_links = conn.execute("""
SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
package_files.id AS object_id, package_files.path, package_files.link_to_path
FROM packages, package_files
WHERE repodir_id = ? AND package_files.package_id = packages.id AND
link_to_path IS NOT NULL
ORDER BY packages.name, link_to_path
""", [repodir_id]).fetchall()
for file_link in package_files_links:
pkg_id = file_link[0]
pkg_nvra = file_link[2]
object_id = file_link[3]
target_obj_id = None
target_path = os.path.normpath(file_link[5])
target_paths = {}
target_paths[target_path] = True
while target_path != '':
new_target_path = None
tofile = conn.execute("""
SELECT id, link_to_path FROM package_files WHERE path = ? AND package_id = ?
""", [target_path, pkg_id]).fetchone()
if tofile:
target_obj_id = tofile[0]
new_target_path = tofile[1]
if not target_obj_id:
# Just two levels of dependency recursion - TODO: Full depth recursion?
tofile = conn.execute("""
SELECT id, link_to_path FROM package_files WHERE path = ? AND package_id IN (
SELECT dep_package_id FROM package_requires_res WHERE package_id = ?
UNION
SELECT dep_package_id FROM package_requires_res WHERE package_id IN
(SELECT dep_package_id FROM package_requires_res WHERE package_id = ?)
)
""", [target_path, pkg_id, pkg_id]).fetchone()
if tofile:
target_obj_id = tofile[0]
new_target_path = tofile[1]
if new_target_path is None:
break
target_path = os.path.normpath(new_target_path)
if target_path in target_paths:
print 'Link loop detected! %s: %s -> %s' % (pkg_nvra, file_link[5], target_path)
target_obj_id = None
break
target_paths[target_path] = True
if target_obj_id:
conn.execute("""
UPDATE package_files SET link_to_file_id = ? WHERE id = ?
""", [target_obj_id, object_id])
else:
# print 'target %s not found (%d: %s)' % (target_path, pkg_id, pkg_name)
pass
def process_repodir_so_needed(conn, repodir_id, repodir_name, repodir_depends):
print 'Searching object files resolutions (1)...'
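    # Pass 1 (res_type = 1): resolve NEEDED entries via already-resolved rpm
    # requires; provides of 64-bit sonames carry a '()(64bit)' suffix, hence
    # the "so_needed.name || '()(64bit)'" alternative in the join below.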
conn.execute("""
INSERT INTO so_needed_res(so_needed_id, dep_obj_file_id, res_type)
SELECT so_needed.id, tpf.id, 1 FROM packages
CROSS JOIN package_files spf CROSS JOIN so_needed CROSS JOIN rpm_requires
CROSS JOIN package_requires_res req_res CROSS JOIN package_files tpf
WHERE so_needed.obj_file_id = spf.id AND spf.package_id = packages.id AND
packages.repodir_id = ? AND spf.package_id = rpm_requires.package_id AND
(so_needed.name = rpm_requires.name OR
so_needed.name || '()(64bit)' = rpm_requires.name) AND
packages.id = req_res.package_id AND
rpm_requires.id = req_res.requires_id AND
req_res.dep_package_id = tpf.package_id AND
so_needed.name = tpf.basename
""", [repodir_id])
search_repodirs = [repodir_id]
search_repodirs.extend(repodir_depends)
in_repodirs = ','.join(str(id) for id in search_repodirs)
objects_not_resolved1 = conn.execute("""
SELECT packages.id AS package_id, packages.nvra,
package_files.id AS object_id, package_files.basename AS object_name,
so_needed.id AS so_needed_id, so_needed.name AS so_needed_name
FROM packages CROSS JOIN package_files CROSS JOIN so_needed
WHERE repodir_id = ? AND package_files.package_id = packages.id AND
so_needed.obj_file_id = package_files.id AND
NOT EXISTS (SELECT 1 FROM so_needed_res
WHERE so_needed_res.so_needed_id = so_needed.id)
ORDER BY packages.nvra, package_files.basename, so_needed.name
""", [repodir_id]).fetchall()
print 'Object files not resolved by rpm requires-provides: ', len(objects_not_resolved1)
if objects_not_resolved1:
print 'Searching object files resolutions (2)...'
in_so_needed = ','.join(str(obj_rec[4]) for obj_rec in objects_not_resolved1)
conn.execute("""
INSERT INTO so_needed_res(so_needed_id, dep_obj_file_id, res_type)
SELECT so_needed.id, tpf.id, 2 FROM packages, package_files tpf, so_needed
WHERE packages.repodir_id IN (%s) AND packages.id = tpf.package_id AND
so_needed.id IN (%s) AND tpf.basename = so_needed.name
""" % (in_repodirs, in_so_needed))
objects_not_resolved2 = conn.execute("""
SELECT packages.id AS package_id, packages.nvra,
package_files.id AS object_id, package_files.basename AS object_name,
so_needed.id AS so_needed_id, so_needed.name AS so_needed_name
FROM packages, package_files, so_needed
WHERE repodir_id = ? AND package_files.package_id = packages.id AND
so_needed.obj_file_id = package_files.id AND
NOT EXISTS (SELECT 1 FROM so_needed_res WHERE so_needed_res.so_needed_id = so_needed.id)
ORDER BY packages.nvra, package_files.basename, so_needed.name
""", [repodir_id]).fetchall()
print 'Object files not resolved: ', len(objects_not_resolved2)
def process_repodir_obj_symbols(conn, repodir_id, repodir_name, repodir_depends):
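    # Three passes: res_type 1 follows so_needed entries resolved through rpm
    # requires-provides, res_type 2 follows name-only so_needed matches, and
    # res_type 3 is a last-resort search by symbol name across the repodirs.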
print 'Searching symbols resolutions (1)...'
# EXPLAIN QUERY PLAN
conn.execute("""
INSERT INTO obj_symbols_res(obj_sym_id, dep_obj_sym_id, res_type)
SELECT sos.id, tos.id, 1 FROM packages CROSS JOIN package_files spf CROSS JOIN obj_symbols sos CROSS JOIN
so_needed CROSS JOIN so_needed_res CROSS JOIN package_files tpf CROSS JOIN obj_symbols tos
WHERE packages.repodir_id = ? AND packages.id = spf.package_id AND spf.id = sos.obj_file_id AND
sos.sym_type = 0 AND sos.obj_file_id = so_needed.obj_file_id AND so_needed.id = so_needed_res.so_needed_id AND
so_needed_res.res_type = 1 AND so_needed_res.dep_obj_file_id = tpf.id AND
(tos.obj_file_id = tpf.id OR tos.obj_file_id = tpf.link_to_file_id) AND
tos.sym_type = 1 AND tos.name = sos.name
""", [repodir_id])
print 'Searching symbols resolutions (2)...'
conn.execute("""
INSERT INTO obj_symbols_res(obj_sym_id, dep_obj_sym_id, res_type)
SELECT sos.id, tos.id, 2 FROM packages CROSS JOIN package_files CROSS JOIN obj_symbols sos CROSS JOIN
so_needed CROSS JOIN so_needed_res CROSS JOIN obj_symbols tos
WHERE packages.repodir_id = ? AND packages.id = package_files.package_id AND package_files.id = sos.obj_file_id AND
sos.sym_type = 0 AND sos.obj_file_id = so_needed.obj_file_id AND so_needed.id = so_needed_res.so_needed_id AND
so_needed_res.res_type = 2 AND so_needed_res.dep_obj_file_id = tos.obj_file_id AND
tos.sym_type = 1 AND tos.name = sos.name
""", [repodir_id])
print 'Searching symbols resolutions (3)...'
search_repodirs = [repodir_id]
search_repodirs.extend(repodir_depends)
in_repodirs = ','.join(str(id) for id in search_repodirs)
conn.execute("""
INSERT INTO obj_symbols_res(obj_sym_id, dep_obj_sym_id, res_type)
SELECT sos.id, tos.id, 3 FROM packages CROSS JOIN package_files CROSS JOIN obj_symbols sos CROSS JOIN
obj_symbols tos CROSS JOIN package_files tpf
WHERE repodir_id = ? AND packages.id = package_files.package_id AND package_files.id = sos.obj_file_id AND
sos.sym_type = 0 AND NOT EXISTS (SELECT 1 FROM obj_symbols_res WHERE obj_sym_id = sos.id) AND
sos.name = tos.name AND tos.sym_type = 1 AND tos.obj_file_id = tpf.id AND
tpf.package_id IN (SELECT id FROM packages WHERE repodir_id IN (%s))
""" % in_repodirs, [repodir_id])
def process_repodir_sources(conn, repodir_id, repo_sources):
if not repo_sources:
return
print 'Searching source rpms...'
conn.execute("""
UPDATE packages SET sourcerpm_package =
(SELECT id FROM packages ps
WHERE repodir_id IN (SELECT id FROM repodirs WHERE name = ?) AND
ps.nvra = substr(packages.sourcerpm, 1, length(packages.sourcerpm) - 4)
)
WHERE repodir_id = ? AND sourcerpm LIKE '%.rpm'
""", [repo_sources, repodir_id])
def process_repodir(conn, repo_id, repo_name, repo_sources,
depend_repodir_list, repodirs_processed, dep_arch):
all_depends_ready = True
repodir_depends = []
in_repodirs = ','.join(str(id) for id in repodirs_processed)
for dr_name in depend_repodir_list:
repodir_depend_found = conn.execute("""
SELECT id, name FROM repodirs WHERE id IN (%s) AND name = ?
""" % in_repodirs, [dr_name]).fetchall()
if len(repodir_depend_found) == 0:
all_depends_ready = False
break
else:
for rdf in repodir_depend_found:
repodir_depends.append(rdf[0])
if not all_depends_ready:
return False
print repo_name, ' ', depend_repodir_list, ' ', dep_arch
process_repodir_dependencies(conn, repo_id, repo_name, repodir_depends, 'conflicts')
process_repodir_dependencies(conn, repo_id, repo_name, repodir_depends, 'obsoletes')
process_repodir_requires(conn, repo_id, repo_name, repodir_depends, dep_arch)
process_repodir_file_links(conn, repo_id, repo_name, repodir_depends)
process_repodir_so_needed(conn, repo_id, repo_name, repodir_depends)
process_repodir_obj_symbols(conn, repo_id, repo_name, repodir_depends)
process_repodir_sources(conn, repo_id, repo_sources)
return True
def process_repodb(conn):
def extract_arch(arch_template, repo_name):
arch_sign = '$arch'
spos = arch_template.find(arch_sign)
if spos >= 0:
repo_prefix = arch_template[:spos]
repo_postfix = arch_template[spos + len(arch_sign):]
if repo_name.startswith(repo_prefix) and \
repo_name.endswith(repo_postfix):
return repo_name[len(repo_prefix) :
len(repo_name) - len(repo_postfix)]
return None
conn.executescript("""
DELETE FROM package_requires_res;
DELETE FROM package_conflicts_res;
DELETE FROM package_obsoletes_res;
DELETE FROM so_needed_res;
DELETE FROM obj_symbols_res;
UPDATE package_files SET link_to_file_id = NULL;
UPDATE packages SET sourcerpm_package = NULL;
ANALYZE;
""")
repodirs_processed = []
#Process binary rpms
repodirs_processed_cnt = -1
while repodirs_processed_cnt < len(repodirs_processed):
in_repodirs = ','.join(str(id) for id in repodirs_processed)
repodirs = conn.execute("""
SELECT id, name, sources FROM repodirs WHERE sources <> '.' AND id NOT IN (%s)
""" % in_repodirs).fetchall()
for repodir in repodirs:
(repodir_id, repodir_name, repodir_sources) = \
(repodir[0], repodir[1], repodir[2])
depend_repodir_names = conn.execute(
"""
SELECT depend_repodir_name FROM repodir_depends WHERE repodir_id = ?
""", [repodir_id]).fetchall()
depend_repodir_list = [drn[0] for drn in depend_repodir_names]
if process_repodir(conn, repodir_id, repodir_name, repodir_sources,
depend_repodir_list, repodirs_processed, None):
repodirs_processed.append(repodir_id)
repodirs_processed_cnt = len(repodirs_processed)
#Process SRPMS
repodirs_processed_cnt = -1
while repodirs_processed_cnt < len(repodirs_processed):
repodirs = conn.execute("""
SELECT id, name, sources FROM repodirs WHERE sources = '.'
""").fetchall()
for repodir in repodirs:
(repodir_id, repodir_name, repodir_sources) = \
(repodir[0], repodir[1], repodir[2])
src_build_archs = []
depend_repodir_names = conn.execute(
"""
SELECT depend_repodir_name FROM repodir_depends WHERE repodir_id = ?
""", [repodir_id]).fetchall()
for drn in depend_repodir_names:
dr_name = drn[0]
if '$arch' in dr_name:
depend_repodir_found = conn.execute(
"""
SELECT id, name FROM repodirs WHERE name LIKE ?
""", [dr_name.replace('$arch', '%')]).fetchall()
if len(depend_repodir_found) == 0:
                        raise Exception('Dependency repositories not found!')
for drf in depend_repodir_found:
arch = extract_arch(dr_name, drf[1])
if arch:
if arch == 'SRPMS':
continue
src_build_archs.append(arch)
else:
raise Exception('Source repository should depend '
'on */$arch/* repo.')
for arch in src_build_archs:
depend_repodir_list = [drn[0].replace('$arch', arch)
for drn in depend_repodir_names]
if not process_repodir(conn, repodir_id, repodir_name, None,
depend_repodir_list, repodirs_processed,
arch):
raise Exception('Couldn\'t process SRPMS repository!')
repodirs_processed.append(repodir_id)
repodirs_processed_cnt = len(repodirs_processed)
in_repodirs = ','.join(str(id) for id in repodirs_processed)
repodirs_not_processed = conn.execute("""
SELECT id, name, sources FROM repodirs rd WHERE id NOT IN (%s)
""" % in_repodirs).fetchall()
if len(repodirs_not_processed) > 0:
print 'Repodirs not processed due to dependencies:'
for rdna in repodirs_not_processed:
print rdna[1]
conn.execute("""
ANALYZE""")
conn.commit()
def main(args):
global NUM_PROCESSES
if hasattr(os, "sysconf"):
        if "SC_NPROCESSORS_ONLN" in os.sysconf_names:
nproc = os.sysconf("SC_NPROCESSORS_ONLN")
if isinstance(nproc, int) and nproc > 0:
NUM_PROCESSES = nproc
options = parse_args()
if os.path.exists(DB):
os.unlink(DB)
conn = sqlite3.connect(DB)
if import_repositories(options, conn):
process_repodb(conn)
else:
os.unlink(DB)
if __name__ == "__main__":
main(sys.argv)