Refactoring; Implementing file_conflicts and provides_conflicts reports

This commit is contained in:
Alexander Lakhin 2014-02-14 15:40:49 +04:00
parent fc2df4ccb5
commit 6bdb49da9d
5 changed files with 507 additions and 375 deletions

View file

@ -9,7 +9,7 @@ import string
import rpm
import re
import gettext
from versutils import *
prepare_repodb = __import__("prepare-repodb")
gettext.install('urpm-tools')
@ -72,7 +72,7 @@ SELECT id, nvra, version, release FROM packages WHERE repodir_id IN (%s) AND nam
verrel = (vpid[2] if vpid[2] is not None else '') + '-' + \
(vpid[3] if vpid[3] is not None else '')
if not better_found:
better_found = version_ok(chosen_version,
better_found = prepare_repodb.version_ok(chosen_version,
RPMSENSE_GREATER,
verrel)
if better_found:
@ -91,9 +91,9 @@ SELECT id, nvra, version, release FROM packages WHERE repodir_id IN (%s) AND nam
in_packages = ','.join(str(id) for id in package_ids)
for required_package in c.execute("""
SELECT packages.id, packages.name, nvra
FROM packages, package_depend_res
WHERE packages.id = package_depend_res.dep_package_id AND
package_depend_res.package_id IN (%s) AND
FROM packages, package_requires_res
WHERE packages.id = package_requires_res.dep_package_id AND
package_requires_res.package_id IN (%s) AND
packages.repodir_id IN (%s) AND
packages.id NOT IN (%s)
ORDER BY packages.name

View file

@ -65,9 +65,9 @@ SELECT packages.id, nvra, repodir_id, repodirs.name,
FROM packages, repodirs, rpm_requires
WHERE packages.repodir_id = repodirs.id AND
packages.id = rpm_requires.package_id AND
NOT EXISTS (SELECT 1 FROM package_depend_res pdr
WHERE pdr.package_id = packages.id AND
pdr.requires_id = rpm_requires.id)
NOT EXISTS (SELECT 1 FROM package_requires_res pqr
WHERE pqr.package_id = packages.id AND
pqr.requires_id = rpm_requires.id)
ORDER BY repodir_id, nvra""").fetchall()
all_broken = {}
@ -118,11 +118,11 @@ SELECT packages.id, nvra, repodir_id, repodirs.name,
packages_broken_recurs = dbc.execute("""
SELECT packages.id, nvra, repodir_id, repodirs.name,
rpm_requires.name AS req_name, build_arch, dep_package_id
FROM packages, repodirs, rpm_requires, package_depend_res
FROM packages, repodirs, rpm_requires, package_requires_res
WHERE packages.repodir_id = repodirs.id AND
packages.id = rpm_requires.package_id AND
packages.id = package_depend_res.package_id AND
rpm_requires.id = package_depend_res.requires_id AND
packages.id = package_requires_res.package_id AND
rpm_requires.id = package_requires_res.requires_id AND
dep_package_id IN (%(pids)s) AND
packages.id NOT IN (%(pids)s)
ORDER BY repodir_id, nvra""" % {'pids': pids}).fetchall()
@ -143,7 +143,7 @@ SELECT id, name, sources FROM repodirs ORDER BY id""").fetchall()
reponames = {repodir[0]: repodir[1] for repodir in all_repodirs}
if broken_recursive:
print 'Recursive broken dependencies:'
print '===\nRecursive broken dependencies:'
for rd in all_repodirs:
(rd_id, rd_name, rd_sources) = (rd[0], rd[1], rd[2])
if rd_sources == '.':
@ -223,9 +223,9 @@ SELECT id, name, sources, path FROM repodirs ORDER BY id
(rd_id, rd_name) = (repodir[0], repodir[1])
loopbacks = dbc.execute("""
SELECT p.id, p.nvra, rpm_requires.name
FROM package_depend_res pdr, packages p, rpm_requires
WHERE pdr.package_id = p.id AND pdr.package_id = dep_package_id AND
rpm_requires.id = pdr.requires_id and p.repodir_id = ?
FROM package_requires_res pqr, packages p, rpm_requires
WHERE pqr.package_id = p.id AND pqr.package_id = dep_package_id AND
rpm_requires.id = pqr.requires_id and p.repodir_id = ?
ORDER BY p.nvra, rpm_requires.name
""", [rd_id]).fetchall()
if loopbacks:
@ -253,27 +253,6 @@ SELECT p.id, p.nvra, rpm_requires.name
print 'Total: %d' % cnt
def detect_lost_sources(dbc):
print '==='
print 'Lost sources:'
repodirs = dbc.execute("""
SELECT id, name, sources, path FROM repodirs WHERE sources <> '.' ORDER BY id
""").fetchall()
for repodir in repodirs:
(rd_id, rd_name) = (repodir[0], repodir[1])
lost_sources = dbc.execute("""
SELECT name, nvra, sourcerpm FROM packages
WHERE repodir_id = ? AND
sourcerpm IS NOT NULL AND sourcerpm_package IS NULL
ORDER BY name
""", [rd_id]).fetchall()
if lost_sources:
print '%d) %s' % (rd_id, rd_name)
for ls in lost_sources:
print '\t%s (%s)' % (ls[1], ls[2])
print 'Total: %d' % len(lost_sources)
def analyze_partitioning(dbc):
print '==='
print 'Possible partitioning:'
@ -310,20 +289,20 @@ SELECT id, name, nvra
part_packages = ','.join(str(id) for id in partition_ids)
upper_packages = dbc.execute("""
SELECT packages.id, packages.name, nvra
FROM packages, package_depend_res
WHERE packages.id = package_depend_res.package_id AND
FROM packages, package_requires_res
WHERE packages.id = package_requires_res.package_id AND
packages.repodir_id = ? AND
package_depend_res.dep_package_id IN (%s) AND
package_requires_res.dep_package_id IN (%s) AND
packages.id NOT IN (%s)
ORDER BY packages.name
""" % (cl_packages, part_packages), [rd_id]).fetchall()
lower_packages = dbc.execute("""
SELECT packages.id, packages.name, nvra
FROM packages, package_depend_res
WHERE packages.id = package_depend_res.dep_package_id AND
FROM packages, package_requires_res
WHERE packages.id = package_requires_res.dep_package_id AND
packages.repodir_id = ? AND
package_depend_res.package_id IN (%s) AND
package_requires_res.package_id IN (%s) AND
packages.id NOT IN (%s)
ORDER BY packages.name
""" % (cl_packages, part_packages), [rd_id]).fetchall()
@ -361,161 +340,256 @@ SELECT packages.id, packages.name, nvra
print '\t%s' % s
print 'Total: %d' % len(singles)
def detect_lost_object_files(dbc):
header = '===\n' \
'Lost object (executable) files (provided but not found):'
repodirs = dbc.execute("""
class query_output:
    """Base class for the per-repository text reports.

    The constructor reads the repository list from the ``repodirs`` table
    and calls ``get_data`` once per repository.  The results are stored in
    ``self.repositories`` as ``{repodir_id: {'output': ..., 'count': ...}}``
    and rendered by ``print_text``.

    Subclasses set ``title`` and override ``get_data``; they may also
    override ``print_text_reponame`` to change the repository heading line.
    """

    # Placeholders only -- every instance receives its own values in
    # __init__.  In particular ``repositories`` must not be a dict here:
    # a dict defined on the class is shared by all report objects, so two
    # reports alive at the same time would overwrite each other's data.
    repositories = None
    repodirs = None
    title = None
    dbc = None

    def __init__(self, dbc):
        """Collect the report data for every repository.

        dbc -- object with a DB-API style ``execute`` method (the rest of
               the file passes an sqlite3 cursor).
        """
        self.dbc = dbc
        self.repositories = {}
        self.repodirs = dbc.execute("""
SELECT id, name, sources, path FROM repodirs ORDER BY id
""").fetchall()
        for repodir in self.repodirs:
            rd_id = repodir[0]
            (output, count) = self.get_data(rd_id)
            self.repositories[rd_id] = {'output': output, 'count': count}

    def get_repodir_name(self, repodir_id):
        """Return the name of the repository with the given id, or None."""
        for repodir in self.repodirs:
            if repodir[0] == repodir_id:
                return repodir[1]
        return None

    def print_text(self):
        """Print the report to stdout.

        A repository is skipped when its output is an empty list.  An
        output of None (count-only reports) is always printed.  The title
        is printed once, before the first repository that is shown.
        """
        title_printed = False
        for repodir in self.repodirs:
            (rd_id, rd_name) = (repodir[0], repodir[1])
            if rd_id not in self.repositories:
                continue
            repo_data = self.repositories[rd_id]
            repo_output = repo_data['output']
            if repo_output is not None and len(repo_output) == 0:
                continue
            if self.title and not title_printed:
                print('===\n' + self.title + ':')
                title_printed = True
            self.print_text_reponame(rd_id, rd_name)
            if repo_output is not None:
                for line in repo_output:
                    print('\t' + line)
                print('Total: %d' % repo_data['count'])

    def print_text_reponame(self, repodir_id, repodir_name):
        """Print the heading line for one repository (overridable hook)."""
        print('%d) %s' % (repodir_id, repodir_name))

    def get_data(self, repodir_id):
        """Return ``(output, count)`` for one repository.

        ``output`` is a list of text lines or None; the base class has no
        data and returns ``(None, 0)``.
        """
        return (None, 0)
class query_output_rows(query_output):
    """Report whose lines are the first column of the rows of ``query``.

    ``query`` must take exactly one ``?`` parameter, the repository id.
    """

    query = ''

    def get_data(self, repodir_id):
        """Run ``query`` for one repository; return (lines, number of rows)."""
        cursor = self.dbc.execute(self.query, [repodir_id])
        lines = []
        for record in cursor.fetchall():
            lines.append(record[0])
        return (lines, len(lines))
class query_output_count(query_output):
    """Report that shows one number per repository.

    ``query`` must take exactly one ``?`` parameter (the repository id) and
    return the number in the first column of its first row.
    """

    query = None

    def get_data(self, repodir_id):
        """Return ``(None, count)``: there are no text lines, only a count."""
        first_row = self.dbc.execute(self.query, [repodir_id]).fetchone()
        total = first_row[0]
        return (None, total)

    def print_text_reponame(self, repodir_id, repodir_name):
        """Print the repository heading with the stored count appended."""
        stored = self.repositories[repodir_id]
        print('%d) %s: %d' % (repodir_id, repodir_name, stored['count']))
class lost_sources(query_output_rows):
    # "Lost sources" report: lists packages of a repository whose sourcerpm
    # field is set while sourcerpm_package is NULL.
    # NOTE(review): presumably sourcerpm_package is the id of the matched
    # source package -- confirm against the code that fills the column.
    title = 'Lost sources'
    # One output line per package, formatted as "<nvra> (<sourcerpm>)" and
    # ordered by package name; the single parameter is the repository id.
    query = """
SELECT nvra || ' (' || sourcerpm || ')' FROM packages
WHERE repodir_id = ? AND
sourcerpm IS NOT NULL AND sourcerpm_package IS NULL
ORDER BY name
"""
class lost_object_files(query_output_rows):
title = 'Lost object (executable) files (provided but not found)'
query = """
SELECT nvra || ': ' || package_files.path
FROM packages, package_files
WHERE repodir_id = ? AND packages.id = package_files.package_id AND mark = 'not-found'
ORDER BY packages.name, package_files.path
""", [rd_id]).fetchall()
if lost_object_files:
if header:
print header
header = None
print '%d) %s' % (rd_id, rd_name)
for lof in lost_object_files:
print '\t%s: %s' % (lof[0], lof[1])
print 'Total: %d' % len(lost_object_files)
"""
def detect_broken_object_links(dbc):
header = '===\n' \
'Invalid object (executable) file links:'
repodirs = dbc.execute("""
SELECT id, name, sources, path FROM repodirs ORDER BY id
""").fetchall()
for repodir in repodirs:
(rd_id, rd_name) = (repodir[0], repodir[1])
broken_object_links = dbc.execute("""
SELECT nvra, package_files.path, link_to_path, mark
class broken_object_links(query_output_rows):
title = 'Invalid object (executable) file links'
query = """
SELECT nvra || ': ' || package_files.path || ' -/-> ' || link_to_path
FROM packages, package_files
WHERE repodir_id = ? AND packages.id = package_files.package_id AND
mark = 'link' AND link_to_path IS NOT NULL AND link_to_file_id IS NULL
ORDER BY packages.name, package_files.path
""", [rd_id]).fetchall()
if broken_object_links:
if header:
print header
header = None
print '%d) %s' % (rd_id, rd_name)
for bol in broken_object_links:
print '\t%s: %s -/-> %s' % \
(bol[0], bol[1], bol[2])
print 'Total: %d' % len(broken_object_links)
"""
def get_repodir_depends(dbc, repodir_id):
def print_reponame_with_depends(dbc, repodir_id, repodir_name):
dep_repos = dbc.execute("""
SELECT depend_repodir_name FROM repodir_depends WHERE repodir_id = ?
""", [repodir_id]).fetchall()
return ', '.join([dep_repo[0] for dep_repo in dep_repos])
repodir_depends = ', '.join([dep_repo[0] for dep_repo in dep_repos])
print ('%d) %s' % (repodir_id, repodir_name)) + \
('' if repodir_depends == '' else
(' (depends on: %s)' % repodir_depends))
def detect_so_needed_not_resolved(dbc):
repodirs = dbc.execute("""
SELECT id, name, sources, path FROM repodirs ORDER BY id
""").fetchall()
print '==='
print 'Objects needed and resolved by rpm requires-provides:'
for repodir in repodirs:
(rd_id, rd_name) = (repodir[0], repodir[1])
objects_needed_resolved1 = dbc.execute("""
class so_needed_resolved1(query_output_count):
title = 'Objects needed and resolved by rpm requires-provides'
query = """
SELECT COUNT(1) FROM packages CROSS JOIN package_files CROSS JOIN so_needed CROSS JOIN so_needed_res
WHERE repodir_id = ? AND package_files.package_id = packages.id AND
so_needed.obj_file_id = package_files.id AND so_needed_id = so_needed.id AND res_type = 1
""", [rd_id]).fetchone()
print '%d) %s: %d' % (rd_id, rd_name, objects_needed_resolved1[0])
"""
print '==='
print 'Objects needed and resolved by flat search:'
for repodir in repodirs:
(rd_id, rd_name) = (repodir[0], repodir[1])
objects_needed_resolved2 = dbc.execute("""
class so_needed_resolved2(query_output_count):
title = 'Objects needed and resolved by flat search'
query = """
SELECT COUNT(1) FROM packages CROSS JOIN package_files CROSS JOIN so_needed CROSS JOIN so_needed_res
WHERE repodir_id = ? AND package_files.package_id = packages.id AND
so_needed.obj_file_id = package_files.id AND so_needed_id = so_needed.id AND res_type = 2
""", [rd_id]).fetchone()
print '%d) %s: %d' % (rd_id, rd_name, objects_needed_resolved2[0])
"""
header = '===' \
'Objects needed but not resolved:'
for repodir in repodirs:
(rd_id, rd_name) = (repodir[0], repodir[1])
objects_needed_not_resolved = dbc.execute("""
SELECT packages.nvra, package_files.path, so_needed.name
class so_needed_not_resolved(query_output_rows):
title = 'Objects needed but not resolved'
query = """
SELECT packages.nvra || ': ' || package_files.path || ' -?-> ' || so_needed.name
FROM packages CROSS JOIN package_files CROSS JOIN so_needed
LEFT OUTER JOIN so_needed_res ON so_needed_id = so_needed.id
WHERE repodir_id = ? AND package_files.package_id = packages.id AND
so_needed.obj_file_id = package_files.id AND so_needed_id IS NULL
""", [rd_id]).fetchall()
if objects_needed_not_resolved:
repodir_depends = get_repodir_depends(dbc, rd_id)
if header:
print header
header = None
print ('%d) %s' % (rd_id, rd_name)) + \
('' if repodir_depends == '' else
(' (depends on: %s)' % repodir_depends))
for obj_nr in objects_needed_not_resolved:
print '\t%s: %s -?-> %s' % (obj_nr[0], obj_nr[1], obj_nr[2])
print 'Total: %d' % len(objects_needed_not_resolved)
"""
def print_text_reponame(self, repodir_id, repodir_name):
    """Print the repository heading together with the repositories it depends on.

    The method must carry this name: the report printer invokes
    ``self.print_text_reponame(...)`` for every repository, so a method
    called ``print_reponame`` is never reached and the "(depends on: ...)"
    suffix is silently dropped.
    """
    print_reponame_with_depends(self.dbc, repodir_id, repodir_name)

# Former name of the hook, kept as an alias for backward compatibility.
print_reponame = print_text_reponame
def detect_symbols_not_resolved(dbc):
repodirs = dbc.execute("""
SELECT id, name, sources, path FROM repodirs ORDER BY id
""").fetchall()
print '==='
print 'Symbols resolved by .so NEEDED search:'
for repodir in repodirs:
(rd_id, rd_name) = (repodir[0], repodir[1])
symbols_resolved1_2 = dbc.execute("""
class symbols_resolved1_2(query_output_count):
title = 'Symbols resolved by .so NEEDED search'
query = """
SELECT COUNT(1) FROM packages CROSS JOIN package_files CROSS JOIN obj_symbols CROSS JOIN obj_symbols_res
WHERE packages.repodir_id = ? AND packages.id = package_files.package_id AND
package_files.id = obj_symbols.obj_file_id AND sym_type = 0 AND
obj_symbols_res.obj_sym_id = obj_symbols.id AND res_type IN (1, 2)
""", [rd_id]).fetchone()
print '%d) %s: %d' % (rd_id, rd_name, symbols_resolved1_2[0])
"""
print '==='
print 'Symbols resolved by flat search:'
for repodir in repodirs:
(rd_id, rd_name) = (repodir[0], repodir[1])
symbols_resolved3 = dbc.execute("""
class symbols_resolved3(query_output_count):
title = 'Symbols resolved by flat search'
query = """
SELECT COUNT(1) FROM packages CROSS JOIN package_files CROSS JOIN obj_symbols CROSS JOIN obj_symbols_res
WHERE packages.repodir_id = ? AND packages.id = package_files.package_id AND
package_files.id = obj_symbols.obj_file_id AND sym_type = 0 AND
obj_symbols_res.obj_sym_id = obj_symbols.id AND res_type = 3
""", [rd_id]).fetchone()
print '%d) %s: %d' % (rd_id, rd_name, symbols_resolved3[0])
"""
header = '===' \
'Symbols not resolved:'
for repodir in repodirs:
(rd_id, rd_name) = (repodir[0], repodir[1])
symbols_not_resolved = dbc.execute("""
SELECT packages.nvra, package_files.path, obj_symbols.name
class symbols_not_resolved(query_output_rows):
title = 'Symbols not resolved'
query = """
SELECT packages.nvra || ': ' || package_files.path || ' -?-> ' || obj_symbols.name
FROM packages CROSS JOIN package_files CROSS JOIN obj_symbols
WHERE packages.repodir_id = ? AND packages.id = package_files.package_id AND
package_files.id = obj_symbols.obj_file_id AND sym_type = 0 AND
NOT EXISTS (SELECT 1 FROM obj_symbols_res WHERE obj_sym_id = obj_symbols.id)
""", [rd_id]).fetchall()
if symbols_not_resolved:
repodir_depends = get_repodir_depends(dbc, rd_id)
if header:
print header
header = None
print ('%d) %s' % (rd_id, rd_name)) + \
('' if repodir_depends == '' else
(' (depends on: %s)' % repodir_depends))
for sym_nr in symbols_not_resolved:
print '\t%s: %s -?-> %s' % (sym_nr[0], sym_nr[1], sym_nr[2])
print 'Total: %d' % len(symbols_not_resolved)
"""
def print_text_reponame(self, repodir_id, repodir_name):
    """Print the repository heading together with the repositories it depends on.

    The method must carry this name: the report printer invokes
    ``self.print_text_reponame(...)`` for every repository, so a method
    called ``print_reponame`` is never reached and the "(depends on: ...)"
    suffix is silently dropped.
    """
    print_reponame_with_depends(self.dbc, repodir_id, repodir_name)

# Former name of the hook, kept as an alias for backward compatibility.
print_reponame = print_text_reponame
class file_conflicts(query_output):
    """Report of paths shipped by more than one differently named package.

    The query pairs every non-directory file of a package in the inspected
    repository with a file of the same path in another package from the
    same repository or from a repository listed in ``repodir_depends``.
    Pairs with a row in ``package_conflicts_res`` or
    ``package_obsoletes_res`` (in either direction) are left out.
    """

    title = 'File conflicts'

    def get_data(self, repodir_id):
        """Return ``(lines, len(lines))`` with one line per conflicting path.

        Each line reads ``<path>: <pkg>; <pkg>; ...``.  A package from
        another repository is suffixed with `` {<repository name>}``.
        """
        rows = self.dbc.execute("""
SELECT spf.path, sp.nvra, tp.nvra, tp.repodir_id
FROM packages sp, package_files spf, package_files tpf, packages tp
WHERE sp.id = spf.package_id AND sp.repodir_id = ? AND
spf.mode <> -1 AND (spf.mode & 16384) == 0 AND
spf.path = tpf.path AND spf.package_id <> tpf.package_id AND tpf.mode <> -1 AND
tpf.package_id = tp.id AND
tp.name <> sp.name AND
(tp.repodir_id == sp.repodir_id OR
tp.repodir_id IN
(SELECT rd.id FROM repodirs rd, repodir_depends rdd
WHERE depend_repodir_name = rd.name AND rdd.repodir_id = sp.repodir_id)) AND
NOT EXISTS (
SELECT 1 FROM package_conflicts_res pcr
WHERE (sp.id = pcr.package_id AND tp.id = pcr.dep_package_id) OR (tp.id = pcr.package_id AND sp.id = pcr.dep_package_id)
) AND
NOT EXISTS (
SELECT 1 FROM package_obsoletes_res por
WHERE (sp.id = por.package_id AND tp.id = por.dep_package_id) OR (tp.id = por.package_id AND sp.id = por.dep_package_id)
)
ORDER BY spf.path, sp.nvra, tp.nvra
""", [repodir_id]).fetchall()
        # tp.name <> sp.name condition added to exclude different versions of the same package

        # Rows arrive ordered by path, so equal paths are adjacent: open a
        # new group whenever the path differs from the last group's path.
        grouped = []
        for row in rows:
            (path, src_pkg, dst_pkg, dst_repodir_id) = \
                (row[0], row[1], row[2], row[3])
            if dst_repodir_id != repodir_id:
                dst_pkg += ' {%s}' % self.get_repodir_name(dst_repodir_id)
            if not grouped or grouped[-1][0] != path:
                grouped.append((path, []))
            members = grouped[-1][1]
            for pkg in (src_pkg, dst_pkg):
                if pkg not in members:
                    members.append(pkg)
        result = ['%s: %s' % (path, '; '.join(members))
                  for (path, members) in grouped]
        return (result, len(result))
class provides_conflicts(query_output):
    """Report of capabilities provided by more than one differently named package.

    Only capabilities that occur in ``rpm_requires`` are considered.  The
    second package may come from the same repository or from a repository
    listed in ``repodir_depends``.  Pairs with a row in
    ``package_conflicts_res`` or ``package_obsoletes_res`` (in either
    direction) are left out.
    """

    title = 'Provided capabilities conflicts'

    def get_data(self, repodir_id):
        """Return ``(lines, len(lines))`` with one line per capability.

        Each line reads ``<capability>: <pkg>; <pkg>; ...``.  A package
        from another repository is suffixed with `` {<repository name>}``.
        """
        rows = self.dbc.execute("""
SELECT srp.name, sp.nvra, tp.nvra, tp.repodir_id FROM packages sp, rpm_provides srp, rpm_provides trp, packages tp
WHERE sp.id = srp.package_id AND sp.repodir_id = ? AND
srp.name = trp.name AND srp.package_id <> trp.package_id AND
trp.package_id = tp.id AND
tp.name <> sp.name AND
(tp.repodir_id == sp.repodir_id OR
tp.repodir_id IN
(SELECT rd.id FROM repodirs rd, repodir_depends rdd
WHERE depend_repodir_name = rd.name AND rdd.repodir_id = sp.repodir_id)) AND
NOT EXISTS (
SELECT 1 FROM package_conflicts_res pcr
WHERE (sp.id = pcr.package_id AND tp.id = pcr.dep_package_id) OR (tp.id = pcr.package_id AND sp.id = pcr.dep_package_id)
) AND
NOT EXISTS (
SELECT 1 FROM package_obsoletes_res por
WHERE (sp.id = por.package_id AND tp.id = por.dep_package_id) OR (tp.id = por.package_id AND sp.id = por.dep_package_id)
) AND
EXISTS (SELECT 1 FROM rpm_requires WHERE rpm_requires.name = srp.name)
ORDER BY srp.name, sp.nvra, tp.nvra
""", [repodir_id]).fetchall()
        # tp.name <> sp.name condition added to exclude different versions of the same package

        # Rows arrive ordered by capability name, so equal names are
        # adjacent: open a new group whenever the name changes.
        grouped = []
        for row in rows:
            (capability, src_pkg, dst_pkg, dst_repodir_id) = \
                (row[0], row[1], row[2], row[3])
            if dst_repodir_id != repodir_id:
                dst_pkg += ' {%s}' % self.get_repodir_name(dst_repodir_id)
            if not grouped or grouped[-1][0] != capability:
                grouped.append((capability, []))
            members = grouped[-1][1]
            for pkg in (src_pkg, dst_pkg):
                if pkg not in members:
                    members.append(pkg)
        result = ['%s: %s' % (capability, '; '.join(members))
                  for (capability, members) in grouped]
        return (result, len(result))
def main(args):
options = parseargs()
@ -524,12 +598,18 @@ def main(args):
dbc = conn.cursor()
detect_broken_dependencies(dbc, options.dot_graphs)
#detect_loops(dbc)
detect_lost_sources(dbc)
analyze_partitioning(dbc)
detect_lost_object_files(dbc)
detect_broken_object_links(dbc)
detect_so_needed_not_resolved(dbc)
detect_symbols_not_resolved(dbc)
lost_sources(dbc).print_text()
lost_object_files(dbc).print_text()
broken_object_links(dbc).print_text()
so_needed_resolved1(dbc).print_text()
so_needed_resolved2(dbc).print_text()
so_needed_not_resolved(dbc).print_text()
symbols_resolved1_2(dbc).print_text()
symbols_resolved3(dbc).print_text()
symbols_not_resolved(dbc).print_text()
file_conflicts(dbc).print_text()
provides_conflicts(dbc).print_text()
conn.close()
if __name__ == "__main__":

View file

@ -21,7 +21,7 @@ gettext.install('urpm-tools')
DB = 'repo.db'
NUM_PROCESSES = 4 # number of CPU's (evaluated automatically)
NUM_PROCESSES = 1 # number of CPU's (evaluated automatically)
RPMFILEMODE_DIRECTORY = 0x4000
RPMFILEMODE_EXECUTE = 0111
@ -64,35 +64,35 @@ def to_string(rpm, tag, val):
return str(val).decode('utf-8', 'replace')
def init_database(conn):
conn.execute("""
conn.executescript("""
CREATE TABLE repodirs(id INTEGER PRIMARY KEY NOT NULL,
name TEXT UNIQUE, path TEXT, arch TEXT, sources TEXT)""")
conn.execute("""
name TEXT UNIQUE, path TEXT, arch TEXT, sources TEXT);
CREATE TABLE repodir_depends(id INTEGER PRIMARY KEY NOT NULL,
repodir_id INTEGER, depend_repodir_name TEXT)""")
conn.execute("""
repodir_id INTEGER, depend_repodir_name TEXT);
CREATE TABLE IF NOT EXISTS package_files(id INTEGER PRIMARY KEY NOT NULL,
package_id INTEGER NOT NULL, basename TEXT, path TEXT,
size INTEGER, mode INTEGER,
link_to_file_id INTEGER, link_to_path TEXT, mark TEXT)""")
conn.execute("""
CREATE TABLE package_depend_res(id INTEGER PRIMARY KEY NOT NULL,
link_to_file_id INTEGER, link_to_path TEXT, mark TEXT);
CREATE TABLE package_requires_res(id INTEGER PRIMARY KEY NOT NULL,
package_id INTEGER, requires_id INTEGER,
provides_id INTEGER, dep_package_id INTEGER)""")
conn.execute("""
provides_id INTEGER, dep_package_id INTEGER);
CREATE TABLE package_conflicts_res(id INTEGER PRIMARY KEY NOT NULL,
package_id INTEGER, conflicts_id INTEGER,
provides_id INTEGER, dep_package_id INTEGER);
CREATE TABLE package_obsoletes_res(id INTEGER PRIMARY KEY NOT NULL,
package_id INTEGER, obsoletes_id INTEGER,
provides_id INTEGER, dep_package_id INTEGER);
CREATE TABLE so_needed(id INTEGER PRIMARY KEY NOT NULL,
obj_file_id INTEGER, name TEXT)""")
conn.execute("""
obj_file_id INTEGER, name TEXT);
CREATE TABLE so_needed_res(id INTEGER PRIMARY KEY NOT NULL,
so_needed_id INTEGER, dep_obj_file_id INTEGER, res_type INTEGER)""")
conn.execute("""
so_needed_id INTEGER, dep_obj_file_id INTEGER, res_type INTEGER);
CREATE TABLE obj_symbols(id INTEGER PRIMARY KEY NOT NULL,
obj_file_id INTEGER, name TEXT, sym_type INTEGER)""")
conn.execute("""
obj_file_id INTEGER, name TEXT, sym_type INTEGER);
CREATE TABLE obj_symbols_res(id INTEGER PRIMARY KEY NOT NULL,
obj_sym_id INTEGER, dep_obj_sym_id INTEGER, res_type INTEGER)""")
conn.execute("""PRAGMA synchronous = OFF""")
conn.execute("""PRAGMA journal_mode = OFF""")
obj_sym_id INTEGER, dep_obj_sym_id INTEGER, res_type INTEGER);
PRAGMA synchronous = OFF;
PRAGMA journal_mode = OFF;
""")
def index_database(conn):
print 'Indexing the database...'
@ -103,8 +103,12 @@ CREATE INDEX pkg_nvra ON packages(nvra);
CREATE INDEX pkg_arch ON packages(arch);
CREATE INDEX pkg_group ON packages(rpm_group);
CREATE INDEX pkg_repodir ON packages(repodir_id);
CREATE INDEX pkg_dr_pkg_req ON package_depend_res(package_id, requires_id);
CREATE INDEX pkg_dr_pkg_prov ON package_depend_res(dep_package_id, provides_id);
CREATE INDEX pkg_rq_pkg_req ON package_requires_res(package_id, requires_id);
CREATE INDEX pkg_rq_pkg_prov ON package_requires_res(dep_package_id, provides_id);
CREATE INDEX pkg_cf_pkg_conf ON package_conflicts_res(package_id, conflicts_id);
CREATE INDEX pkg_cf_pkg_prov ON package_conflicts_res(dep_package_id, provides_id);
CREATE INDEX pkg_ob_pkg_obs ON package_obsoletes_res(package_id, obsoletes_id);
CREATE INDEX pkg_ob_pkg_prov ON package_obsoletes_res(dep_package_id, provides_id);
CREATE INDEX pkg_file_pkg_id ON package_files(package_id);
CREATE INDEX pkg_file_name ON package_files(basename);
CREATE INDEX pkg_file_path ON package_files(path);
@ -356,7 +360,7 @@ def process_package_worker(num, queue_in, generator, gen_lock, db_struct,
files_dirs[file_path] = False
continue
dir_name = os.path.dirname(file_path)
if dir_name not in files_dirs:
if dir_name != '' and dir_name not in files_dirs:
files_dirs[dir_name] = True
if no_shared_objects:
continue
@ -687,6 +691,7 @@ CREATE TABLE IF NOT EXISTS %s (id INTEGER PRIMARY KEY NOT NULL,
def main(args):
global NUM_PROCESSES
if os.path.exists(DB):
os.unlink(DB)

View file

@ -7,10 +7,210 @@ import gettext
import argparse
import sqlite3
import re
from versutils import *
import rpm
RPMSENSE_LESS = 0x02
RPMSENSE_GREATER = 0x04
RPMSENSE_EQUAL = 0x08
RPMSENSE_SENSEMASK = 0x0f
RPMSENSE_FIND_PROVIDES = 0x8000
RPMSENSE_MISSINGOK = 0x80000
RPMSENSE_SCRIPT_POST = 0x400
RPMSENSE_SCRIPT_PREUN = 0x800
RPMSENSE_SCRIPT_POSTUN = 0x1000
DB = 'repo.db'
def version_ok(required_version, compare_flag, candidate_version):
    """Tell whether candidate_version satisfies a versioned dependency.

    required_version  -- version string stated in the dependency.
    compare_flag      -- comparison bits of the dependency (a combination
                         of RPMSENSE_LESS / RPMSENSE_GREATER /
                         RPMSENSE_EQUAL); 0 means "any version".
    candidate_version -- version string offered by the candidate, or '*'
                         for "matches everything".

    Returns True when the candidate satisfies the requirement, False
    otherwise.  False is also returned for an unknown compare_flag, and
    (after printing a message) when a version part cannot be parsed as a
    number.

    The comparison follows urpm-repoclosure (formatVersions/cmpVersions),
    not rpm's own algorithm.
    """
    # Trivial outcomes first, so the helpers below are not even created.
    if compare_flag == 0:
        return True
    if candidate_version == '*':
        return True
    #see urpm-repoclosure, checkDeps
    if compare_flag == RPMSENSE_EQUAL and \
            candidate_version == required_version:
        return True

    def sep_version(version):
        # Split "main-release:suffix" into (main, '-release', ':suffix');
        # the suffix part may be empty.  No release -> (version, '', '').
        vrem = re.match(r'\A(.+)(\-[^\-\:]+)(\:[^\:]+|)\Z', version)
        if vrem:
            return (vrem.group(1), vrem.group(2), vrem.group(3))
        return (version, '', '')

    def simple_version(version):
        # Reduce a version to dot-separated parts: '-' and ':' become
        # dots, every run of letters becomes a dot, a trailing dot goes.
        version = re.sub(r'[\-:]', '.', version)
        version = re.sub(r'[a-z]+', '.', version, flags=re.I)
        version = re.sub(r'\.\Z', '', version)
        return version

    def format_versions(ver1, ver2):
        #see urpm-repoclosure, formatVersions
        # v1 - provided
        # v2 - required
        (e1, e2) = (None, None)
        e1_m = re.match(r'\A([^\-\:]+)\:(.*)', ver1)
        if e1_m:
            (e1, ver1) = (e1_m.group(1), e1_m.group(2))
        e2_m = re.match(r'\A([^\-\:]+)\:(.*)', ver2)
        if e2_m:
            (e2, ver2) = (e2_m.group(1), e2_m.group(2))
        (ver1_m, ver1_r, ver1_rr) = sep_version(ver1)
        (ver2_m, ver2_r, ver2_rr) = sep_version(ver2)
        # Compare only at the precision the requirement states: parts the
        # required version lacks are dropped from the provided one too.
        if not ver2_rr:
            ver1_rr = ''
        if not ver2_r:
            ver1_r = ''
        ver1 = ver1_m + ver1_r + ver1_rr
        ver2 = ver2_m + ver2_r + ver2_rr
        # Epochs take part only when both versions carry one.
        if e1_m and e2_m:
            ver1 = e1 + '.' + ver1
            ver2 = e2 + '.' + ver2
        return (simple_version(ver1), simple_version(ver2))

    def cmp_nums(num1, num2):
        # A part with leading zeros is compared like a decimal fraction:
        # for "00503" vs "12" the zeros are moved to the end of the other
        # part, giving 503 vs 1200 (so perl 5.00503 < 5.12).
        # int() raises ValueError for non-numeric parts; the caller
        # handles that.
        if num1 == num2:
            return 0
        lzeros1 = re.match(r'\A([0]+)([1-9].*)', num1)
        if lzeros1:
            (num1, num2) = (lzeros1.group(2), num2 + lzeros1.group(1))
        lzeros2 = re.match(r'\A([0]+)([1-9].*)', num2)
        if lzeros2:
            (num2, num1) = (lzeros2.group(2), num1 + lzeros2.group(1))
        diff = int(num1, 10) - int(num2, 10)
        if diff == 0:
            return 0
        return 1 if diff > 0 else -1

    def cmp_versions(version1, version2):
        #see urpm-repoclosure, cmpVersions
        # 3.2.5-5:2011.0
        # NOTE: perl 5.00503 and 5.12
        (v1, v2) = format_versions(version1, version2)
        if v1 == v2:
            return 0
        v1parts = v1.split('.')
        v2parts = v2.split('.')
        # zip() stops at the shorter list, like the former index loop.
        for (num1, num2) in zip(v1parts, v2parts):
            if len(num1) > 0 and len(num2) == 0:
                return 1
            if len(num1) == 0 and len(num2) > 0:
                return -1
            num_diff = cmp_nums(num1, num2)
            if num_diff != 0:
                return num_diff
        # All shared parts are equal: the version with more parts wins.
        if len(v1parts) < len(v2parts):
            return -1
        if len(v1parts) > len(v2parts):
            return 1
        return 0

    def rpm_cmp_versions(version1, version2):
        # Reference comparison through rpm.labelCompare.  It is no longer
        # called on every version_ok() invocation because its result was
        # never used; it is kept for manual cross-checks of cmp_versions.
        def stringToVersion(verstring):
            # from rpmUtils
            if verstring in [None, '']:
                return (None, None, None)
            e1_m = re.match(r'\A([^\-\:]+)\:(.*)', verstring)
            epoch = None
            if e1_m:
                (epoch, verstring) = (e1_m.group(1), e1_m.group(2))
            j = verstring.find('-')
            if j != -1:
                if verstring[:j] == '':
                    version = None
                else:
                    version = verstring[:j]
                release = verstring[j + 1:]
            else:
                if verstring == '':
                    version = None
                else:
                    version = verstring
                release = None
            return (epoch, version, release)

        (e1, v1, r1) = stringToVersion(version1)
        (e2, v2, r2) = stringToVersion(version2)
        # Epochs are ignored unless both versions have one.
        if e1 is None or e2 is None:
            e1 = '0'
            e2 = '0'
        return rpm.labelCompare((e1, v1, r1), (e2, v2, r2))

    try:
        cmp_res = cmp_versions(candidate_version, required_version)
    except ValueError as ex:
        print ('Error when comparing versions: "%s" and "%s"\n%s' %
               (candidate_version, required_version, str(ex)))
        return False

    # NOTE: to cross-check the result against rpm, compare cmp_res with
    # rpm_cmp_versions(candidate_version, required_version) here.

    if compare_flag == RPMSENSE_EQUAL:
        return cmp_res == 0
    elif compare_flag == RPMSENSE_LESS | RPMSENSE_EQUAL:
        return cmp_res <= 0
    elif compare_flag == RPMSENSE_GREATER | RPMSENSE_EQUAL:
        return cmp_res >= 0
    elif compare_flag == RPMSENSE_LESS:
        return cmp_res < 0
    elif compare_flag == RPMSENSE_GREATER:
        return cmp_res > 0
    return False
def process_repodir_dependencies(dbc, repodir_id, repodir_name, repodir_depends, dep_type):
    """Resolve dependencies of one kind against the provides of a repository.

    For every row of ``rpm_<dep_type>`` belonging to a package of the
    repository, all matching ``rpm_provides`` rows (same name, version
    accepted by ``version_ok``) are looked up in the repository itself and
    in the repositories it depends on.  One row per match is inserted into
    ``package_<dep_type>_res``.

    dbc             -- database cursor (``execute`` must return an object
                       with ``fetchall``).
    repodir_id      -- id of the repository being processed.
    repodir_name    -- not used by this function.
    repodir_depends -- ids of the repositories that are searched too.
    dep_type        -- dependency kind; it is interpolated into table and
                       column names, so it must be a trusted identifier
                       such as 'conflicts' or 'obsoletes'.

    Raises Exception when a provides row has a version but no comparison
    flags.
    """
    package_depends = dbc.execute("""
SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
dep.id, dep.name, flags, dep.version
FROM packages, rpm_%s dep
WHERE repodir_id = ? AND dep.package_id = packages.id
ORDER BY packages.name, dep.name
""" % dep_type, [repodir_id]).fetchall()

    search_repodirs = [repodir_id]
    search_repodirs.extend(repodir_depends)
    in_repodirs = ','.join(str(rd_id) for rd_id in search_repodirs)

    # Both statements are the same for every dependency: build them once.
    candidates_sql = """
SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
prov.id, prov.name, flags, prov.version
FROM packages, rpm_provides AS prov
WHERE prov.package_id = packages.id AND repodir_id IN (%s) AND prov.name = ?
ORDER by packages.name, packages.nvra
""" % in_repodirs
    insert_sql = """
INSERT INTO package_%(dep)s_res(package_id, %(dep)s_id,
provides_id, dep_package_id)
VALUES (?, ?, ?, ?)""" % {'dep': dep_type}

    # Identical dependencies (same name, flags and version) are resolved
    # once.  The key is a tuple rather than a joined string, so a NULL
    # version read from the database cannot break key construction.
    dependency_cache = {}
    for packdep in package_depends:
        (cpackage_id, dep_id, dep_name, dep_flags, dep_version) = \
            (packdep[0], packdep[3], packdep[4], packdep[5], packdep[6])
        dependency_uid = (dep_name, dep_flags, dep_version)
        dep_res = dependency_cache.get(dependency_uid)
        if dep_res is None:
            dep_res = []
            depend_candidates = dbc.execute(candidates_sql,
                                            [dep_name]).fetchall()
            for dep_cand in depend_candidates:
                (pkg_id, provides_id, provides_flags, provides_version) = \
                    (dep_cand[0], dep_cand[3], dep_cand[5], dep_cand[6])
                if provides_flags & RPMSENSE_SENSEMASK == 0:
                    if not provides_version:
                        # Unversioned provide: matches any requirement.
                        provides_version = '*'
                    else:
                        raise Exception('Invalid provides version '
                                        '(flags = %d, version = %s)!' %
                                        (provides_flags, provides_version))
                if version_ok(dep_version, dep_flags & RPMSENSE_SENSEMASK,
                              provides_version):
                    dep_res.append({'prov_id': provides_id, 'pkg_id': pkg_id})
            dependency_cache[dependency_uid] = dep_res
        # The matches are recorded for every package, cached or not.
        for res_rec in dep_res:
            dbc.execute(insert_sql, [cpackage_id, dep_id,
                                     res_rec.get('prov_id'),
                                     res_rec.get('pkg_id')])
def process_repodir_requires(dbc, repodir_id, repodir_name, repodir_depends, requires_build_arch):
global n
@ -37,13 +237,13 @@ SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
(cpackage_id, package_nvra, requires_id, requires_name, requires_flags, requires_version) = \
(packreq[0], packreq[2], packreq[3], packreq[4], packreq[5], packreq[6])
requirement_uid = requires_name + '\0' + str(requires_flags) + '\0' + requires_version
dep_res = requires_cache.get(requirement_uid, None)
if dep_res is None:
dep_res = []
requirement_uid = requires_name + '\x00' + str(requires_flags) + '\x00' + requires_version
req_res = requires_cache.get(requirement_uid, None)
if req_res is None:
req_res = []
if (re.match(r'\A(rpmlib|executable)\(.+\)\Z', requires_name)):
# see if($N=~/\A(rpmlib|executable)\(.+\)\Z/) in urpm_repoclosure.pl
dep_res.append({})
req_res.append({})
else:
depend_candidates = dbc.execute("""
SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
@ -83,11 +283,11 @@ SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
if provides_version == preferred_version or \
version_ok(provides_version, RPMSENSE_EQUAL,
preferred_version):
dep_res.append({'prov_id': provides_id,
req_res.append({'prov_id': provides_id,
'pkg_id': pkg_id})
if len(dep_res) == 0 and requires_name.startswith('/'): # file dependency
if len(req_res) == 0 and requires_name.startswith('/'): # file dependency
if (requires_flags & (RPMSENSE_SCRIPT_POST |
RPMSENSE_SCRIPT_PREUN |
RPMSENSE_SCRIPT_POSTUN)) != 0:
@ -95,7 +295,7 @@ SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
SELECT COUNT(1) FROM package_files WHERE package_id = ? AND path = ?
""", [cpackage_id, requires_name]).fetchone()
if int_files_cnt[0] > 0:
dep_res.append({})
req_res.append({})
else:
#TODO: Check file dependencies (/usr/bin/python (required by ant-scripts-1.7.1-7.0.6.noarch), /usr/sbin/useradd (required by tomcat5-5.5.28-0.5.2.noarch))?
files_deps = dbc.execute("""
@ -104,22 +304,22 @@ SELECT package_id FROM package_files
package_id in (SELECT id FROM packages WHERE repodir_id IN (%s))
""" % in_repodirs, [requires_name]).fetchall()
for file_dep in files_deps:
dep_res.append({'pkg_id': file_dep[0]})
req_res.append({'pkg_id': file_dep[0]})
if len(dep_res) == 0 and (requires_flags & RPMSENSE_MISSINGOK) != 0:
dep_res.append({})
if len(req_res) == 0 and (requires_flags & RPMSENSE_MISSINGOK) != 0:
req_res.append({})
if len(dep_res) > 0:
for res_rec in dep_res:
if len(req_res) > 0:
for res_rec in req_res:
dbc.execute("""
INSERT INTO package_depend_res(package_id, requires_id,
INSERT INTO package_requires_res(package_id, requires_id,
provides_id, dep_package_id)
VALUES (?, ?, ?, ?)
""", [cpackage_id, requires_id, res_rec.get('prov_id'), res_rec.get('pkg_id')])
else:
print requires_name, ' ', requires_version, ' (required by %s)' % package_nvra, ' not found!!!'
broken_dep += 1
requires_cache[requirement_uid] = dep_res
requires_cache[requirement_uid] = req_res
n = n + 1
#print "n = ", n
# if n == 60000:
@ -150,7 +350,6 @@ SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
""", [repodir_id]).fetchall()
for file_link in package_files_links:
pkg_id = file_link[0]
pkg_name = file_link[1]
pkg_nvra = file_link[2]
object_id = file_link[3]
target_obj_id = None
@ -169,10 +368,10 @@ SELECT id, link_to_path FROM package_files WHERE path = ? AND package_id = ?
# Just two levels of dependency recursion - TODO: Full depth recursion?
tofile = dbc.execute("""
SELECT id, link_to_path FROM package_files WHERE path = ? AND package_id IN (
SELECT dep_package_id FROM package_depend_res WHERE package_id = ?
SELECT dep_package_id FROM package_requires_res WHERE package_id = ?
UNION
SELECT dep_package_id FROM package_depend_res WHERE package_id IN
(SELECT dep_package_id FROM package_depend_res WHERE package_id = ?)
SELECT dep_package_id FROM package_requires_res WHERE package_id IN
(SELECT dep_package_id FROM package_requires_res WHERE package_id = ?)
)
""", [target_path, pkg_id, pkg_id]).fetchone()
if tofile:
@ -201,14 +400,14 @@ def process_repodir_so_needed(dbc, repodir_id, repodir_name, repodir_depends):
INSERT INTO so_needed_res(so_needed_id, dep_obj_file_id, res_type)
SELECT so_needed.id, tpf.id, 1 FROM packages
CROSS JOIN package_files spf CROSS JOIN so_needed CROSS JOIN rpm_requires
CROSS JOIN package_depend_res dep_res CROSS JOIN package_files tpf
CROSS JOIN package_requires_res req_res CROSS JOIN package_files tpf
WHERE so_needed.obj_file_id = spf.id AND spf.package_id = packages.id AND
packages.repodir_id = ? AND spf.package_id = rpm_requires.package_id AND
(so_needed.name = rpm_requires.name OR
so_needed.name || '()(64bit)' = rpm_requires.name) AND
packages.id = dep_res.package_id AND
rpm_requires.id = dep_res.requires_id AND
dep_res.dep_package_id = tpf.package_id AND
packages.id = req_res.package_id AND
rpm_requires.id = req_res.requires_id AND
req_res.dep_package_id = tpf.package_id AND
so_needed.name = tpf.basename
""", [repodir_id])
@ -307,6 +506,8 @@ SELECT id, name FROM repodirs WHERE id IN (%s) AND name = ?
if not all_depends_ready:
return False
print repo_name, ' ', depend_repodir_list, ' ', dep_arch
process_repodir_dependencies(dbc, repo_id, repo_name, repodir_depends, 'conflicts')
process_repodir_dependencies(dbc, repo_id, repo_name, repodir_depends, 'obsoletes')
process_repodir_requires(dbc, repo_id, repo_name, repodir_depends, dep_arch)
process_repodir_file_links(dbc, repo_id, repo_name, repodir_depends)
process_repodir_so_needed(dbc, repo_id, repo_name, repodir_depends)
@ -315,9 +516,6 @@ SELECT id, name FROM repodirs WHERE id IN (%s) AND name = ?
if repo_sources:
print 'Searching source rpms...'
dbc.execute("""
UPDATE packages SET sourcerpm_package = NULL
WHERE repodir_id = ?""", [repo_id])
dbc.execute("""
UPDATE packages SET sourcerpm_package =
(SELECT id FROM packages ps
WHERE repodir_id IN (SELECT id FROM repodirs WHERE name = ?) AND
@ -334,17 +532,17 @@ def main(args):
global n
n = 0
dbc.execute("""
PRAGMA cache_size = -1048576
dbc.executescript("""
PRAGMA cache_size = -1048576;
DELETE FROM package_requires_res;
DELETE FROM package_conflicts_res;
DELETE FROM package_obsoletes_res;
DELETE FROM so_needed_res;
DELETE FROM obj_symbols_res;
UPDATE package_files SET link_to_file_id = NULL;
UPDATE packages SET sourcerpm_package = NULL;
ANALYZE;
""")
dbc.execute("""
DELETE FROM so_needed_res""")
dbc.execute("""
DELETE FROM obj_symbols_res""")
dbc.execute("""
DELETE FROM package_depend_res""")
dbc.execute("""
ANALYZE""")
repodirs_processed = []
#Process binary rpms
repodirs_processed_cnt = -1

View file

@ -1,151 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
import rpm
# Dependency "sense" bits; the values correspond to rpm's rpmsenseFlags.
RPMSENSE_LESS = 0x02
RPMSENSE_GREATER = 0x04
RPMSENSE_EQUAL = 0x08
# Mask selecting only the comparison bits (LESS | GREATER | EQUAL).
RPMSENSE_SENSEMASK = 0x0f
RPMSENSE_FIND_PROVIDES = 0x8000
RPMSENSE_MISSINGOK = 0x80000
RPMSENSE_SCRIPT_POST = 0x400
RPMSENSE_SCRIPT_PREUN = 0x800
RPMSENSE_SCRIPT_POSTUN = 0x1000

# "<epoch>:<rest>" -- the epoch may contain neither '-' nor ':'.
_EPOCH_RE = re.compile(r'\A([^\-\:]+)\:(.*)')
# "<version>-<release>[:<suffix>]"
_VERSION_PARTS_RE = re.compile(r'\A(.+)(\-[^\-\:]+)(\:[^\:]+|)\Z')
# Leading zeros followed by a significant digit, e.g. "00503".
_LEADING_ZEROS_RE = re.compile(r'\A([0]+)([1-9].*)')
_DIGITS_RE = re.compile(r'[0-9]+')


def _sep_version(version):
    """Split *version* into (version, '-release', ':suffix').

    Parts that are absent are returned as empty strings.
    """
    match = _VERSION_PARTS_RE.match(version)
    if match:
        return match.groups()
    return (version, '', '')


def _simple_version(version):
    """Reduce *version* to dot-separated components.

    '-' and ':' become dots, every run of letters becomes a single dot and
    one trailing dot is removed.
    """
    version = re.sub(r'[\-:]', '.', version)
    version = re.sub(r'[a-z]+', '.', version, flags=re.I)
    return re.sub(r'\.\Z', '', version)


def _format_versions(provided, required):
    """Bring a provided and a required version to a comparable form.

    See formatVersions in urpm-repoclosure.  Release and suffix of the
    provided version are dropped when the requirement does not specify
    them, and epochs take part in the comparison only when both sides
    carry one.
    """
    provided_epoch = required_epoch = None
    epoch_match = _EPOCH_RE.match(provided)
    if epoch_match:
        (provided_epoch, provided) = epoch_match.groups()
    epoch_match = _EPOCH_RE.match(required)
    if epoch_match:
        (required_epoch, required) = epoch_match.groups()

    (prov_ver, prov_rel, prov_suffix) = _sep_version(provided)
    (req_ver, req_rel, req_suffix) = _sep_version(required)
    if not req_suffix:
        prov_suffix = ''
    if not req_rel:
        prov_rel = ''
    provided = prov_ver + prov_rel + prov_suffix
    required = req_ver + req_rel + req_suffix

    if provided_epoch is not None and required_epoch is not None:
        provided = provided_epoch + '.' + provided
        required = required_epoch + '.' + required
    return (_simple_version(provided), _simple_version(required))


def _digit_prefix(component):
    """Return the leading run of digits of *component*, or '0' if none.

    Components may still contain characters such as '+', '_' or '~';
    feeding those to int() directly would raise ValueError.
    """
    match = _DIGITS_RE.match(component)
    return match.group(0) if match else '0'


def _cmp_nums(num1, num2):
    """Compare two version components; return -1, 0 or 1.

    Leading zeros are treated as a decimal fraction, so that "00503"
    sorts below "12" (perl 5.00503 vs 5.12): the zeros stripped from one
    side are appended to the other side before the integer comparison.
    """
    if num1 == num2:
        return 0
    num1 = _digit_prefix(num1)
    num2 = _digit_prefix(num2)
    zeros = _LEADING_ZEROS_RE.match(num1)
    if zeros:
        (num1, num2) = (zeros.group(2), num2 + zeros.group(1))
    zeros = _LEADING_ZEROS_RE.match(num2)
    if zeros:
        (num2, num1) = (zeros.group(2), num1 + zeros.group(1))
    diff = int(num1) - int(num2)
    if diff == 0:
        return 0
    return 1 if diff > 0 else -1


def _cmp_versions(version1, version2):
    """Compare provided *version1* with required *version2*.

    See cmpVersions in urpm-repoclosure.  Returns -1, 0 or 1.
    """
    (v1, v2) = _format_versions(version1, version2)
    if v1 == v2:
        return 0
    v1parts = v1.split('.')
    v2parts = v2.split('.')
    for (num1, num2) in zip(v1parts, v2parts):
        # A non-empty component always beats an empty one.
        if num1 and not num2:
            return 1
        if num2 and not num1:
            return -1
        num_diff = _cmp_nums(num1, num2)
        if num_diff != 0:
            return num_diff
    # All shared components are equal: the longer version is the newer one.
    if len(v1parts) < len(v2parts):
        return -1
    if len(v1parts) > len(v2parts):
        return 1
    return 0


def version_ok(required_version, compare_flag, candidate_version):
    """Tell whether *candidate_version* satisfies a versioned requirement.

    required_version  -- version string stated by the requirement.
    compare_flag      -- comparison sense: 0 (no constraint) or a
                         combination of RPMSENSE_LESS, RPMSENSE_GREATER and
                         RPMSENSE_EQUAL.  The flag is matched by equality,
                         so a value carrying any other bits yields False.
    candidate_version -- version string of the candidate; '*' matches
                         any requirement.

    Returns True when the candidate fulfils the requirement, else False.
    The comparison follows checkDeps/cmpVersions of urpm-repoclosure.
    """
    if compare_flag == 0:
        return True
    if candidate_version == '*':
        return True
    # Shortcut: identical strings are equal without any normalization.
    if compare_flag == RPMSENSE_EQUAL and \
            candidate_version == required_version:
        return True

    cmp_res = _cmp_versions(candidate_version, required_version)
    if compare_flag == RPMSENSE_EQUAL:
        return cmp_res == 0
    elif compare_flag == RPMSENSE_LESS | RPMSENSE_EQUAL:
        return cmp_res <= 0
    elif compare_flag == RPMSENSE_GREATER | RPMSENSE_EQUAL:
        return cmp_res >= 0
    elif compare_flag == RPMSENSE_LESS:
        return cmp_res < 0
    elif compare_flag == RPMSENSE_GREATER:
        return cmp_res > 0
    return False