"""Print consistency reports for a repository metadata database.

The script opens the SQLite file named by ``DB`` and prints, per repodir:
broken package dependencies (direct and transitive), packages whose source
package is missing, connected groups of packages ("partitions"), lost object
files, broken object-file links, unresolved ``NEEDED`` objects and unresolved
symbols.

Every ``print`` call takes exactly one argument, so the output is the same
whether the file is run by Python 2 (where it parses as a print statement
with a parenthesised expression) or by Python 3.
"""

import argparse
import gettext
import itertools
import os
import re
import sqlite3
import string
import sys

try:
    # Nothing in this module references ``rpm``; the import is kept for
    # compatibility, but its absence no longer prevents the reports (which
    # only read the SQLite database) from running.
    import rpm
except ImportError:
    rpm = None

DB = 'repo.db'


def _id_list(ids):
    """Return *ids* as a comma-separated string for an SQL ``IN (...)`` list.

    The values are keys read back from the database itself (presumably
    integer primary keys -- they are formatted with ``%d`` elsewhere), so they
    are interpolated into the statement text rather than bound as parameters.
    """
    return ','.join(str(item) for item in ids)


def _list_repodirs(dbc, skip_dot_sources=False):
    """Return ``(id, name)`` for every repodir, ordered by id.

    With *skip_dot_sources* set, repodirs whose ``sources`` column equals
    ``'.'`` are left out.
    """
    condition = "WHERE sources <> '.'" if skip_dot_sources else ''
    return dbc.execute(
        'SELECT id, name FROM repodirs %s ORDER BY id' % condition).fetchall()


def _print_bottom_level_broken(rows):
    """Print packages having an unresolved require and return them.

    *rows* are ``(package id, nvra, repodir id, repodir name, require name,
    require arch)`` tuples ordered by repodir and nvra. One line is printed
    per package, listing its distinct unresolved requires, followed by a
    per-repodir total.

    Returns a dict mapping package id to a record with the keys ``repo``,
    ``nvra``, ``reqname`` and ``depid`` (``0`` marks a bottom-level entry).
    """
    all_broken = {}
    print('Broken dependencies (bottom level):')
    by_repodir = itertools.groupby(rows, key=lambda row: (row[2], row[3]))
    for (repo_id, repo_name), repo_rows in by_repodir:
        print('%d) %s' % (repo_id, repo_name))
        package_count = 0
        by_package = itertools.groupby(
            repo_rows, key=lambda row: (row[0], row[1]))
        for (pkg_id, nvra), pkg_rows in by_package:
            req_names = []
            for row in pkg_rows:
                req_name = row[4]
                if row[5] is not None:
                    req_name += ':' + row[5]
                if req_name not in req_names:
                    req_names.append(req_name)
            all_broken.setdefault(pkg_id, {'repo': repo_id,
                                           'nvra': nvra,
                                           'reqname': req_names[0],
                                           'depid': 0})
            print('\t%s (%s)' % (nvra, ', '.join(req_names)))
            # Count every printed package, including the last one of the
            # repodir, so that the total matches the lines above it.
            package_count += 1
        print('Total: %d' % package_count)
    return all_broken


def _collect_recursive_broken(dbc, all_broken):
    """Add packages that depend on already-broken packages to *all_broken*.

    The query is repeated until a pass finds no new package. Each new record
    stores in ``depid`` the id of the broken package it depends on, which is
    always an entry added by an earlier pass (or a bottom-level one).

    Returns the ids added, in discovery order (an id may occur repeatedly).
    """
    broken_recursive = []
    known_count = -1
    while known_count < len(all_broken):
        known_count = len(all_broken)
        rows = dbc.execute("""
            SELECT packages.id, nvra, repodir_id, repodirs.name,
                   requires.name AS req_name, build_arch, dep_package_id
              FROM packages, repodirs, requires
             WHERE packages.repodir_id = repodirs.id
               AND packages.id = requires.package_id
               AND dep_package_id IN (%(pids)s)
               AND packages.id NOT IN (%(pids)s)
             ORDER BY repodir_id, nvra""" % {'pids': _id_list(all_broken)}
                           ).fetchall()
        for row in rows:
            all_broken[row[0]] = {'repo': row[2],
                                  'nvra': row[1],
                                  'reqname': row[4],
                                  'build_arch': row[5],
                                  'depid': row[6]}
            broken_recursive.append(row[0])
    return broken_recursive


def _print_broken_chains(repo_packages_broken, all_broken):
    """Print each package with the chain of broken packages it leads to.

    *repo_packages_broken* maps nvra to package id; the chain is followed
    through the ``depid`` links of *all_broken* until a bottom-level entry
    (``depid == 0``) has been listed.
    """
    for nvra in sorted(repo_packages_broken):
        chain = []
        dep_id = all_broken[repo_packages_broken[nvra]]['depid']
        while dep_id != 0:
            chain.append('%s (%d)' % (all_broken[dep_id]['nvra'],
                                      all_broken[dep_id]['repo']))
            dep_id = all_broken[dep_id]['depid']
        print('\t' + nvra + ' => ' + ' => '.join(chain))
    print('Total: %d' % len(repo_packages_broken))
    print('')


def detect_broken_dependencies(dbc):
    """Print packages with unresolved requires and everything built on them.

    First the packages owning a ``requires`` row with no ``dep_package_id``
    are listed per repodir. Then the packages that depend, directly or
    through other packages, on any of those are listed per repodir (and per
    ``build_arch`` for repodirs whose ``sources`` is ``'.'``).

    dbc -- sqlite3 cursor on the repository database.
    """
    broken_level0 = dbc.execute("""
        SELECT packages.id, nvra, repodir_id, repodirs.name,
               requires.name AS req_name, requires.build_arch AS req_arch
          FROM packages, repodirs, requires
         WHERE packages.repodir_id = repodirs.id
           AND packages.id = requires.package_id
           AND dep_package_id IS NULL
         ORDER BY repodir_id, nvra""").fetchall()
    if not broken_level0:
        # Nothing is broken directly, so nothing can be broken transitively.
        return

    all_broken = _print_bottom_level_broken(broken_level0)
    broken_recursive = _collect_recursive_broken(dbc, all_broken)
    if not broken_recursive:
        return

    print('Recursive broken dependencies:')
    # fetchall() is required: the cursor is reused for nested queries below.
    all_repodirs = dbc.execute(
        'SELECT id, name, sources FROM repodirs ORDER BY id').fetchall()
    for rd_id, rd_name, rd_sources in all_repodirs:
        in_repodir = [pkg_id for pkg_id in broken_recursive
                      if all_broken[pkg_id]['repo'] == rd_id]
        if rd_sources == '.':
            archs = dbc.execute("""
                SELECT DISTINCT build_arch FROM requires
                 WHERE package_id IN
                       (SELECT id FROM packages WHERE repodir_id = ?)
                """, [rd_id]).fetchall()
            for (arch,) in archs:
                selected = {all_broken[pkg_id]['nvra']: pkg_id
                            for pkg_id in in_repodir
                            if all_broken[pkg_id]['build_arch'] == arch}
                if selected:
                    print('%d) %s (%s)' % (rd_id, rd_name, arch))
                    _print_broken_chains(selected, all_broken)
        else:
            selected = {all_broken[pkg_id]['nvra']: pkg_id
                        for pkg_id in in_repodir}
            if selected:
                print('%d) %s' % (rd_id, rd_name))
                _print_broken_chains(selected, all_broken)


def detect_lost_sources(dbc):
    """Print packages that name a source rpm which is not in the database.

    Only repodirs whose ``sources`` is not ``'.'`` are examined; a package is
    reported when ``sourcerpm`` is set but ``sourcerpm_package`` is NULL.

    dbc -- sqlite3 cursor on the repository database.
    """
    print('===')
    print('Lost sources:')
    for rd_id, rd_name in _list_repodirs(dbc, skip_dot_sources=True):
        lost_sources = dbc.execute("""
            SELECT name, nvra, sourcerpm FROM packages
             WHERE repodir_id = ?
               AND sourcerpm IS NOT NULL AND sourcerpm_package IS NULL
             ORDER BY name
            """, [rd_id]).fetchall()
        if not lost_sources:
            continue
        print('%d) %s' % (rd_id, rd_name))
        for _name, nvra, sourcerpm in lost_sources:
            print('\t%s (%s)' % (nvra, sourcerpm))
        print('Total: %d' % len(lost_sources))


def _grow_partition(dbc, rd_id, seed_id, seed_nvra):
    """Collect the packages of repodir *rd_id* connected to the seed package.

    Starting from the seed, each pass adds the packages that require a
    package of the current level ("upper") and the packages required by one
    ("lower"), until a pass finds nothing new.

    Returns ``(ids, nvras)`` as two parallel lists, seed first.
    """
    partition_ids = [seed_id]
    partition_names = [seed_nvra]
    level = [seed_id]
    while level:
        level_list = _id_list(level)
        known_list = _id_list(partition_ids)
        upper = dbc.execute("""
            SELECT packages.id, packages.name, nvra
              FROM packages, requires
             WHERE packages.id = requires.package_id
               AND packages.repodir_id = ?
               AND requires.dep_package_id IN (%s)
               AND packages.id NOT IN (%s)
             ORDER BY packages.name
            """ % (level_list, known_list), [rd_id]).fetchall()
        lower = dbc.execute("""
            SELECT packages.id, packages.name, nvra
              FROM packages, requires
             WHERE packages.id = requires.dep_package_id
               AND packages.repodir_id = ?
               AND requires.package_id IN (%s)
               AND packages.id NOT IN (%s)
             ORDER BY packages.name
            """ % (level_list, known_list), [rd_id]).fetchall()
        level = []
        seen = set()
        # A package may be returned several times (one row per requires
        # entry, and by both queries); keep its first occurrence only.
        for pkg_id, _name, nvra in upper + lower:
            if pkg_id in seen:
                continue
            seen.add(pkg_id)
            level.append(pkg_id)
            partition_ids.append(pkg_id)
            partition_names.append(nvra)
    return partition_ids, partition_names


def analyze_partitioning(dbc):
    """Print groups of packages connected through requires, per repodir.

    Only repodirs whose ``sources`` is not ``'.'`` are examined. Groups of
    two or more packages are printed as they are found; packages connected
    to nothing are gathered and printed at the end under ``Singles:``.

    dbc -- sqlite3 cursor on the repository database.
    """
    print('===')
    print('Possible partitioning:')
    for rd_id, rd_name in _list_repodirs(dbc, skip_dot_sources=True):
        assigned = []  # ids of packages already placed into a partition
        singles = []
        while True:
            # '0' keeps the IN list non-empty before anything is assigned
            # (presumably no package has id 0 -- TODO confirm).
            seed = dbc.execute("""
                SELECT id, name, nvra FROM packages
                 WHERE repodir_id = ? AND id NOT IN (%s)
                 ORDER BY name
                 LIMIT 1""" % (_id_list(assigned) or '0'),
                               [rd_id]).fetchone()
            if seed is None:
                break
            if not assigned:
                print('%d) %s' % (rd_id, rd_name))
            partition_ids, partition_names = _grow_partition(
                dbc, rd_id, seed[0], seed[2])
            assigned.extend(partition_ids)
            if len(partition_names) == 1:
                singles.append(partition_names[0])
                continue
            for nvra in sorted(partition_names):
                # Was "print '\t%s', p", which printed a literal "%s".
                print('\t%s' % nvra)
            print('Total: %d' % len(partition_names))
            # NOTE(review): the original indentation was lost; the separator
            # is assumed to follow each multi-package partition -- confirm.
            print('---')
            print('')
        if singles:
            print('Singles:')
            for nvra in sorted(singles):
                print('\t%s' % nvra)
            print('Total: %d' % len(singles))


def detect_lost_object_files(dbc):
    """Print package files whose ``mark`` is ``'not-found'``, per repodir.

    dbc -- sqlite3 cursor on the repository database.
    """
    print('===')
    print('Lost object (executable) files (provided but not found):')
    for rd_id, rd_name in _list_repodirs(dbc):
        lost_object_files = dbc.execute("""
            SELECT nvra, package_files.path, mark
              FROM packages, package_files
             WHERE repodir_id = ?
               AND packages.id = package_files.package_id
               AND mark = 'not-found'
             ORDER BY packages.name, package_files.path
            """, [rd_id]).fetchall()
        if not lost_object_files:
            continue
        print('%d) %s' % (rd_id, rd_name))
        for nvra, path, _mark in lost_object_files:
            print('\t%s: %s' % (nvra, path))
        print('Total: %d' % len(lost_object_files))


def detect_broken_object_links(dbc):
    """Print link files whose target path has no matching file, per repodir.

    A file is reported when its ``mark`` is ``'link'``, ``link_to_path`` is
    set and ``link_to_file_id`` is NULL.

    dbc -- sqlite3 cursor on the repository database.
    """
    print('===')
    print('Invalid object (executable) file links:')
    for rd_id, rd_name in _list_repodirs(dbc):
        broken_object_links = dbc.execute("""
            SELECT nvra, package_files.path, link_to_path, mark
              FROM packages, package_files
             WHERE repodir_id = ?
               AND packages.id = package_files.package_id
               AND mark = 'link'
               AND link_to_path IS NOT NULL AND link_to_file_id IS NULL
             ORDER BY packages.name, package_files.path
            """, [rd_id]).fetchall()
        if not broken_object_links:
            continue
        print('%d) %s' % (rd_id, rd_name))
        for nvra, path, link_to_path, _mark in broken_object_links:
            print('\t%s: %s -/-> %s' % (nvra, path, link_to_path))
        print('Total: %d' % len(broken_object_links))


def get_repodir_depends(dbc, repodir_id):
    """Return the names of the repodirs *repodir_id* depends on.

    The names come from ``repodir_depends.depend_repodir_name`` and are
    joined with ``', '``; the result is ``''`` when there are none.
    """
    dep_repos = dbc.execute("""
        SELECT depend_repodir_name FROM repodir_depends
         WHERE repodir_id = ?
        """, [repodir_id]).fetchall()
    return ', '.join(name for (name,) in dep_repos)


def _print_count_per_repodir(dbc, repodirs, title, query):
    """Print a section giving, for each repodir, the count *query* returns.

    *query* takes the repodir id as its only parameter and yields one row
    whose first column is the count.
    """
    print('===')
    print(title)
    for rd_id, rd_name in repodirs:
        count = dbc.execute(query, [rd_id]).fetchone()[0]
        print('%d) %s: %d' % (rd_id, rd_name, count))


def _print_unresolved_per_repodir(dbc, repodirs, title, query):
    """Print a section listing the unresolved items *query* finds.

    *query* takes the repodir id as its only parameter and yields
    ``(nvra, file path, needed name)`` rows. Repodirs without rows are
    skipped; the heading of the others names the repodirs they depend on.
    """
    print('===')
    print(title)
    for rd_id, rd_name in repodirs:
        rows = dbc.execute(query, [rd_id]).fetchall()
        if not rows:
            continue
        heading = '%d) %s' % (rd_id, rd_name)
        repodir_depends = get_repodir_depends(dbc, rd_id)
        if repodir_depends:
            heading += ' (depends on: %s)' % repodir_depends
        print(heading)
        for nvra, path, needed in rows:
            print('\t%s: %s -?-> %s' % (nvra, path, needed))
        print('Total: %d' % len(rows))


# Counts so_needed entries of a repodir having a resolution of a given type.
_SO_NEEDED_RESOLVED_SQL = """
    SELECT COUNT(1)
      FROM packages
     CROSS JOIN package_files
     CROSS JOIN so_needed
     CROSS JOIN so_needed_res
     WHERE repodir_id = ?
       AND package_files.package_id = packages.id
       AND so_needed.obj_file_id = package_files.id
       AND so_needed_id = so_needed.id
       AND res_type = %d
    """

# Counts undefined-symbol rows (sym_type = 0) of a repodir having a
# resolution whose type is in the given list.
_SYMBOLS_RESOLVED_SQL = """
    SELECT COUNT(1)
      FROM packages
     CROSS JOIN package_files
     CROSS JOIN obj_symbols
     CROSS JOIN obj_symbols_res
     WHERE packages.repodir_id = ?
       AND packages.id = package_files.package_id
       AND package_files.id = obj_symbols.obj_file_id
       AND sym_type = 0
       AND obj_symbols_res.obj_sym_id = obj_symbols.id
       AND res_type IN (%s)
    """


def detect_so_needed_not_found(dbc):
    """Print how needed objects were resolved and which were not.

    Three sections are printed: the per-repodir count of ``so_needed``
    entries with a ``res_type = 1`` resolution, the count with a
    ``res_type = 2`` resolution, and the list of entries having no row in
    ``so_needed_res`` at all.

    dbc -- sqlite3 cursor on the repository database.
    """
    repodirs = _list_repodirs(dbc)
    _print_count_per_repodir(
        dbc, repodirs,
        'Objects needed and resolved by rpm requires-provides:',
        _SO_NEEDED_RESOLVED_SQL % 1)
    _print_count_per_repodir(
        dbc, repodirs,
        'Objects needed and resolved by flat search:',
        _SO_NEEDED_RESOLVED_SQL % 2)
    _print_unresolved_per_repodir(
        dbc, repodirs,
        'Objects needed but not resolved:',
        """
        SELECT packages.nvra, package_files.path, so_needed.name
          FROM packages
         CROSS JOIN package_files
         CROSS JOIN so_needed
          LEFT OUTER JOIN so_needed_res ON so_needed_id = so_needed.id
         WHERE repodir_id = ?
           AND package_files.package_id = packages.id
           AND so_needed.obj_file_id = package_files.id
           AND so_needed_id IS NULL
        """)


def detect_symbols_not_found(dbc):
    """Print how undefined symbols were resolved and which were not.

    Three sections are printed for symbols with ``sym_type = 0``: the
    per-repodir count resolved with ``res_type`` 1 or 2, the count resolved
    with ``res_type = 3``, and the list of symbols having no row in
    ``obj_symbols_res``.

    dbc -- sqlite3 cursor on the repository database.
    """
    repodirs = _list_repodirs(dbc)
    _print_count_per_repodir(
        dbc, repodirs,
        'Symbols resolved by .so NEEDED search:',
        _SYMBOLS_RESOLVED_SQL % '1, 2')
    _print_count_per_repodir(
        dbc, repodirs,
        'Symbols resolved by flat search:',
        _SYMBOLS_RESOLVED_SQL % '3')
    _print_unresolved_per_repodir(
        dbc, repodirs,
        'Symbols not resolved:',
        """
        SELECT packages.nvra, package_files.path, obj_symbols.name
          FROM packages
         CROSS JOIN package_files
         CROSS JOIN obj_symbols
         WHERE packages.repodir_id = ?
           AND packages.id = package_files.package_id
           AND package_files.id = obj_symbols.obj_file_id
           AND sym_type = 0
           AND NOT EXISTS (SELECT 1 FROM obj_symbols_res
                            WHERE obj_sym_id = obj_symbols.id)
        """)


def main(args):
    """Run every report against the database file named by ``DB``.

    args -- command-line arguments; accepted for the caller's convenience
            but not used.
    """
    conn = sqlite3.connect(DB)
    try:
        dbc = conn.cursor()
        detect_broken_dependencies(dbc)
        detect_lost_sources(dbc)
        analyze_partitioning(dbc)
        detect_lost_object_files(dbc)
        detect_broken_object_links(dbc)
        detect_so_needed_not_found(dbc)
        detect_symbols_not_found(dbc)
    finally:
        # Release the database even when one of the reports fails.
        conn.close()


if __name__ == "__main__":
    main(sys.argv)