#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Analyze repository metadata stored in the ``repo.db`` SQLite database.

The report is written to standard output.  It covers broken dependencies
(optionally also written as .DOT graphs), possible partitioning of
repositories, lost sources, lost or invalid object files, resolution
statistics for shared objects and symbols, and file / provided-capability
conflicts.

NOTE: every ``print`` call below takes exactly one parenthesised argument,
so the module prints the same text under the Python 2 ``print`` statement
and the Python 3 ``print()`` function.  Keep it that way when editing.
"""

import os
import sys
import gettext
import argparse
import sqlite3
import string
import rpm
import re

gettext.install('urpm-tools')

DB = 'repo.db'


def parseargs():
    """Parse the command line and return the argparse namespace.

    The only option is ``-d`` / ``--dot-graphs`` (stored as ``dot_graphs``).
    """
    # The description msgid is kept byte-for-byte (including the doubled
    # space) so that existing translations still match.
    parser = argparse.ArgumentParser(
        description=_('analyze repositories metadata '
                      ' from repo.db'))
    parser.add_argument('-d', '--dot-graphs', action='store_true',
                        help=_('visualize dependencies in .DOT graphs'))
    return parser.parse_args()


def _build_dep_chains(pkg_id, current_repodir, all_broken, build_arch,
                      reponames):
    """Return every dependency chain that starts at package ``pkg_id``.

    Each chain is a list of package titles, beginning with the title of
    ``pkg_id`` itself.  A package that lives in another repository ends the
    chain and gets the repository name appended in braces.  Only
    dependencies whose ``build_arch`` equals ``build_arch`` are followed.
    """
    package = all_broken[pkg_id]
    package_title = package['nvra']
    dep_chains = []
    if package['repo'] != current_repodir:
        package_title += ' {' + reponames[package['repo']] + '}'
    else:
        deps = package['deps']
        # Bottom-level packages carry deps=None: nothing to follow.
        if deps is not None:
            for dep_id in deps:
                if deps[dep_id]['build_arch'] == build_arch:
                    dep_chains.extend(
                        _build_dep_chains(dep_id, current_repodir,
                                          all_broken, build_arch, reponames))
    if not dep_chains:
        dep_chains.append([])
    for dep_chain in dep_chains:
        dep_chain.insert(0, package_title)
    return dep_chains


def _print_broken_packages(repo_packages_broken, build_arch, current_repodir,
                           reponames, all_broken):
    """Print the dependency chains of each package, sorted by name.

    ``repo_packages_broken`` maps a package nvra to its package id.
    """
    for rpb_name in sorted(repo_packages_broken):
        dep_chains = _build_dep_chains(repo_packages_broken[rpb_name],
                                       current_repodir, all_broken,
                                       build_arch, reponames)
        if len(dep_chains) > 1:
            print('\t\tMultiple dependencies (for %s):' % rpb_name)
        for dep_chain in dep_chains:
            print('\t' + (' => '.join(dep_chain)))
    print('Total: %d' % len(repo_packages_broken))
    print('')


def _report_bottom_level(broken_level0):
    """Print packages with unresolved requires and return them as a dict.

    ``broken_level0`` holds rows of ``(package id, nvra, repodir id,
    repodir name, requirement name, requirement arch)`` ordered by
    repository and nvra.  The result maps a package id to
    ``{'nvra', 'repo', 'brokenreqs', 'deps'}`` with ``deps`` set to None.
    """
    all_broken = {}
    if not broken_level0:
        return all_broken

    print('Broken dependencies (bottom level):')

    def flush(pkg_id, reqs):
        # Store and print the requirements collected for one package.
        all_broken[pkg_id]['brokenreqs'] = reqs
        print('\t%s (%s)' % (all_broken[pkg_id]['nvra'], ', '.join(reqs)))

    bp_reqs = []
    pre_repodir_id = None
    pre_bp_id = None
    pre_cnt = 0
    for (bp_id, bp_nvra, bp_repodir_id, bp_repodir_name,
         bp_reqname, bp_reqarch) in broken_level0:
        if pre_bp_id is not None and pre_bp_id != bp_id:
            flush(pre_bp_id, bp_reqs)
            pre_cnt += 1
            bp_reqs = []
        if bp_reqarch is not None:
            bp_reqname += ':' + bp_reqarch
        if bp_reqname not in bp_reqs:
            bp_reqs.append(bp_reqname)
        if pre_repodir_id != bp_repodir_id:
            # The last package of the previous repository was flushed just
            # above, so its total is complete at this point.
            if pre_repodir_id is not None:
                print('Total: %d' % pre_cnt)
            print('%d) %s' % (bp_repodir_id, bp_repodir_name))
            pre_repodir_id = bp_repodir_id
            pre_cnt = 0
        if bp_id not in all_broken:
            all_broken[bp_id] = {'nvra': bp_nvra,
                                 'repo': bp_repodir_id,
                                 'brokenreqs': [],
                                 'deps': None}
        pre_bp_id = bp_id

    # Flush the final package.  It has to be counted as well, otherwise the
    # total of the last repository is one too small.
    flush(pre_bp_id, bp_reqs)
    pre_cnt += 1
    print('Total: %d' % pre_cnt)
    return all_broken


def _collect_recursive_broken(dbc, all_broken):
    """Add packages that depend on already-broken packages to ``all_broken``.

    The query is repeated until a pass adds no new package.  Returns the
    list of ids of the packages added here, each id once, in discovery
    order.  ``all_broken`` is updated in place; the added entries have a
    ``deps`` dict mapping a dependency package id to
    ``{'build_arch', 'req_names'}``.
    """
    broken_recursive = []
    if not all_broken:
        # Nothing is broken, so nothing can depend on a broken package.
        return broken_recursive

    known_cnt = -1
    while known_cnt < len(all_broken):
        known_cnt = len(all_broken)
        # The ids are integers read from the database, so they are safe to
        # interpolate into the statement.
        pids = ','.join(str(known_id) for known_id in all_broken)
        rows = dbc.execute("""
            SELECT packages.id, nvra, repodir_id, repodirs.name,
                   rpm_requires.name AS req_name, build_arch, dep_package_id
              FROM packages, repodirs, rpm_requires, package_requires_res
              WHERE packages.repodir_id = repodirs.id AND
                    packages.id = rpm_requires.package_id AND
                    packages.id = package_requires_res.package_id AND
                    rpm_requires.id = package_requires_res.requires_id AND
                    dep_package_id IN (%(pids)s) AND
                    packages.id NOT IN (%(pids)s)
              ORDER BY repodir_id, nvra""" % {'pids': pids}).fetchall()
        for row in rows:
            pkg_id = row[0]
            if pkg_id not in all_broken:
                all_broken[pkg_id] = {'nvra': row[1],
                                      'repo': row[2],
                                      'deps': {}}
                broken_recursive.append(pkg_id)
            dep_pkg_id = row[6]
            deps = all_broken[pkg_id]['deps']
            if dep_pkg_id not in deps:
                deps[dep_pkg_id] = {'build_arch': row[5],
                                    'req_names': []}
            deps[dep_pkg_id]['req_names'].append(row[4])
    return broken_recursive


def _report_recursive(dbc, all_repodirs, reponames, all_broken,
                      broken_recursive):
    """Print, per repository, the chains of recursively broken packages.

    Repositories whose ``sources`` column is '.' are reported once per
    distinct ``build_arch`` found in their requires; all other repositories
    are reported once with ``build_arch`` None.
    """
    print('===\nRecursive broken dependencies:')
    for rd in all_repodirs:
        (rd_id, rd_name, rd_sources) = (rd[0], rd[1], rd[2])
        if rd_sources == '.':
            archs = dbc.execute("""
                SELECT DISTINCT build_arch FROM rpm_requires
                  WHERE package_id IN (SELECT id FROM packages
                                         WHERE repodir_id = ?)
                """, [rd_id]).fetchall()
            for arch_rec in archs:
                arch = arch_rec[0]
                repo_packages_broken = {}
                for pkg_id in broken_recursive:
                    package = all_broken[pkg_id]
                    if package['repo'] != rd_id:
                        continue
                    for dep in package['deps'].values():
                        if dep['build_arch'] == arch:
                            repo_packages_broken[package['nvra']] = pkg_id
                if repo_packages_broken:
                    print('%d) %s (%s)' % (rd_id, rd_name, arch))
                    _print_broken_packages(repo_packages_broken, arch,
                                           rd_id, reponames, all_broken)
        else:
            repo_packages_broken = {all_broken[pkg_id]['nvra']: pkg_id
                                    for pkg_id in broken_recursive
                                    if all_broken[pkg_id]['repo'] == rd_id}
            if repo_packages_broken:
                print('%d) %s' % (rd_id, rd_name))
                _print_broken_packages(repo_packages_broken, None,
                                       rd_id, reponames, all_broken)


def _write_dot_graphs(all_repodirs, reponames, all_broken):
    """Write ``broken-repo-<id>.dot`` for each repository with broken packages.

    Bottom-level broken packages become red nodes; every other broken
    package gets a blue edge to each package it depends on.
    """
    for rd in all_repodirs:
        (rd_id, rd_name) = (rd[0], rd[1])
        repo_packages = [package for package in all_broken.values()
                         if package['repo'] == rd_id]
        if not repo_packages:
            continue
        # 'with' guarantees the file is closed even if a write fails.
        with open('broken-repo-%d.dot' % rd_id, 'w') as dot_file:
            OutputGraphHead(dot_file, rd_name)
            for package in repo_packages:
                deps = package['deps']
                if deps is None:
                    dot_file.write('"%s" [color="red"];\n' % package['nvra'])
                    continue
                for dep_id in deps:
                    dep_package = all_broken[dep_id]
                    dep_package_title = dep_package['nvra']
                    if dep_package['repo'] != rd_id:
                        dep_package_title += \
                            ' {' + reponames[dep_package['repo']] + '}'
                    dot_file.write('"%s" -> "%s" [color="blue"];\n' %
                                   (package['nvra'], dep_package_title))
            OutputGraphTail(dot_file)


def detect_broken_dependencies(dbc, dot_output):
    """Report packages with broken dependencies.

    First the packages having a requirement with no entry in
    ``package_requires_res`` are listed per repository.  Then the packages
    that depend, directly or transitively, on those are listed as
    dependency chains.

    dbc        -- sqlite3 cursor on the repository database
    dot_output -- when true, also write one .DOT graph per repository
    """
    broken_level0 = dbc.execute("""
        SELECT packages.id, nvra, repodir_id, repodirs.name,
               rpm_requires.name AS req_name,
               rpm_requires.build_arch AS req_arch
          FROM packages, repodirs, rpm_requires
          WHERE packages.repodir_id = repodirs.id AND
                packages.id = rpm_requires.package_id AND
                NOT EXISTS (SELECT 1 FROM package_requires_res pqr
                              WHERE pqr.package_id = packages.id AND
                                    pqr.requires_id = rpm_requires.id)
          ORDER BY repodir_id, nvra""").fetchall()

    all_broken = _report_bottom_level(broken_level0)
    broken_recursive = _collect_recursive_broken(dbc, all_broken)

    all_repodirs = dbc.execute("""
        SELECT id, name, sources FROM repodirs ORDER BY id""").fetchall()
    reponames = {repodir[0]: repodir[1] for repodir in all_repodirs}

    if broken_recursive:
        _report_recursive(dbc, all_repodirs, reponames, all_broken,
                          broken_recursive)
    if dot_output:
        _write_dot_graphs(all_repodirs, reponames, all_broken)


def OutputGraphHead(file_output, dg_name):
    """Write the static header of a .DOT digraph named ``dg_name``."""
    file_output.write('\n\ndigraph "%s" {\n' % dg_name +
                      'size="20.69,25.52";\nratio="fill";\n' +
                      'rankdir="TB";\nnode[style="filled"];\n'
                      'node[shape="box"];\n\n')


def OutputGraphTail(file_output):
    """Write the closing brace of a .DOT digraph."""
    file_output.write('}\n')


def detect_loops(dbc):
    """Report requires that are resolved by the requiring package itself.

    For each repository the affected packages are printed with the names
    of such requires, followed by the number of packages.
    """
    header = '===\n' \
             'Loopbacks:'
    repodirs = dbc.execute("""
        SELECT id, name, sources, path FROM repodirs ORDER BY id
        """).fetchall()
    for repodir in repodirs:
        (rd_id, rd_name) = (repodir[0], repodir[1])
        loopbacks = dbc.execute("""
            SELECT p.id, p.nvra, rpm_requires.name
              FROM package_requires_res pqr, packages p, rpm_requires
              WHERE pqr.package_id = p.id AND
                    pqr.package_id = dep_package_id AND
                    rpm_requires.id = pqr.requires_id and
                    p.repodir_id = ?
              ORDER BY p.nvra, rpm_requires.name
            """, [rd_id]).fetchall()
        if not loopbacks:
            continue
        # The section header is printed once, before the first repository
        # that has anything to report.
        if header:
            print(header)
            header = None
        print('%d) %s' % (rd_id, rd_name))
        pre_pkg_id = None
        pre_pkg_name = None
        requires = []
        cnt = 0
        for (pkg_id, pkg_name, req_name) in loopbacks:
            if pkg_id != pre_pkg_id:
                cnt += 1
                if pre_pkg_id is not None:
                    print('\t%s (%s)' % (pre_pkg_name, ','.join(requires)))
                    requires = []
            pre_pkg_id = pkg_id
            pre_pkg_name = pkg_name
            requires.append(req_name)
        if pre_pkg_id is not None:
            print('\t%s (%s)' % (pre_pkg_name, ','.join(requires)))
        print('Total: %d' % cnt)


def analyze_partitioning(dbc):
    """Print groups of packages that are connected through dependencies.

    Only repositories whose ``sources`` column is not '.' are examined.
    Starting from the first not-yet-assigned package (by name), a group is
    grown by repeatedly adding packages of the same repository that require,
    or are required by, the packages added in the previous step.  Groups of
    one package are collected and printed at the end as "Singles".
    """
    print('===')
    print('Possible partitioning:')
    repodirs = dbc.execute("""
        SELECT id, name, sources, path FROM repodirs
          WHERE sources <> '.' ORDER BY id
        """).fetchall()
    for repodir in repodirs:
        (rd_id, rd_name) = (repodir[0], repodir[1])
        partitioned_packages = []
        singles = []
        while True:
            # '0' is a placeholder so that the NOT IN list is never empty.
            ppackages = ','.join(str(done_id)
                                 for done_id in partitioned_packages) or '0'
            pkg1_rec = dbc.execute("""
                SELECT id, name, nvra FROM packages
                  WHERE repodir_id = ? AND id NOT IN (%s)
                  ORDER BY name LIMIT 1""" % ppackages,
                                   [rd_id]).fetchall()
            if not pkg1_rec:
                break
            if not partitioned_packages:
                print('%d) %s' % (rd_id, rd_name))
            (pkg_id, pkg_nvra) = (pkg1_rec[0][0], pkg1_rec[0][2])
            partition_names = [pkg_nvra]
            partition_ids = [pkg_id]
            partitioned_packages.append(pkg_id)
            current_level_packages = [pkg_id]
            while True:
                cl_packages = ','.join(str(level_id)
                                       for level_id in current_level_packages)
                part_packages = ','.join(str(part_id)
                                         for part_id in partition_ids)
                # Packages that require something from the current level.
                upper_packages = dbc.execute("""
                    SELECT packages.id, packages.name, nvra
                      FROM packages, package_requires_res
                      WHERE packages.id = package_requires_res.package_id AND
                            packages.repodir_id = ? AND
                            package_requires_res.dep_package_id IN (%s) AND
                            packages.id NOT IN (%s)
                      ORDER BY packages.name
                    """ % (cl_packages, part_packages), [rd_id]).fetchall()
                # Packages that the current level requires.
                lower_packages = dbc.execute("""
                    SELECT packages.id, packages.name, nvra
                      FROM packages, package_requires_res
                      WHERE packages.id = package_requires_res.dep_package_id AND
                            packages.repodir_id = ? AND
                            package_requires_res.package_id IN (%s) AND
                            packages.id NOT IN (%s)
                      ORDER BY packages.name
                    """ % (cl_packages, part_packages), [rd_id]).fetchall()
                if not upper_packages and not lower_packages:
                    break
                current_level_packages = []
                level_seen = set()
                for rec in upper_packages + lower_packages:
                    if rec[0] in level_seen:
                        continue
                    level_seen.add(rec[0])
                    current_level_packages.append(rec[0])
                    partitioned_packages.append(rec[0])
                    partition_ids.append(rec[0])
                    partition_names.append(rec[2])

            if len(partition_names) == 1:
                singles.append(partition_names[0])
            else:
                for name in sorted(partition_names):
                    print('\t%s' % name)
                print('Total: %d' % len(partition_names))
                print('---')
                print('')

        if singles:
            print('Singles:')
            for name in sorted(singles):
                print('\t%s' % name)
            print('Total: %d' % len(singles))


class query_output(object):
    """Base class of a per-repository report section.

    On construction ``get_data`` is called for every repository and the
    result is kept in ``self.repositories``; ``print_text`` prints it.
    Subclasses set ``title`` and override ``get_data``.
    """

    # Class-level defaults only; __init__ replaces them per instance.
    repositories = {}
    repodirs = None
    title = None
    dbc = None

    def __init__(self, dbc):
        self.dbc = dbc
        # A fresh dict per instance: writing into the class-level dict
        # would share the results between all report objects.
        self.repositories = {}
        self.repodirs = dbc.execute("""
            SELECT id, name, sources, path FROM repodirs ORDER BY id
            """).fetchall()
        for repodir in self.repodirs:
            rd_id = repodir[0]
            (output, count) = self.get_data(rd_id)
            self.repositories[rd_id] = {'output': output, 'count': count}

    def get_repodir_name(self, repodir_id):
        """Return the name of repository ``repodir_id``, or None."""
        for repodir in self.repodirs:
            if repodir[0] == repodir_id:
                return repodir[1]
        return None

    def print_text(self):
        """Print the section: title once, then each repository's data.

        A repository is skipped when its output is an empty list.  When the
        output is None only the repository header line is printed.
        """
        title_printed = False
        for repodir in self.repodirs:
            (rd_id, rd_name) = (repodir[0], repodir[1])
            if rd_id not in self.repositories:
                continue
            repo_data = self.repositories[rd_id]
            repo_output = repo_data['output']
            if repo_output is not None and len(repo_output) == 0:
                continue
            if self.title and not title_printed:
                print('===\n' + self.title + ':')
                title_printed = True
            self.print_text_reponame(rd_id, rd_name)
            if repo_output is not None:
                for line in repo_output:
                    print('\t' + line)
                print('Total: %d' % repo_data['count'])

    def print_text_reponame(self, repodir_id, repodir_name):
        """Print the header line of one repository."""
        print('%d) %s' % (repodir_id, repodir_name))

    def get_data(self, repodir_id):
        """Return ``(output, count)`` for a repository; overridden below."""
        return (None, 0)

    def _group_conflict_rows(self, rows, repodir_id):
        """Group conflict rows by their first column into report lines.

        ``rows`` holds ``(key, package, other package, other repodir id)``
        ordered by key.  Returns ``(lines, len(lines))`` where each line is
        ``'<key>: <pkg>; <pkg>; ...'``.  A package from another repository
        gets that repository's name appended in braces.
        """
        pre_key = None
        key_packages = []
        result = []
        for row in rows:
            (key, pkg1, pkg2) = (row[0], row[1], row[2])
            if row[3] != repodir_id:
                pkg2 += ' {%s}' % self.get_repodir_name(row[3])
            if key != pre_key and pre_key is not None:
                result.append('%s: %s' % (pre_key, '; '.join(key_packages)))
                key_packages = []
            if pkg1 not in key_packages:
                key_packages.append(pkg1)
            if pkg2 not in key_packages:
                key_packages.append(pkg2)
            pre_key = key
        if pre_key is not None:
            result.append('%s: %s' % (pre_key, '; '.join(key_packages)))
        return (result, len(result))


class query_output_rows(query_output):
    """Section whose ``query`` yields one output line per row."""

    query = ''

    def get_data(self, repodir_id):
        rows = self.dbc.execute(self.query, [repodir_id]).fetchall()
        return ([row[0] for row in rows], len(rows))


class query_output_count(query_output):
    """Section whose ``query`` yields a single count per repository."""

    query = None

    def get_data(self, repodir_id):
        count_rec = self.dbc.execute(self.query, [repodir_id]).fetchone()
        return (None, count_rec[0])

    def print_text_reponame(self, repodir_id, repodir_name):
        """Print the repository header together with its count."""
        print('%d) %s: %d' % (repodir_id, repodir_name,
                              self.repositories[repodir_id]['count']))


class lost_sources(query_output_rows):
    title = 'Lost sources'
    query = """
        SELECT nvra || ' (' || sourcerpm || ')' FROM packages
          WHERE repodir_id = ? AND
                sourcerpm IS NOT NULL AND sourcerpm_package IS NULL
          ORDER BY name
        """


class lost_object_files(query_output_rows):
    title = 'Lost object (executable) files (provided but not found)'
    query = """
        SELECT nvra || ': ' || package_files.path
          FROM packages, package_files
          WHERE repodir_id = ? AND
                packages.id = package_files.package_id AND
                mark = 'not-found'
          ORDER BY packages.name, package_files.path
        """


class broken_object_links(query_output_rows):
    title = 'Invalid object (executable) file links'
    query = """
        SELECT nvra || ': ' || package_files.path || ' -/-> ' || link_to_path
          FROM packages, package_files
          WHERE repodir_id = ? AND
                packages.id = package_files.package_id AND
                mark = 'link' AND
                link_to_path IS NOT NULL AND link_to_file_id IS NULL
          ORDER BY packages.name, package_files.path
        """


def print_reponame_with_depends(dbc, repodir_id, repodir_name):
    """Print a repository header, listing the repositories it depends on."""
    dep_repos = dbc.execute("""
        SELECT depend_repodir_name FROM repodir_depends WHERE repodir_id = ?
        """, [repodir_id]).fetchall()
    repodir_depends = ', '.join([dep_repo[0] for dep_repo in dep_repos])
    if repodir_depends == '':
        suffix = ''
    else:
        suffix = ' (depends on: %s)' % repodir_depends
    print(('%d) %s' % (repodir_id, repodir_name)) + suffix)


class so_needed_resolved1(query_output_count):
    title = 'Objects needed and resolved by rpm requires-provides'
    query = """
        SELECT COUNT(1)
          FROM packages
               CROSS JOIN package_files
               CROSS JOIN so_needed
               CROSS JOIN so_needed_res
          WHERE repodir_id = ? AND
                package_files.package_id = packages.id AND
                so_needed.obj_file_id = package_files.id AND
                so_needed_id = so_needed.id AND
                res_type = 1
        """


class so_needed_resolved2(query_output_count):
    title = 'Objects needed and resolved by flat search'
    query = """
        SELECT COUNT(1)
          FROM packages
               CROSS JOIN package_files
               CROSS JOIN so_needed
               CROSS JOIN so_needed_res
          WHERE repodir_id = ? AND
                package_files.package_id = packages.id AND
                so_needed.obj_file_id = package_files.id AND
                so_needed_id = so_needed.id AND
                res_type = 2
        """


class so_needed_not_resolved(query_output_rows):
    title = 'Objects needed but not resolved'
    query = """
        SELECT packages.nvra || ': ' || package_files.path || ' -?-> ' ||
               so_needed.name
          FROM packages
               CROSS JOIN package_files
               CROSS JOIN so_needed
               LEFT OUTER JOIN so_needed_res
                 ON so_needed_id = so_needed.id
          WHERE repodir_id = ? AND
                package_files.package_id = packages.id AND
                so_needed.obj_file_id = package_files.id AND
                so_needed_id IS NULL
        """

    def print_text_reponame(self, repodir_id, repodir_name):
        """Print the repository header with its "depends on" list.

        print_text() calls print_text_reponame(); the hook used to be
        defined only under the name print_reponame and was never invoked.
        """
        print_reponame_with_depends(self.dbc, repodir_id, repodir_name)

    # Old name kept for any caller that used it directly.
    print_reponame = print_text_reponame


class symbols_resolved1_2(query_output_count):
    title = 'Symbols resolved by .so NEEDED search'
    query = """
        SELECT COUNT(1)
          FROM packages
               CROSS JOIN package_files
               CROSS JOIN obj_symbols
               CROSS JOIN obj_symbols_res
          WHERE packages.repodir_id = ? AND
                packages.id = package_files.package_id AND
                package_files.id = obj_symbols.obj_file_id AND
                sym_type = 0 AND
                obj_symbols_res.obj_sym_id = obj_symbols.id AND
                res_type IN (1, 2)
        """


class symbols_resolved3(query_output_count):
    title = 'Symbols resolved by flat search'
    query = """
        SELECT COUNT(1)
          FROM packages
               CROSS JOIN package_files
               CROSS JOIN obj_symbols
               CROSS JOIN obj_symbols_res
          WHERE packages.repodir_id = ? AND
                packages.id = package_files.package_id AND
                package_files.id = obj_symbols.obj_file_id AND
                sym_type = 0 AND
                obj_symbols_res.obj_sym_id = obj_symbols.id AND
                res_type = 3
        """


class symbols_not_resolved(query_output_rows):
    title = 'Symbols not resolved'
    query = """
        SELECT packages.nvra || ': ' || package_files.path || ' -?-> ' ||
               obj_symbols.name
          FROM packages
               CROSS JOIN package_files
               CROSS JOIN obj_symbols
          WHERE packages.repodir_id = ? AND
                packages.id = package_files.package_id AND
                package_files.id = obj_symbols.obj_file_id AND
                sym_type = 0 AND
                NOT EXISTS (SELECT 1 FROM obj_symbols_res
                              WHERE obj_sym_id = obj_symbols.id)
        """

    def print_text_reponame(self, repodir_id, repodir_name):
        """Print the repository header with its "depends on" list.

        print_text() calls print_text_reponame(); the hook used to be
        defined only under the name print_reponame and was never invoked.
        """
        print_reponame_with_depends(self.dbc, repodir_id, repodir_name)

    # Old name kept for any caller that used it directly.
    print_reponame = print_text_reponame


class file_conflicts(query_output):
    """Files shipped by differently named packages of one repository set."""

    title = 'File conflicts'

    def get_data(self, repodir_id):
        # tp.name <> sp.name condition added to exclude different versions
        # of the same package
        rows = self.dbc.execute("""
            SELECT spf.path, sp.nvra, tp.nvra, tp.repodir_id
              FROM packages sp, package_files spf,
                   package_files tpf, packages tp
              WHERE sp.id = spf.package_id AND
                    sp.repodir_id = ? AND
                    spf.mode <> -1 AND (spf.mode & 16384) == 0 AND
                    spf.path = tpf.path AND
                    spf.package_id <> tpf.package_id AND
                    tpf.mode <> -1 AND
                    tpf.package_id = tp.id AND
                    tp.name <> sp.name AND
                    (tp.repodir_id == sp.repodir_id OR
                     tp.repodir_id IN
                       (SELECT rd.id FROM repodirs rd, repodir_depends rdd
                          WHERE depend_repodir_name = rd.name AND
                                rdd.repodir_id = sp.repodir_id)) AND
                    NOT EXISTS (
                      SELECT 1 FROM package_conflicts_res pcr
                        WHERE (sp.id = pcr.package_id AND
                               tp.id = pcr.dep_package_id) OR
                              (tp.id = pcr.package_id AND
                               sp.id = pcr.dep_package_id)
                    ) AND
                    NOT EXISTS (
                      SELECT 1 FROM package_obsoletes_res por
                        WHERE (sp.id = por.package_id AND
                               tp.id = por.dep_package_id) OR
                              (tp.id = por.package_id AND
                               sp.id = por.dep_package_id)
                    )
              ORDER BY spf.path, sp.nvra, tp.nvra
            """, [repodir_id]).fetchall()
        return self._group_conflict_rows(rows, repodir_id)


class provides_conflicts(query_output):
    """Required capabilities provided by differently named packages."""

    title = 'Provided capabilities conflicts'

    def get_data(self, repodir_id):
        # tp.name <> sp.name condition added to exclude different versions
        # of the same package
        rows = self.dbc.execute("""
            SELECT srp.name, sp.nvra, tp.nvra, tp.repodir_id
              FROM packages sp, rpm_provides srp,
                   rpm_provides trp, packages tp
              WHERE sp.id = srp.package_id AND
                    sp.repodir_id = ? AND
                    srp.name = trp.name AND
                    srp.package_id <> trp.package_id AND
                    trp.package_id = tp.id AND
                    tp.name <> sp.name AND
                    (tp.repodir_id == sp.repodir_id OR
                     tp.repodir_id IN
                       (SELECT rd.id FROM repodirs rd, repodir_depends rdd
                          WHERE depend_repodir_name = rd.name AND
                                rdd.repodir_id = sp.repodir_id)) AND
                    NOT EXISTS (
                      SELECT 1 FROM package_conflicts_res pcr
                        WHERE (sp.id = pcr.package_id AND
                               tp.id = pcr.dep_package_id) OR
                              (tp.id = pcr.package_id AND
                               sp.id = pcr.dep_package_id)
                    ) AND
                    NOT EXISTS (
                      SELECT 1 FROM package_obsoletes_res por
                        WHERE (sp.id = por.package_id AND
                               tp.id = por.dep_package_id) OR
                              (tp.id = por.package_id AND
                               sp.id = por.dep_package_id)
                    ) AND
                    EXISTS (SELECT 1 FROM rpm_requires
                              WHERE rpm_requires.name = srp.name)
              ORDER BY srp.name, sp.nvra, tp.nvra
            """, [repodir_id]).fetchall()
        return self._group_conflict_rows(rows, repodir_id)


def main(args):
    """Run every report against the database named by ``DB``.

    ``args`` is accepted for compatibility but not used: parseargs() reads
    the command line itself.
    """
    options = parseargs()
    conn = sqlite3.connect(DB)
    try:
        dbc = conn.cursor()
        detect_broken_dependencies(dbc, options.dot_graphs)
        # NOTE(review): detect_loops(dbc) was commented out in the original
        # and is left disabled here -- confirm whether it should run.
        analyze_partitioning(dbc)
        for report in (lost_sources,
                       lost_object_files,
                       broken_object_links,
                       so_needed_resolved1,
                       so_needed_resolved2,
                       so_needed_not_resolved,
                       symbols_resolved1_2,
                       symbols_resolved3,
                       symbols_not_resolved,
                       file_conflicts,
                       provides_conflicts):
            # Each report is built and printed before the next one starts.
            report(dbc).print_text()
    finally:
        # Close the connection even when a report raises.
        conn.close()


if __name__ == "__main__":
    main(sys.argv)