#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import sys
import gettext
import argparse
import sqlite3
import string
import rpm
import re
gettext.install('urpm-tools')
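# SQLite database to analyze. Assumption: repo.db is built beforehand by the
# repo-analyzer collector step and contains the tables queried below (packages,
# repodirs, rpm_requires, package_depend_res, package_files, so_needed,
# so_needed_res, obj_symbols, obj_symbols_res, repodir_depends).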
DB = 'repo.db'
def parseargs():
    parser = argparse.ArgumentParser(description=_('analyze repositories metadata'
                                                   ' from repo.db'))
    parser.add_argument('-d', '--dot-graphs', action='store_true',
                        help=_('visualize dependencies in .DOT graphs'))
    opts = parser.parse_args()
    return opts
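
# Report packages whose RPM requires have no resolution recorded in
# package_depend_res (bottom-level breakage), then follow reverse dependencies
# to show which other packages are affected; with --dot-graphs the result is
# also written to broken-repo-<repodir id>.dot files.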
def detect_broken_dependencies(dbc, dot_output):

    def print_broken_packages(repo_packages_broken, build_arch, current_repodir,
                              reponames, all_broken):

        def build_dep_chains(pkg_id, current_repodir, all_broken, build_arch, reponames):
            package_title = all_broken[pkg_id]['nvra']
            dep_chains = []
            if all_broken[pkg_id]['repo'] != current_repodir:
                package_title += ' {' + reponames[all_broken[pkg_id]['repo']] + '}'
            else:
                deps = all_broken[pkg_id]['deps']
                if deps is not None:
                    for dep_id in deps:
                        if deps[dep_id]['build_arch'] == build_arch:
                            chains = build_dep_chains(dep_id, current_repodir,
                                                      all_broken, build_arch,
                                                      reponames)
                            for chain in chains:
                                dep_chains.append(chain)
            if len(dep_chains) == 0:
                dep_chains.append([])
            for dep_chain in dep_chains:
                dep_chain.insert(0, package_title)
            return dep_chains

        for rpb_name in sorted(repo_packages_broken.keys()):
            rpb_id = repo_packages_broken[rpb_name]
            alternatives = False
            dep_chains = build_dep_chains(rpb_id, current_repodir, all_broken,
                                          build_arch, reponames)
            if len(dep_chains) > 1:
                print '\t\tMultiple dependencies (for %s):' % rpb_name
            for dep_chain in dep_chains:
                print '\t' + (' => '.join(dep_chain))
        print 'Total: %d' % len(repo_packages_broken)
        print ''
    # Detect broken dependencies with recursion
    repodirs_analyzed = []
    broken = {}
    broken_level0 = dbc.execute("""
        SELECT packages.id, nvra, repodir_id, repodirs.name,
               rpm_requires.name AS req_name, rpm_requires.build_arch AS req_arch
          FROM packages, repodirs, rpm_requires
         WHERE packages.repodir_id = repodirs.id AND
               packages.id = rpm_requires.package_id AND
               NOT EXISTS (SELECT 1 FROM package_depend_res pdr
                            WHERE pdr.package_id = packages.id AND
                                  pdr.requires_id = rpm_requires.id)
         ORDER BY repodir_id, nvra""").fetchall()
    all_broken = {}
    if len(broken_level0) > 0:
        print 'Broken dependencies (bottom level):'
        bp_reqs = []
        pre_repodir_id = -1
        pre_bp_id = -1
        pre_cnt = 0
        for bp in broken_level0:
            (bp_id, bp_nvra, bp_repodir_id, bp_repodir_name, bp_reqname, bp_reqarch) = \
                (bp[0], bp[1], bp[2], bp[3], bp[4], bp[5])
            broken[bp_id] = bp_nvra
            if pre_bp_id != bp_id and pre_bp_id != -1:
                all_broken[pre_bp_id]['brokenreqs'] = bp_reqs
                print '\t%s (%s)' % (all_broken[pre_bp_id]['nvra'],
                                     ', '.join(bp_reqs))
                pre_cnt += 1
                bp_reqs = []
            if bp_reqarch is not None:
                bp_reqname += ':' + bp_reqarch
            if bp_reqname not in bp_reqs:
                bp_reqs.append(bp_reqname)
            if pre_repodir_id != bp_repodir_id:
                if pre_repodir_id != -1:
                    print 'Total: %d' % pre_cnt
                print '%d) %s' % (bp_repodir_id, bp_repodir_name)
                pre_repodir_id = bp_repodir_id
                pre_cnt = 0
            if bp_id not in all_broken:
                all_broken[bp_id] = {'nvra': bp_nvra,
                                     'repo': bp_repodir_id,
                                     'brokenreqs': [],
                                     'deps': None}
            pre_bp_id = bp_id
        if pre_bp_id != -1:
            all_broken[pre_bp_id]['brokenreqs'] = bp_reqs
            print '\t%s (%s)' % (all_broken[pre_bp_id]['nvra'],
                                 ', '.join(bp_reqs))
            pre_cnt += 1  # count the last printed package in the total
        print 'Total: %d' % pre_cnt
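    # Grow the broken set to a fixed point: keep pulling in packages whose
    # resolved dependencies (package_depend_res) point to an already-broken
    # package, until no new package ids appear.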
    all_broken_cnt = -1
    broken_recursive = []
    while all_broken_cnt < len(all_broken):
        all_broken_cnt = len(all_broken)
        pids = ','.join(str(id) for id in all_broken.keys())
        if not pids:
            # no bottom-level breakage at all, nothing to expand
            break
        packages_broken_recurs = dbc.execute("""
            SELECT packages.id, nvra, repodir_id, repodirs.name,
                   rpm_requires.name AS req_name, build_arch, dep_package_id
              FROM packages, repodirs, rpm_requires, package_depend_res
             WHERE packages.repodir_id = repodirs.id AND
                   packages.id = rpm_requires.package_id AND
                   packages.id = package_depend_res.package_id AND
                   rpm_requires.id = package_depend_res.requires_id AND
                   dep_package_id IN (%(pids)s) AND
                   packages.id NOT IN (%(pids)s)
             ORDER BY repodir_id, nvra""" % {'pids': pids}).fetchall()
        for packb in packages_broken_recurs:
            pkg_id = packb[0]
            if pkg_id not in all_broken:
                all_broken[pkg_id] = {'nvra': packb[1], 'repo': packb[2],
                                      'deps': {}}
            dep_pkg_id = packb[6]
            deps = all_broken[pkg_id]['deps']
            if dep_pkg_id not in deps:
                deps[dep_pkg_id] = {'build_arch': packb[5], 'req_names': []}
            deps[dep_pkg_id]['req_names'].append(packb[4])
            broken_recursive.append(pkg_id)
    all_repodirs = dbc.execute("""
        SELECT id, name, sources FROM repodirs ORDER BY id""").fetchall()
    reponames = {repodir[0]: repodir[1] for repodir in all_repodirs}
    if broken_recursive:
        print 'Recursive broken dependencies:'
        for rd in all_repodirs:
            (rd_id, rd_name, rd_sources) = (rd[0], rd[1], rd[2])
            if rd_sources == '.':
                archs = dbc.execute("""
                    SELECT DISTINCT build_arch FROM rpm_requires
                     WHERE package_id IN (SELECT id FROM packages WHERE repodir_id = ?)
                    """, [rd_id]).fetchall()
                for arch_rec in archs:
                    arch = arch_rec[0]
                    repo_packages_broken = {}
                    for pkg_id in broken_recursive:
                        package = all_broken[pkg_id]
                        if package['repo'] == rd_id:
                            for dep in package['deps']:
                                if package['deps'][dep]['build_arch'] == arch:
                                    repo_packages_broken[package['nvra']] = pkg_id
                    if repo_packages_broken:
                        print '%d) %s (%s)' % (rd_id, rd_name, arch)
                        print_broken_packages(repo_packages_broken, arch,
                                              rd_id, reponames, all_broken)
            else:
                repo_packages_broken = {all_broken[id]['nvra']: id
                                        for id in broken_recursive
                                        if all_broken[id]['repo'] == rd_id}
                if repo_packages_broken:
                    print '%d) %s' % (rd_id, rd_name)
                    print_broken_packages(repo_packages_broken, None,
                                          rd_id, reponames, all_broken)
    if dot_output:
        for rd in all_repodirs:
            (rd_id, rd_name, rd_sources) = (rd[0], rd[1], rd[2])
            dot_file = None
            for pkg_id in all_broken:
                package = all_broken[pkg_id]
                if package['repo'] != rd_id:
                    continue
                if not dot_file:
                    dot_file = open('broken-repo-%d.dot' % rd_id, 'w')
                    OutputGraphHead(dot_file, rd_name)
                if package['deps'] is None:
                    dot_file.write('"%s" [color="red"];\n' % package['nvra'])
                else:
                    deps = package['deps']
                    for dep_id in deps:
                        dep_package_title = all_broken[dep_id]['nvra']
                        if all_broken[dep_id]['repo'] != rd_id:
                            dep_package_title += ' {' + \
                                reponames[all_broken[dep_id]['repo']] + '}'
                        dot_file.write('"%s" -> "%s" [color="blue"];\n' %
                                       (package['nvra'], dep_package_title))
            if dot_file:
                OutputGraphTail(dot_file)
def OutputGraphHead(file_output, dg_name):
    """Output Graph head.
    Static information about graph.
    """
    file_output.write('\n\ndigraph "%s" {\n' % dg_name +
                      'size="20.69,25.52";\nratio="fill";\n' +
                      'rankdir="TB";\nnode[style="filled"];\nnode[shape="box"];\n\n')


def OutputGraphTail(file_output):
    """Finish the graph.
    """
    file_output.write('}\n')
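
# Report self-dependencies: package_depend_res rows where a package's require
# is resolved by the package itself (dep_package_id = package_id), per repodir.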
def detect_loops(dbc):
    header = '===\n' \
             'Loopbacks:'
    repodirs = dbc.execute("""
        SELECT id, name, sources, path FROM repodirs ORDER BY id
        """).fetchall()
    for repodir in repodirs:
        (rd_id, rd_name) = (repodir[0], repodir[1])
        loopbacks = dbc.execute("""
            SELECT p.id, p.nvra, rpm_requires.name
              FROM package_depend_res pdr, packages p, rpm_requires
             WHERE pdr.package_id = p.id AND pdr.package_id = dep_package_id AND
                   rpm_requires.id = pdr.requires_id AND p.repodir_id = ?
             ORDER BY p.nvra, rpm_requires.name
            """, [rd_id]).fetchall()
        if loopbacks:
            if header:
                print header
                header = None
            print '%d) %s' % (rd_id, rd_name)
            pre_pkg_id = None
            pre_pkg_name = None
            requires = []
            cnt = 0
            for lb_rec in loopbacks:
                pkg_id = lb_rec[0]
                pkg_name = lb_rec[1]
                if pkg_id != pre_pkg_id:
                    cnt += 1
                    if pre_pkg_id is not None:
                        print '\t%s (%s)' % (pre_pkg_name, ','.join(requires))
                        requires = []
                    pre_pkg_id = pkg_id
                    pre_pkg_name = pkg_name
                requires.append(lb_rec[2])
            if pre_pkg_id is not None:
                print '\t%s (%s)' % (pre_pkg_name, ','.join(requires))
            print 'Total: %d' % cnt
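
# Report binary packages whose sourcerpm field is set but could not be matched
# to a source package in the database (sourcerpm_package IS NULL); only
# repodirs whose sources field is not '.' are checked.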
def detect_lost_sources(dbc):
    print '==='
    print 'Lost sources:'
    repodirs = dbc.execute("""
        SELECT id, name, sources, path FROM repodirs WHERE sources <> '.' ORDER BY id
        """).fetchall()
    for repodir in repodirs:
        (rd_id, rd_name) = (repodir[0], repodir[1])
        lost_sources = dbc.execute("""
            SELECT name, nvra, sourcerpm FROM packages
             WHERE repodir_id = ? AND
                   sourcerpm IS NOT NULL AND sourcerpm_package IS NULL
             ORDER BY name
            """, [rd_id]).fetchall()
        if lost_sources:
            print '%d) %s' % (rd_id, rd_name)
            for ls in lost_sources:
                print '\t%s (%s)' % (ls[1], ls[2])
            print 'Total: %d' % len(lost_sources)
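
# Greedily split each repository into connected components of the dependency
# graph (following package_depend_res in both directions); components of size
# one are listed separately as 'Singles'.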
def analyze_partitioning(dbc):
    print '==='
    print 'Possible partitioning:'
    repodirs = dbc.execute("""
        SELECT id, name, sources, path FROM repodirs WHERE sources <> '.' ORDER BY id
        """).fetchall()
    for repodir in repodirs:
        (rd_id, rd_name) = (repodir[0], repodir[1])
        partitions = []
        partitioned_packages = []
        singles = []
        while True:
            ppackages = ','.join(str(id) for id in partitioned_packages)
            if not ppackages:
                ppackages = '0'
            pkg1_rec = dbc.execute("""
                SELECT id, name, nvra
                  FROM packages WHERE repodir_id = ? AND id NOT IN (%s)
                 ORDER BY name
                 LIMIT 1""" % ppackages, [rd_id]).fetchall()
            if not pkg1_rec:
                break
            if not partitioned_packages:
                print '%d) %s' % (rd_id, rd_name)
            (pkg_id, pkg_name) = (pkg1_rec[0][0], pkg1_rec[0][2])
            partition_names = []
            partition_names.append(pkg_name)
            partition_ids = []
            partition_ids.append(pkg_id)
            partitioned_packages.append(pkg_id)
            current_level_packages = [pkg_id]
            while True:
                cl_packages = ','.join(str(id) for id in current_level_packages)
                part_packages = ','.join(str(id) for id in partition_ids)
                upper_packages = dbc.execute("""
                    SELECT packages.id, packages.name, nvra
                      FROM packages, package_depend_res
                     WHERE packages.id = package_depend_res.package_id AND
                           packages.repodir_id = ? AND
                           package_depend_res.dep_package_id IN (%s) AND
                           packages.id NOT IN (%s)
                     ORDER BY packages.name
                    """ % (cl_packages, part_packages), [rd_id]).fetchall()
                lower_packages = dbc.execute("""
                    SELECT packages.id, packages.name, nvra
                      FROM packages, package_depend_res
                     WHERE packages.id = package_depend_res.dep_package_id AND
                           packages.repodir_id = ? AND
                           package_depend_res.package_id IN (%s) AND
                           packages.id NOT IN (%s)
                     ORDER BY packages.name
                    """ % (cl_packages, part_packages), [rd_id]).fetchall()
                if not upper_packages and not lower_packages:
                    break
                current_level_packages = []
                for rec in upper_packages:
                    if rec[0] not in current_level_packages:
                        current_level_packages.append(rec[0])
                        partitioned_packages.append(rec[0])
                        partition_ids.append(rec[0])
                        partition_names.append(rec[2])
                for rec in lower_packages:
                    if rec[0] not in current_level_packages:
                        current_level_packages.append(rec[0])
                        partitioned_packages.append(rec[0])
                        partition_ids.append(rec[0])
                        partition_names.append(rec[2])
            if len(partition_names) == 1:
                #print partition_names
                singles.append(partition_names[0])
                #raise Exception('aaa')
            else:
                for p in sorted(partition_names):
                    print '\t%s' % p
                print 'Total: %d' % len(partition_names)
                print '---'
                print ''
        if len(singles) > 0:
            print 'Singles:'
            for s in sorted(singles):
                print '\t%s' % s
            print 'Total: %d' % len(singles)
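
# Report package files flagged with mark = 'not-found' - per the report header,
# object (executable) files that are provided but were not found during analysis.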
def detect_lost_object_files(dbc):
    header = '===\n' \
             'Lost object (executable) files (provided but not found):'
    repodirs = dbc.execute("""
        SELECT id, name, sources, path FROM repodirs ORDER BY id
        """).fetchall()
    for repodir in repodirs:
        (rd_id, rd_name) = (repodir[0], repodir[1])
        lost_object_files = dbc.execute("""
            SELECT nvra, package_files.path, mark
              FROM packages, package_files
             WHERE repodir_id = ? AND packages.id = package_files.package_id AND
                   mark = 'not-found'
             ORDER BY packages.name, package_files.path
            """, [rd_id]).fetchall()
        if lost_object_files:
            if header:
                print header
                header = None
            print '%d) %s' % (rd_id, rd_name)
            for lof in lost_object_files:
                print '\t%s: %s' % (lof[0], lof[1])
            print 'Total: %d' % len(lost_object_files)
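
# Report file entries marked as links (mark = 'link') whose link_to_path is set
# but was not resolved to any known file (link_to_file_id IS NULL).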
def detect_broken_object_links(dbc):
    header = '===\n' \
             'Invalid object (executable) file links:'
    repodirs = dbc.execute("""
        SELECT id, name, sources, path FROM repodirs ORDER BY id
        """).fetchall()
    for repodir in repodirs:
        (rd_id, rd_name) = (repodir[0], repodir[1])
        broken_object_links = dbc.execute("""
            SELECT nvra, package_files.path, link_to_path, mark
              FROM packages, package_files
             WHERE repodir_id = ? AND packages.id = package_files.package_id AND
                   mark = 'link' AND link_to_path IS NOT NULL AND link_to_file_id IS NULL
             ORDER BY packages.name, package_files.path
            """, [rd_id]).fetchall()
        if broken_object_links:
            if header:
                print header
                header = None
            print '%d) %s' % (rd_id, rd_name)
            for bol in broken_object_links:
                print '\t%s: %s -/-> %s' % (bol[0], bol[1], bol[2])
            print 'Total: %d' % len(broken_object_links)
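
# Return a comma-separated list of repository names the given repodir depends on
# (taken from the repodir_depends table).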
def get_repodir_depends(dbc, repodir_id):
    dep_repos = dbc.execute("""
        SELECT depend_repodir_name FROM repodir_depends WHERE repodir_id = ?
        """, [repodir_id]).fetchall()
    return ', '.join([dep_repo[0] for dep_repo in dep_repos])
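
# Summarize how the NEEDED entries of object files (so_needed) were resolved:
# res_type = 1 via rpm requires/provides, res_type = 2 via flat search (the
# res_type meanings are inferred from the report headers), then list the
# entries with no resolution at all.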
def detect_so_needed_not_resolved(dbc):
    repodirs = dbc.execute("""
        SELECT id, name, sources, path FROM repodirs ORDER BY id
        """).fetchall()
    print '==='
    print 'Objects needed and resolved by rpm requires-provides:'
    for repodir in repodirs:
        (rd_id, rd_name) = (repodir[0], repodir[1])
        objects_needed_resolved1 = dbc.execute("""
            SELECT COUNT(1)
              FROM packages CROSS JOIN package_files CROSS JOIN so_needed
                   CROSS JOIN so_needed_res
             WHERE repodir_id = ? AND package_files.package_id = packages.id AND
                   so_needed.obj_file_id = package_files.id AND
                   so_needed_id = so_needed.id AND res_type = 1
            """, [rd_id]).fetchone()
        print '%d) %s: %d' % (rd_id, rd_name, objects_needed_resolved1[0])
    print '==='
    print 'Objects needed and resolved by flat search:'
    for repodir in repodirs:
        (rd_id, rd_name) = (repodir[0], repodir[1])
        objects_needed_resolved2 = dbc.execute("""
            SELECT COUNT(1)
              FROM packages CROSS JOIN package_files CROSS JOIN so_needed
                   CROSS JOIN so_needed_res
             WHERE repodir_id = ? AND package_files.package_id = packages.id AND
                   so_needed.obj_file_id = package_files.id AND
                   so_needed_id = so_needed.id AND res_type = 2
            """, [rd_id]).fetchone()
        print '%d) %s: %d' % (rd_id, rd_name, objects_needed_resolved2[0])
    header = '===\n' \
             'Objects needed but not resolved:'
    for repodir in repodirs:
        (rd_id, rd_name) = (repodir[0], repodir[1])
        objects_needed_not_resolved = dbc.execute("""
            SELECT packages.nvra, package_files.path, so_needed.name
              FROM packages CROSS JOIN package_files CROSS JOIN so_needed
                   LEFT OUTER JOIN so_needed_res ON so_needed_id = so_needed.id
             WHERE repodir_id = ? AND package_files.package_id = packages.id AND
                   so_needed.obj_file_id = package_files.id AND so_needed_id IS NULL
            """, [rd_id]).fetchall()
        if objects_needed_not_resolved:
            repodir_depends = get_repodir_depends(dbc, rd_id)
            if header:
                print header
                header = None
            print ('%d) %s' % (rd_id, rd_name)) + \
                ('' if repodir_depends == '' else
                 (' (depends on: %s)' % repodir_depends))
            for obj_nr in objects_needed_not_resolved:
                print '\t%s: %s -?-> %s' % (obj_nr[0], obj_nr[1], obj_nr[2])
            print 'Total: %d' % len(objects_needed_not_resolved)
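
# The same summary for undefined symbols (sym_type = 0): resolved through the
# .so NEEDED search (res_type 1 and 2) or by flat search (res_type 3), followed
# by the symbols that were not resolved at all.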
def detect_symbols_not_resolved(dbc):
    repodirs = dbc.execute("""
        SELECT id, name, sources, path FROM repodirs ORDER BY id
        """).fetchall()
    print '==='
    print 'Symbols resolved by .so NEEDED search:'
    for repodir in repodirs:
        (rd_id, rd_name) = (repodir[0], repodir[1])
        symbols_resolved1_2 = dbc.execute("""
            SELECT COUNT(1)
              FROM packages CROSS JOIN package_files CROSS JOIN obj_symbols
                   CROSS JOIN obj_symbols_res
             WHERE packages.repodir_id = ? AND packages.id = package_files.package_id AND
                   package_files.id = obj_symbols.obj_file_id AND sym_type = 0 AND
                   obj_symbols_res.obj_sym_id = obj_symbols.id AND res_type IN (1, 2)
            """, [rd_id]).fetchone()
        print '%d) %s: %d' % (rd_id, rd_name, symbols_resolved1_2[0])
    print '==='
    print 'Symbols resolved by flat search:'
    for repodir in repodirs:
        (rd_id, rd_name) = (repodir[0], repodir[1])
        symbols_resolved3 = dbc.execute("""
            SELECT COUNT(1)
              FROM packages CROSS JOIN package_files CROSS JOIN obj_symbols
                   CROSS JOIN obj_symbols_res
             WHERE packages.repodir_id = ? AND packages.id = package_files.package_id AND
                   package_files.id = obj_symbols.obj_file_id AND sym_type = 0 AND
                   obj_symbols_res.obj_sym_id = obj_symbols.id AND res_type = 3
            """, [rd_id]).fetchone()
        print '%d) %s: %d' % (rd_id, rd_name, symbols_resolved3[0])
    header = '===\n' \
             'Symbols not resolved:'
    for repodir in repodirs:
        (rd_id, rd_name) = (repodir[0], repodir[1])
        symbols_not_resolved = dbc.execute("""
            SELECT packages.nvra, package_files.path, obj_symbols.name
              FROM packages CROSS JOIN package_files CROSS JOIN obj_symbols
             WHERE packages.repodir_id = ? AND packages.id = package_files.package_id AND
                   package_files.id = obj_symbols.obj_file_id AND sym_type = 0 AND
                   NOT EXISTS (SELECT 1 FROM obj_symbols_res
                                WHERE obj_sym_id = obj_symbols.id)
            """, [rd_id]).fetchall()
        if symbols_not_resolved:
            repodir_depends = get_repodir_depends(dbc, rd_id)
            if header:
                print header
                header = None
            print ('%d) %s' % (rd_id, rd_name)) + \
                ('' if repodir_depends == '' else
                 (' (depends on: %s)' % repodir_depends))
            for sym_nr in symbols_not_resolved:
                print '\t%s: %s -?-> %s' % (sym_nr[0], sym_nr[1], sym_nr[2])
            print 'Total: %d' % len(symbols_not_resolved)
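
# Run all checks against repo.db in sequence; the detect_loops check is
# currently disabled.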
def main(args):
    options = parseargs()
    conn = sqlite3.connect(DB)
    dbc = conn.cursor()
    detect_broken_dependencies(dbc, options.dot_graphs)
    #detect_loops(dbc)
    detect_lost_sources(dbc)
    analyze_partitioning(dbc)
    detect_lost_object_files(dbc)
    detect_broken_object_links(dbc)
    detect_so_needed_not_resolved(dbc)
    detect_symbols_not_resolved(dbc)
    conn.close()


if __name__ == "__main__":
    main(sys.argv)