repo-analyzer/analyze-repodb.py

404 lines
16 KiB
Python
Raw Normal View History

#!/usr/bin/python
# -*- coding: utf-8 -*-
2014-02-06 11:44:02 +04:00
import os
import sys
import gettext
import argparse
import sqlite3
import string
import rpm
import re
# Path of the SQLite database file opened by main().
DB = 'repo.db'
def detect_broken_dependencies(dbc):
    """Print packages with unresolved dependencies and their dependants.

    Two reports are written to stdout:

    * "Broken dependencies (bottom level)": for every repodir, the packages
      that have at least one ``rpm_requires`` row without a matching
      ``package_depend_res`` row, together with the unresolved requirement
      names, followed by the number of such packages in the repodir.
    * "Recursive broken dependencies": packages whose resolved dependencies
      lead, directly or through other packages, to a bottom-level broken
      package; each line shows the chain down to that package.

    Nothing is printed when no bottom-level broken package exists.

    Args:
        dbc: database cursor whose ``execute()`` returns an object providing
            ``fetchall()`` (main() passes a sqlite3 cursor).
    """
    # NOTE: print() is always called with a single argument so the output is
    # the same under Python 2 (print statement) and Python 3.

    def print_broken_packages(repo_packages_broken):
        # repo_packages_broken maps nvra -> package id for one report group.
        for rpb_name in sorted(repo_packages_broken):
            dep_chain = []
            dep_id = all_broken[repo_packages_broken[rpb_name]]['depid']
            # Follow the 'depid' links until a bottom-level entry (depid 0).
            while dep_id != 0:
                dep_chain.append('%s (%d)' % (all_broken[dep_id]['nvra'],
                                              all_broken[dep_id]['repo']))
                dep_id = all_broken[dep_id]['depid']
            print('\t' + rpb_name + ' => ' + ' => '.join(dep_chain))
        print('Total: %d' % len(repo_packages_broken))
        print('')

    # Requirements that were never resolved to any providing package.
    broken_level0 = dbc.execute("""
        SELECT packages.id, nvra, repodir_id, repodirs.name,
               rpm_requires.name AS req_name, rpm_requires.build_arch AS req_arch
          FROM packages, repodirs, rpm_requires
          WHERE packages.repodir_id = repodirs.id AND
                packages.id=rpm_requires.package_id AND
                NOT EXISTS (SELECT 1 FROM package_depend_res pdr
                              WHERE pdr.package_id = packages.id AND
                                    pdr.requires_id = rpm_requires.id)
          ORDER BY repodir_id, nvra""").fetchall()

    # package id -> {'repo', 'nvra', 'reqname', 'build_arch', 'depid'};
    # 'depid' is 0 for bottom-level entries, otherwise the id of the broken
    # package this one depends on.
    all_broken = {}
    if broken_level0:
        print('Broken dependencies (bottom level):')
        cur_repodir_id = None
        cur_pkg_id = None
        cur_pkg_nvra = None
        cur_reqs = []
        repodir_cnt = 0
        for (bp_id, bp_nvra, bp_repodir_id, bp_repodir_name,
             bp_reqname, bp_reqarch) in broken_level0:
            if cur_pkg_id is not None and bp_id != cur_pkg_id:
                # A new package starts: flush the one collected so far.
                print('\t%s (%s)' % (cur_pkg_nvra, ', '.join(cur_reqs)))
                repodir_cnt += 1
                cur_reqs = []
            if bp_reqarch is not None:
                bp_reqname += ':' + bp_reqarch
            if bp_reqname not in cur_reqs:
                cur_reqs.append(bp_reqname)
            if bp_repodir_id != cur_repodir_id:
                if cur_repodir_id is not None:
                    print('Total: %d' % repodir_cnt)
                print('%d) %s' % (bp_repodir_id, bp_repodir_name))
                cur_repodir_id = bp_repodir_id
                repodir_cnt = 0
            if bp_id not in all_broken:
                all_broken[bp_id] = {'repo': bp_repodir_id, 'nvra': bp_nvra,
                                     'reqname': bp_reqname,
                                     'build_arch': bp_reqarch, 'depid': 0}
            cur_pkg_id = bp_id
            cur_pkg_nvra = bp_nvra
        # Flush the last package; it must be counted before the total is
        # printed, otherwise the last repodir's total is one too low.
        print('\t%s (%s)' % (cur_pkg_nvra, ', '.join(cur_reqs)))
        repodir_cnt += 1
        print('Total: %d' % repodir_cnt)

    if not all_broken:
        # No bottom-level breakage, hence nothing can depend on it.
        return

    # Repeatedly pull in packages that depend on an already known broken
    # package until a pass adds nothing new.
    broken_recursive = []
    known_cnt = -1
    while known_cnt < len(all_broken):
        known_cnt = len(all_broken)
        # The ids come from the database itself, so inlining them is safe.
        pids = ','.join(str(pkg_id) for pkg_id in all_broken)
        packages_broken_recurs = dbc.execute("""
            SELECT packages.id, nvra, repodir_id, repodirs.name,
                   rpm_requires.name AS req_name, build_arch, dep_package_id
              FROM packages, repodirs, rpm_requires, package_depend_res
              WHERE packages.repodir_id = repodirs.id AND
                    packages.id = rpm_requires.package_id AND
                    packages.id = package_depend_res.package_id AND
                    rpm_requires.id = package_depend_res.requires_id AND
                    dep_package_id IN (%(pids)s) AND
                    packages.id NOT IN (%(pids)s)
              ORDER BY repodir_id, nvra""" % {'pids': pids}).fetchall()
        for packb in packages_broken_recurs:
            if packb[0] not in all_broken:
                broken_recursive.append(packb[0])
            # A package may appear in several rows; the last row wins.
            all_broken[packb[0]] = {'repo': packb[2], 'nvra': packb[1],
                                    'reqname': packb[4], 'build_arch': packb[5],
                                    'depid': packb[6]}

    if broken_recursive:
        print('Recursive broken dependencies:')
        all_repodirs = dbc.execute("""
            SELECT id, name, sources FROM repodirs ORDER BY id""").fetchall()
        for (rd_id, rd_name, rd_sources) in all_repodirs:
            if rd_sources == '.':
                # NOTE(review): sources == '.' presumably marks a repodir
                # that serves as its own source repository - confirm.
                # Such repodirs are reported separately per build_arch.
                archs = dbc.execute("""
                    SELECT DISTINCT build_arch FROM rpm_requires
                      WHERE package_id IN (SELECT id FROM packages WHERE repodir_id = ?)
                    """, [rd_id]).fetchall()
                for arch_rec in archs:
                    repo_packages_broken = {
                        all_broken[pkg_id]['nvra']: pkg_id
                        for pkg_id in broken_recursive
                        if all_broken[pkg_id]['repo'] == rd_id and
                        all_broken[pkg_id]['build_arch'] == arch_rec[0]}
                    if repo_packages_broken:
                        print('%d) %s (%s)' % (rd_id, rd_name, arch_rec[0]))
                        print_broken_packages(repo_packages_broken)
            else:
                repo_packages_broken = {
                    all_broken[pkg_id]['nvra']: pkg_id
                    for pkg_id in broken_recursive
                    if all_broken[pkg_id]['repo'] == rd_id}
                if repo_packages_broken:
                    print('%d) %s' % (rd_id, rd_name))
                    print_broken_packages(repo_packages_broken)
def detect_lost_sources(dbc):
    """Print, per repodir, the packages whose source package is missing.

    Only repodirs whose ``sources`` column differs from '.' are examined.
    A package is listed when its ``sourcerpm`` is set while its
    ``sourcerpm_package`` is NULL.

    Args:
        dbc: database cursor used to run the queries.
    """
    print('===')
    print('Lost sources:')
    repodir_rows = dbc.execute("""
        SELECT id, name, sources, path FROM repodirs WHERE sources <> '.' ORDER BY id
        """).fetchall()
    for repodir_row in repodir_rows:
        rd_id = repodir_row[0]
        rd_name = repodir_row[1]
        orphans = dbc.execute("""
            SELECT name, nvra, sourcerpm FROM packages
              WHERE repodir_id = ? AND
                    sourcerpm IS NOT NULL AND sourcerpm_package IS NULL
              ORDER BY name
            """, [rd_id]).fetchall()
        if not orphans:
            # Repodirs without lost sources are not mentioned at all.
            continue
        print('%d) %s' % (rd_id, rd_name))
        for _name, nvra, sourcerpm in orphans:
            print('\t%s (%s)' % (nvra, sourcerpm))
        print('Total: %d' % len(orphans))
def analyze_partitioning(dbc):
    """Print groups of packages connected through resolved dependencies.

    For every repodir whose ``sources`` column differs from '.', packages
    are split into groups: starting from one not yet assigned package, the
    group is grown with every package of the same repodir that is linked to
    it by a ``package_depend_res`` row in either direction. Groups with more
    than one member are printed as they are found; packages that end up
    alone are collected and printed afterwards under "Singles:".

    Args:
        dbc: database cursor used to run the queries.
    """
    # Packages of the repodir that depend on one of the frontier packages.
    dependants_sql = """
        SELECT packages.id, packages.name, nvra
          FROM packages, package_depend_res
          WHERE packages.id = package_depend_res.package_id AND
                packages.repodir_id = ? AND
                package_depend_res.dep_package_id IN (%s) AND
                packages.id NOT IN (%s)
          ORDER BY packages.name
        """
    # Packages of the repodir that one of the frontier packages depends on.
    dependencies_sql = """
        SELECT packages.id, packages.name, nvra
          FROM packages, package_depend_res
          WHERE packages.id = package_depend_res.dep_package_id AND
                packages.repodir_id = ? AND
                package_depend_res.package_id IN (%s) AND
                packages.id NOT IN (%s)
          ORDER BY packages.name
        """

    print('===')
    print('Possible partitioning:')
    repodir_rows = dbc.execute("""
        SELECT id, name, sources, path FROM repodirs WHERE sources <> '.' ORDER BY id
        """).fetchall()
    for repodir_row in repodir_rows:
        rd_id, rd_name = repodir_row[0], repodir_row[1]
        assigned_ids = []
        single_names = []
        while True:
            # '0' keeps the NOT IN list non-empty before anything is assigned.
            excluded = ','.join(str(pkg) for pkg in assigned_ids) or '0'
            seed_rows = dbc.execute("""
                SELECT id, name, nvra
                  FROM packages WHERE repodir_id = ? AND id NOT IN (%s)
                  ORDER BY name
                  LIMIT 1""" % excluded, [rd_id]).fetchall()
            if not seed_rows:
                break
            if not assigned_ids:
                # First package of this repodir: print the repodir heading.
                print('%d) %s' % (rd_id, rd_name))
            seed_id, seed_nvra = seed_rows[0][0], seed_rows[0][2]
            member_names = [seed_nvra]
            member_ids = [seed_id]
            assigned_ids.append(seed_id)
            frontier = [seed_id]
            while True:
                frontier_list = ','.join(str(pkg) for pkg in frontier)
                member_list = ','.join(str(pkg) for pkg in member_ids)
                dependants = dbc.execute(
                    dependants_sql % (frontier_list, member_list),
                    [rd_id]).fetchall()
                dependencies = dbc.execute(
                    dependencies_sql % (frontier_list, member_list),
                    [rd_id]).fetchall()
                if not (dependants or dependencies):
                    break
                frontier = []
                for rec in dependants + dependencies:
                    if rec[0] in frontier:
                        # Already picked up in this round.
                        continue
                    frontier.append(rec[0])
                    assigned_ids.append(rec[0])
                    member_ids.append(rec[0])
                    member_names.append(rec[2])
            if len(member_names) == 1:
                single_names.append(member_names[0])
            else:
                for nvra in sorted(member_names):
                    print('\t%s' % nvra)
                print('Total: %d' % len(member_names))
                print('---')
                print('')
        if single_names:
            print('Singles:')
            for nvra in sorted(single_names):
                print('\t%s' % nvra)
            print('Total: %d' % len(single_names))
def detect_lost_object_files(dbc):
    """Print, per repodir, the package files marked as 'not-found'.

    The section heading is printed only once, right before the first
    repodir that has such files; nothing is printed if there are none.

    Args:
        dbc: database cursor used to run the queries.
    """
    heading_printed = False
    repodir_rows = dbc.execute("""
        SELECT id, name, sources, path FROM repodirs ORDER BY id
        """).fetchall()
    for repodir_row in repodir_rows:
        rd_id = repodir_row[0]
        rd_name = repodir_row[1]
        missing = dbc.execute("""
            SELECT nvra, package_files.path, mark
              FROM packages, package_files
              WHERE repodir_id = ? AND packages.id = package_files.package_id AND mark = 'not-found'
              ORDER BY packages.name, package_files.path
            """, [rd_id]).fetchall()
        if not missing:
            continue
        if not heading_printed:
            print('===\nLost object (executable) files (provided but not found):')
            heading_printed = True
        print('%d) %s' % (rd_id, rd_name))
        for nvra, file_path, _mark in missing:
            print('\t%s: %s' % (nvra, file_path))
        print('Total: %d' % len(missing))
def detect_broken_object_links(dbc):
    """Print, per repodir, the link files whose target was not found.

    A file is reported when its ``mark`` is 'link', ``link_to_path`` is set
    and ``link_to_file_id`` is NULL. The section heading is printed only
    once, before the first repodir that has such links.

    Args:
        dbc: database cursor used to run the queries.
    """
    heading_printed = False
    repodir_rows = dbc.execute("""
        SELECT id, name, sources, path FROM repodirs ORDER BY id
        """).fetchall()
    for repodir_row in repodir_rows:
        rd_id = repodir_row[0]
        rd_name = repodir_row[1]
        dangling = dbc.execute("""
            SELECT nvra, package_files.path, link_to_path, mark
              FROM packages, package_files
              WHERE repodir_id = ? AND packages.id = package_files.package_id AND
                    mark = 'link' AND link_to_path IS NOT NULL AND link_to_file_id IS NULL
              ORDER BY packages.name, package_files.path
            """, [rd_id]).fetchall()
        if not dangling:
            continue
        if not heading_printed:
            print('===\nInvalid object (executable) file links:')
            heading_printed = True
        print('%d) %s' % (rd_id, rd_name))
        for nvra, file_path, target, _mark in dangling:
            print('\t%s: %s -/-> %s' % (nvra, file_path, target))
        print('Total: %d' % len(dangling))
def get_repodir_depends(dbc, repodir_id):
    """Return the names of the repodirs that *repodir_id* depends on.

    Args:
        dbc: database cursor used to run the query.
        repodir_id: id of the repodir to look up in ``repodir_depends``.

    Returns:
        The ``depend_repodir_name`` values joined with ', '; an empty
        string when the repodir has no recorded dependencies.
    """
    rows = dbc.execute("""
        SELECT depend_repodir_name FROM repodir_depends WHERE repodir_id = ?
        """, [repodir_id]).fetchall()
    names = []
    for (depend_name,) in rows:
        names.append(depend_name)
    return ', '.join(names)
2014-02-07 13:53:14 +04:00
def detect_so_needed_not_resolved(dbc):
    """Print statistics on shared objects needed by packaged object files.

    For every repodir this prints the number of ``so_needed`` /
    ``so_needed_res`` row pairs with ``res_type`` 1 ("resolved by rpm
    requires-provides") and with ``res_type`` 2 ("resolved by flat search").
    Then, for repodirs that have any, it lists the ``so_needed`` entries
    with no ``so_needed_res`` row, together with the repodirs the repodir
    depends on (obtained from get_repodir_depends()).

    Args:
        dbc: database cursor used to run the queries.
    """
    repodirs = dbc.execute("""
        SELECT id, name, sources, path FROM repodirs ORDER BY id
        """).fetchall()

    # The two "resolved" reports differ only in title and res_type.
    resolved_reports = (
        ('Objects needed and resolved by rpm requires-provides:', 1),
        ('Objects needed and resolved by flat search:', 2),
    )
    for title, res_type in resolved_reports:
        print('===')
        print(title)
        for repodir in repodirs:
            (rd_id, rd_name) = (repodir[0], repodir[1])
            resolved_cnt = dbc.execute("""
                SELECT COUNT(1) FROM packages CROSS JOIN package_files CROSS JOIN so_needed CROSS JOIN so_needed_res
                  WHERE repodir_id = ? AND package_files.package_id = packages.id AND
                        so_needed.obj_file_id = package_files.id AND so_needed_id = so_needed.id AND res_type = ?
                """, [rd_id, res_type]).fetchone()
            print('%d) %s: %d' % (rd_id, rd_name, resolved_cnt[0]))

    # Printed lazily, only if some repodir has unresolved objects. The
    # newline after '===' keeps the separator on its own line, like the
    # other section headers of this script.
    header = '===\n' \
             'Objects needed but not resolved:'
    for repodir in repodirs:
        (rd_id, rd_name) = (repodir[0], repodir[1])
        objects_needed_not_resolved = dbc.execute("""
            SELECT packages.nvra, package_files.path, so_needed.name
              FROM packages CROSS JOIN package_files CROSS JOIN so_needed
              LEFT OUTER JOIN so_needed_res ON so_needed_id = so_needed.id
              WHERE repodir_id = ? AND package_files.package_id = packages.id AND
                    so_needed.obj_file_id = package_files.id AND so_needed_id IS NULL
            """, [rd_id]).fetchall()
        if not objects_needed_not_resolved:
            continue
        repodir_depends = get_repodir_depends(dbc, rd_id)
        if header:
            print(header)
            header = None
        repodir_title = '%d) %s' % (rd_id, rd_name)
        if repodir_depends != '':
            repodir_title += ' (depends on: %s)' % repodir_depends
        print(repodir_title)
        for obj_nr in objects_needed_not_resolved:
            print('\t%s: %s -?-> %s' % (obj_nr[0], obj_nr[1], obj_nr[2]))
        print('Total: %d' % len(objects_needed_not_resolved))
2014-02-07 13:53:14 +04:00
def detect_symbols_not_resolved(dbc):
    """Print statistics on resolution of object file symbols.

    Only ``obj_symbols`` rows with ``sym_type`` 0 are considered. For every
    repodir this prints the number of symbol / ``obj_symbols_res`` row pairs
    with ``res_type`` 1 or 2 ("resolved by .so NEEDED search") and with
    ``res_type`` 3 ("resolved by flat search"). Then, for repodirs that have
    any, it lists the symbols with no ``obj_symbols_res`` row, together
    with the repodirs the repodir depends on (obtained from
    get_repodir_depends()).

    Args:
        dbc: database cursor used to run the queries.
    """
    repodirs = dbc.execute("""
        SELECT id, name, sources, path FROM repodirs ORDER BY id
        """).fetchall()

    # The two "resolved" reports differ only in title and res_type filter;
    # the filters are fixed SQL fragments, not external input.
    resolved_reports = (
        ('Symbols resolved by .so NEEDED search:', 'res_type IN (1, 2)'),
        ('Symbols resolved by flat search:', 'res_type = 3'),
    )
    for title, res_type_filter in resolved_reports:
        print('===')
        print(title)
        for repodir in repodirs:
            (rd_id, rd_name) = (repodir[0], repodir[1])
            resolved_cnt = dbc.execute("""
                SELECT COUNT(1) FROM packages CROSS JOIN package_files CROSS JOIN obj_symbols CROSS JOIN obj_symbols_res
                  WHERE packages.repodir_id = ? AND packages.id = package_files.package_id AND
                        package_files.id = obj_symbols.obj_file_id AND sym_type = 0 AND
                        obj_symbols_res.obj_sym_id = obj_symbols.id AND %s
                """ % res_type_filter, [rd_id]).fetchone()
            print('%d) %s: %d' % (rd_id, rd_name, resolved_cnt[0]))

    # Printed lazily, only if some repodir has unresolved symbols. The
    # newline after '===' keeps the separator on its own line, like the
    # other section headers of this script.
    header = '===\n' \
             'Symbols not resolved:'
    for repodir in repodirs:
        (rd_id, rd_name) = (repodir[0], repodir[1])
        symbols_not_resolved = dbc.execute("""
            SELECT packages.nvra, package_files.path, obj_symbols.name
              FROM packages CROSS JOIN package_files CROSS JOIN obj_symbols
              WHERE packages.repodir_id = ? AND packages.id = package_files.package_id AND
                    package_files.id = obj_symbols.obj_file_id AND sym_type = 0 AND
                    NOT EXISTS (SELECT 1 FROM obj_symbols_res WHERE obj_sym_id = obj_symbols.id)
            """, [rd_id]).fetchall()
        if not symbols_not_resolved:
            continue
        repodir_depends = get_repodir_depends(dbc, rd_id)
        if header:
            print(header)
            header = None
        repodir_title = '%d) %s' % (rd_id, rd_name)
        if repodir_depends != '':
            repodir_title += ' (depends on: %s)' % repodir_depends
        print(repodir_title)
        for sym_nr in symbols_not_resolved:
            print('\t%s: %s -?-> %s' % (sym_nr[0], sym_nr[1], sym_nr[2]))
        print('Total: %d' % len(symbols_not_resolved))
def main(args):
    """Open the database named by DB and run every analysis report.

    Args:
        args: command-line argument list (currently not used).
    """
    conn = sqlite3.connect(DB)
    try:
        dbc = conn.cursor()
        detect_broken_dependencies(dbc)
        detect_lost_sources(dbc)
        analyze_partitioning(dbc)
        detect_lost_object_files(dbc)
        detect_broken_object_links(dbc)
        detect_so_needed_not_resolved(dbc)
        detect_symbols_not_resolved(dbc)
    finally:
        # Release the database even when one of the reports raises.
        conn.close()


if __name__ == "__main__":
    main(sys.argv)