repo-analyzer/prepare-repodb.py
2014-02-06 14:20:28 +04:00

384 lines
17 KiB
Python
Executable file

#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import sys
import gettext
import argparse
import sqlite3
import string
import re
from versutils import *
# File name of the SQLite database; main() opens it with sqlite3.connect(DB).
DB = 'repo.db'
def _find_best_provider(dbc, in_repodirs, requires_name, requires_flags, requires_version):
    """Pick the package whose ``provides`` entry best satisfies one requirement.

    Looks at every ``provides`` row named ``requires_name`` that belongs to a
    package of the repodirs listed in ``in_repodirs`` (comma-separated ids).
    A candidate is accepted when ``version_ok`` approves it; among accepted
    candidates an unversioned one ('*') or one whose version is greater than
    the current choice replaces that choice.

    Returns the id of the chosen package, or None when nothing matches.
    Raises Exception for a provides row that carries a version but no
    comparison flags.
    """
    depend_candidates = dbc.execute("""
SELECT packages.id AS package_id, packages.name AS package_name,
provides.id, provides.name, flags, provides.version
FROM packages, provides
WHERE provides.package_id = packages.id AND repodir_id IN (%s) AND provides.name = ?
""" % in_repodirs, [requires_name]).fetchall()

    found_pkg_id = None
    found_version = None
    for dep_cand in depend_candidates:
        provides_pkg_id = dep_cand[0]
        provides_flags = dep_cand[4]
        provides_version = dep_cand[5]
        # The parentheses are required: '==' binds tighter than '&', so
        # "flags & MASK == 0" would evaluate "flags & (MASK == 0)" and the
        # branch below could never be taken.
        if (provides_flags & RPMSENSE_SENSEMASK) == 0:
            if provides_version:
                raise Exception('Invalid provides version (flags = %d, version = %s)!' %
                                (provides_flags, provides_version))
            # No comparison flags and no version: marked with '*'.
            provides_version = '*'
        if not version_ok(requires_version,
                          requires_flags & RPMSENSE_SENSEMASK,
                          provides_version):
            continue
        better_version = (found_version is None or
                          provides_version == '*' or
                          version_ok(provides_version,
                                     RPMSENSE_GREATER,
                                     found_version))
        if better_version:
            found_pkg_id = provides_pkg_id
            found_version = provides_version
    return found_pkg_id


def _resolve_file_dependency(dbc, in_repodirs, cpackage_id, requires_name, requires_flags):
    """Resolve a requirement whose name is a file path.

    A requirement flagged as post / preun / postun scriptlet dependency is
    satisfied by the requiring package itself when that package ships the
    file.  Any other file requirement is satisfied by the single package of
    the searched repodirs that ships the path; several owners are reported
    on stdout and left unresolved.

    Returns the resolving package id or None.
    """
    script_flags = (RPMSENSE_SCRIPT_POST |
                    RPMSENSE_SCRIPT_PREUN |
                    RPMSENSE_SCRIPT_POSTUN)
    # NOTE(review): the source this was restored from had lost its
    # indentation; the repository-wide lookup below is assumed to be the
    # "else" of the scriptlet-flag test -- confirm against the original.
    if (requires_flags & script_flags) != 0:
        internal_files = dbc.execute("""
SELECT 1 FROM package_files WHERE package_id = ? AND path = ?
""", [cpackage_id, requires_name]).fetchall()
        if len(internal_files) > 0:
            return cpackage_id
        return None

    #TODO: Check file dependencies (/usr/bin/python (required by ant-scripts-1.7.1-7.0.6.noarch), /usr/sbin/useradd (required by tomcat5-5.5.28-0.5.2.noarch))?
    files_dependency = dbc.execute("""
SELECT package_id FROM package_files
WHERE path = ? AND
package_id in (SELECT id FROM packages WHERE repodir_id IN (%s))
""" % in_repodirs, [requires_name]).fetchall()
    if len(files_dependency) == 1:
        return files_dependency[0][0]
    if len(files_dependency) > 1:
        print("File dependency (%s) has multiple resolutions (%d)." %
              (requires_name, len(files_dependency)))
    return None


def process_repodir_requires(dbc, repodir_id, repodir_name, repodir_depends, requires_build_arch):
    """Resolve the ``requires`` rows of one repodir to the packages satisfying them.

    For every requirement of every package in ``repodir_id`` the resolving
    package id is written to ``requires.dep_package_id``.  Candidates are
    searched in the repodir itself and in ``repodir_depends``.  Resolution
    order for a requirement that is not yet in the per-call cache:

    1. ``rpmlib(...)`` / ``executable(...)`` names are recorded as -1;
    2. otherwise the best matching ``provides`` entry is used;
    3. an unresolved requirement flagged RPMSENSE_MISSINGOK is recorded as -1;
    4. an unresolved name starting with '/' is treated as file dependency.

    Unresolved requirements are printed and counted; the count is printed at
    the end.  The module-level counter ``n`` is incremented once per
    requirement row.

    Args:
        dbc: sqlite3 cursor used for all queries and updates.
        repodir_id: id of the repodir whose packages are processed.
        repodir_name: repodir name, used only in the progress message.
        repodir_depends: ids of additional repodirs to search for providers.
        requires_build_arch: when not None, only requirements whose
            ``build_arch`` column equals this value are processed.

    Raises:
        Exception: for a provides row with a version but no comparison flags.
    """
    global n

    print('Processing repo %d: %s (with depends: %s)' %
          (repodir_id, repodir_name, str(repodir_depends)))

    # The architecture is passed as a bound parameter rather than being
    # spliced into the SQL text.
    arch_filter = ''
    query_args = [repodir_id]
    if requires_build_arch is not None:
        arch_filter = ' AND build_arch = ?'
        query_args.append(requires_build_arch)
    package_requires = dbc.execute("""
SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
requires.id, requires.name, flags, requires.version
FROM packages, requires
WHERE repodir_id = ? AND requires.package_id = packages.id %s
ORDER BY packages.name, requires.name
""" % arch_filter, query_args).fetchall()

    search_repodirs = [repodir_id]
    search_repodirs.extend(repodir_depends)
    in_repodirs = ','.join(str(search_id) for search_id in search_repodirs)

    broken_dep = 0
    # Maps name/flags/version of an already resolved requirement to its
    # package id so identical requirements are looked up only once.
    requires_cache = {}
    # TODO: Reuse the cache for dependent repositories???

    for packreq in package_requires:
        cpackage_id = packreq[0]
        package_nvra = packreq[2]
        requires_id = packreq[3]
        requires_name = packreq[4]
        requires_flags = packreq[5]
        requires_version = packreq[6]

        requirement_uid = requires_name + '\0' + str(requires_flags) + '\0' + requires_version
        pkg_id = requires_cache.get(requirement_uid)
        if pkg_id is None:
            if re.match(r'\A(rpmlib|executable)\(.+\)\Z', requires_name):
                # see if($N=~/\A(rpmlib|executable)\(.+\)\Z/) in urpm_repoclosure.pl
                pkg_id = -1
            else:
                pkg_id = _find_best_provider(dbc, in_repodirs, requires_name,
                                             requires_flags, requires_version)
            if pkg_id is None and (requires_flags & RPMSENSE_MISSINGOK) != 0:
                pkg_id = -1  # TODO: Fix for valid foreign key
            if pkg_id is None and requires_name.startswith('/'):  # file dependency
                pkg_id = _resolve_file_dependency(dbc, in_repodirs, cpackage_id,
                                                  requires_name, requires_flags)

        if pkg_id is not None:
            dbc.execute("""
UPDATE requires SET dep_package_id = ? WHERE id = ?
""", [pkg_id, requires_id])
            requires_cache[requirement_uid] = pkg_id
        else:
            print('%s   %s  (required by %s)  not found!!!' %
                  (requires_name, requires_version, package_nvra))
            broken_dep += 1
        n = n + 1

    print('broken_deps:  %s' % broken_dep)
    print('')
def extract_arch(arch_template, repo_name):
    """Return the part of ``repo_name`` that stands in for ``$arch``.

    ``arch_template`` is split at its first ``$arch`` marker.  When
    ``repo_name`` starts with the text before the marker and ends with the
    text after it, whatever lies in between is returned.  None is returned
    when the template has no marker or the name does not fit the template.
    """
    head, marker, tail = arch_template.partition('$arch')
    if marker and repo_name.startswith(head) and repo_name.endswith(tail):
        # An explicit end index keeps the slice right when ``tail`` is empty.
        return repo_name[len(head):len(repo_name) - len(tail)]
    return None
def process_repodir_file_links(dbc, repodir_id, repodir_name, repodir_depends):
    """Point every link file of a repodir at the file it links to.

    For each ``package_files`` row of the repodir that has ``link_to_path``
    set, the normalised target path is looked up first among the files of
    the same package and then among the files of the packages recorded in
    ``requires.dep_package_id`` for that package.  When a target is found
    its id is stored in ``link_to_file_id``; otherwise the row is left
    untouched.

    ``repodir_name`` and ``repodir_depends`` are accepted but not used.
    """
    links_sql = """
SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra,
package_files.id AS object_id, package_files.path, package_files.link_to_path
FROM packages, package_files
WHERE repodir_id = ? AND package_files.package_id = packages.id AND
link_to_path IS NOT NULL
ORDER BY packages.name, link_to_path
"""
    own_package_sql = """
SELECT id FROM package_files WHERE path = ? AND package_id = ?
"""
    required_packages_sql = """
SELECT id FROM package_files WHERE path = ? AND package_id IN (SELECT dep_package_id FROM requires WHERE package_id = ?)
"""
    store_sql = """
UPDATE package_files SET link_to_file_id = ? WHERE id = ?
"""

    # All link rows are fetched up front because the same cursor is reused
    # for the lookups inside the loop.
    for link_row in dbc.execute(links_sql, [repodir_id]).fetchall():
        owner_pkg_id = link_row[0]
        link_file_id = link_row[3]
        wanted_path = os.path.normpath(link_row[5])
        lookup_args = [wanted_path, owner_pkg_id]

        hit = dbc.execute(own_package_sql, lookup_args).fetchone()
        target_file_id = hit[0] if hit else None
        if not target_file_id:
            hit = dbc.execute(required_packages_sql, lookup_args).fetchone()
            if hit:
                target_file_id = hit[0]

        if target_file_id:
            dbc.execute(store_sql, [target_file_id, link_file_id])
def process_repodir_so_needed(dbc, repodir_id, repodir_name, repodir_depends):
    """Fill ``so_needed_res`` for the object files of one repodir.

    Pass 1 (res_type 1) resolves a needed library through the package's rpm
    requirements: the requirement is named like the library (optionally with
    a ``()(64bit)`` suffix) and the package it resolves to ships a file whose
    basename equals the library name.

    Pass 2 (res_type 2) takes the entries still unresolved after pass 1 and
    matches them by basename against every file in the repodir itself and in
    ``repodir_depends``.

    Progress and the number of unresolved entries are printed.
    ``repodir_name`` is accepted but not used.
    """
    unresolved_sql_tail = """
WHERE repodir_id = ? AND package_files.package_id = packages.id AND
so_needed.obj_file_id = package_files.id AND
NOT EXISTS (SELECT 1 FROM so_needed_res WHERE so_needed_res.so_needed_id = so_needed.id)
ORDER BY packages.nvra, package_files.basename, so_needed.name
"""
    unresolved_sql_head = """
SELECT packages.id AS package_id, packages.nvra,
package_files.id AS object_id, package_files.basename AS object_name,
so_needed.id AS so_needed_id, so_needed.name AS so_needed_name
"""

    print('Searching object files resolutions (1)...')
    dbc.execute("""
INSERT INTO so_needed_res(so_needed_id, dep_obj_file_id, res_type)
SELECT so_needed.id, tpf.id, 1 FROM packages
CROSS JOIN package_files spf CROSS JOIN so_needed CROSS JOIN requires CROSS JOIN package_files tpf
WHERE so_needed.obj_file_id = spf.id AND spf.package_id = packages.id AND packages.repodir_id = ? AND
spf.package_id = requires.package_id AND
(so_needed.name = requires.name OR so_needed.name || '()(64bit)' = requires.name) AND
requires.dep_package_id = tpf.package_id AND so_needed.name = tpf.basename
""", [repodir_id])

    left_after_rpm = dbc.execute(
        unresolved_sql_head +
        "FROM packages CROSS JOIN package_files CROSS JOIN so_needed" +
        unresolved_sql_tail,
        [repodir_id]).fetchall()
    print('Object files not resolved by rpm requires-provides:  %s' % len(left_after_rpm))
    if not left_after_rpm:
        return

    print('Searching object files resolutions (2)...')
    in_repodirs = ','.join(str(search_id)
                           for search_id in [repodir_id] + list(repodir_depends))
    # Column 4 of each unresolved row is the so_needed id.
    in_so_needed = ','.join(str(row[4]) for row in left_after_rpm)
    dbc.execute("""
INSERT INTO so_needed_res(so_needed_id, dep_obj_file_id, res_type)
SELECT so_needed.id, tpf.id, 2 FROM packages, package_files tpf, so_needed
WHERE packages.repodir_id IN (%s) AND packages.id = tpf.package_id AND
so_needed.id IN (%s) AND tpf.basename = so_needed.name
""" % (in_repodirs, in_so_needed))

    still_left = dbc.execute(
        unresolved_sql_head +
        "FROM packages, package_files, so_needed" +
        unresolved_sql_tail,
        [repodir_id]).fetchall()
    print('Object files not resolved:  %s' % len(still_left))
def process_repodir_obj_symbols(dbc, repodir_id, repodir_name, repodir_depends):
    """Fill ``obj_symbols_res`` for the object files of one repodir.

    Symbols with ``sym_type = 0`` in the repodir's files are matched by name
    against symbols with ``sym_type = 1``:

    * passes 1 and 2 look only in the object files recorded in
      ``so_needed_res`` with res_type 1 and 2 respectively and store the
      match with the same res_type;
    * pass 3 (res_type 3) takes the symbols that still have no entry in
      ``obj_symbols_res`` and looks in every file of the repodir itself and
      of ``repodir_depends``.

    ``repodir_name`` is accepted but not used.
    """
    # Passes 1 and 2 run the same statement; only the res_type differs.
    via_so_needed_sql = """
INSERT INTO obj_symbols_res(obj_sym_id, dep_obj_sym_id, res_type)
SELECT sos.id, tos.id, %d FROM packages CROSS JOIN package_files CROSS JOIN obj_symbols sos CROSS JOIN
so_needed CROSS JOIN so_needed_res CROSS JOIN obj_symbols tos
WHERE packages.repodir_id = ? AND packages.id = package_files.package_id AND package_files.id = sos.obj_file_id AND
sos.sym_type = 0 AND sos.obj_file_id = so_needed.obj_file_id AND so_needed.id = so_needed_res.so_needed_id AND
so_needed_res.res_type = %d AND so_needed_res.dep_obj_file_id = tos.obj_file_id AND
tos.sym_type = 1 AND tos.name = sos.name
"""
    for res_type in (1, 2):
        print('Searching symbols resolutions (%d)...' % res_type)
        # EXPLAIN QUERY PLAN
        dbc.execute(via_so_needed_sql % (res_type, res_type), [repodir_id])

    print('Searching symbols resolutions (3)...')
    in_repodirs = ','.join(str(search_id)
                           for search_id in [repodir_id] + list(repodir_depends))
    anywhere_sql = """
INSERT INTO obj_symbols_res(obj_sym_id, dep_obj_sym_id, res_type)
SELECT sos.id, tos.id, 3 FROM packages CROSS JOIN package_files CROSS JOIN obj_symbols sos CROSS JOIN
obj_symbols tos CROSS JOIN package_files tpf
WHERE repodir_id = ? AND packages.id = package_files.package_id AND package_files.id = sos.obj_file_id AND
sos.sym_type = 0 AND NOT EXISTS (SELECT 1 FROM obj_symbols_res WHERE obj_sym_id = sos.id) AND
sos.name = tos.name AND tos.sym_type = 1 AND tos.obj_file_id = tpf.id AND
tpf.package_id IN (SELECT id FROM packages WHERE repodir_id IN (%s))
""" % in_repodirs
    dbc.execute(anywhere_sql, [repodir_id])
def process_repodir(dbc, repo_id, repo_name, repo_sources, depend_repodir_list, repodirs_processed, dep_arch):
    """Run all resolution steps for one repodir once its dependencies are ready.

    Every name in ``depend_repodir_list`` must match at least one repodir
    whose id is in ``repodirs_processed``; if one does not, nothing is done
    and False is returned.  Otherwise requirements, file links, needed
    libraries and object symbols are resolved in that order against the
    matched repodirs.  When ``repo_sources`` is set, the packages of the
    repodir are also linked (``sourcerpm_package``) to the package of the
    repodir named ``repo_sources`` whose nvra equals ``sourcerpm`` without
    its last four characters.

    ``dep_arch`` is passed to the requirement resolution as build-arch filter.

    Returns True when the repodir was processed, False when a dependency is
    not available yet.
    """
    processed_ids = ','.join(str(done_id) for done_id in repodirs_processed)
    find_depend_sql = """
SELECT id, name FROM repodirs WHERE id IN (%s) AND name = ?
""" % processed_ids

    repodir_depends = []
    for dr_name in depend_repodir_list:
        matches = dbc.execute(find_depend_sql, [dr_name]).fetchall()
        if not matches:
            # A dependency has not been processed yet.
            return False
        repodir_depends.extend(match[0] for match in matches)

    print('%s   %s   %s' % (repo_name, depend_repodir_list, dep_arch))
    process_repodir_requires(dbc, repo_id, repo_name, repodir_depends, dep_arch)
    for step in (process_repodir_file_links,
                 process_repodir_so_needed,
                 process_repodir_obj_symbols):
        step(dbc, repo_id, repo_name, repodir_depends)

    if not repo_sources:
        return True

    print('Searching source rpms...')
    dbc.execute("""
UPDATE packages SET sourcerpm_package = NULL
WHERE repodir_id = ?""", [repo_id])
    dbc.execute("""
UPDATE packages SET sourcerpm_package =
(SELECT id FROM packages ps
WHERE repodir_id IN (SELECT id FROM repodirs WHERE name = ?) AND
ps.nvra = substr(packages.sourcerpm, 1, length(packages.sourcerpm)-4)
)
WHERE repodir_id = ? AND sourcerpm LIKE '%.rpm'
""", [repo_sources, repo_id])
    return True
def _process_binary_repodirs(dbc, repodirs_processed):
    """Process the binary repodirs (``sources <> '.'``) in dependency order.

    Passes over the not yet processed repodirs are repeated until a pass
    processes nothing new.  Ids of processed repodirs are appended to
    ``repodirs_processed``.
    """
    repodirs_processed_cnt = -1
    while repodirs_processed_cnt < len(repodirs_processed):
        # Record the progress *before* the pass: the loop has to run again
        # whenever this pass processed something, because that may have
        # unblocked repodirs skipped earlier.  Updating the counter after
        # the pass would always end the loop after a single pass.
        repodirs_processed_cnt = len(repodirs_processed)
        in_repodirs = ','.join(str(done_id) for done_id in repodirs_processed)
        repodirs = dbc.execute("""
SELECT id, name, sources, path FROM repodirs WHERE sources <> '.' AND id NOT IN (%s)
""" % in_repodirs).fetchall()
        for repodir in repodirs:
            (repo_id, repo_name, repo_sources) = (repodir[0], repodir[1], repodir[2])
            depend_repodir_names = dbc.execute(
                """
SELECT depend_repodir_name FROM repodir_depends WHERE repodir_id = ?
""", [repo_id]).fetchall()
            depend_repodir_list = [drn[0] for drn in depend_repodir_names]
            if process_repodir(dbc, repo_id, repo_name, repo_sources,
                               depend_repodir_list, repodirs_processed, None):
                repodirs_processed.append(repo_id)


def _process_source_repodirs(dbc, repodirs_processed):
    """Process the source repodirs (``sources = '.'``), once per build architecture.

    The architectures are taken from the existing repodirs whose names match
    the ``$arch`` templates the source repodir depends on.  This is a single
    pass: a source repodir that cannot be processed raises instead of being
    retried.  Ids of processed repodirs are appended to ``repodirs_processed``.

    Raises:
        Exception: when a dependency name has no ``$arch`` marker, when no
            repodir matches a dependency template, or when process_repodir
            reports that the dependencies are not ready.
    """
    repodirs = dbc.execute("""
SELECT id, name, sources, path FROM repodirs WHERE sources = '.'
""").fetchall()
    for repodir in repodirs:
        (repo_id, repo_name) = (repodir[0], repodir[1])
        depend_repodir_names = dbc.execute(
            """
SELECT depend_repodir_name FROM repodir_depends WHERE repodir_id = ?
""", [repo_id]).fetchall()

        src_build_archs = []
        for drn in depend_repodir_names:
            dr_name = drn[0]
            # NOTE(review): the source this was restored from had lost its
            # indentation; this error is assumed to belong to the '$arch'
            # test, as its message suggests -- confirm against the original.
            if '$arch' not in dr_name:
                raise Exception('Source repository should depend on */$arch/* repo.')
            depend_repodir_found = dbc.execute(
                """
SELECT id, name FROM repodirs WHERE name LIKE ?
""", [dr_name.replace('$arch', '%')]).fetchall()
            if len(depend_repodir_found) == 0:
                raise Exception('Dependancy repositories not found!')
            for drf in depend_repodir_found:
                arch = extract_arch(dr_name, drf[1])
                # The template also matches the SRPMS directory itself.
                if arch and arch != 'SRPMS':
                    src_build_archs.append(arch)

        for arch in src_build_archs:
            depend_repodir_list = [drn[0].replace('$arch', arch)
                                   for drn in depend_repodir_names]
            if not process_repodir(dbc, repo_id, repo_name, None,
                                   depend_repodir_list, repodirs_processed, arch):
                raise Exception('Couldn\'t process SRPMS repository!')
        repodirs_processed.append(repo_id)


def main(args):
    """Resolve dependencies for every repodir stored in the database ``DB``.

    Clears ``so_needed_res`` and ``obj_symbols_res``, processes the binary
    repodirs and then the source repodirs, prints the repodirs that could
    not be processed and commits the result.  If an exception occurs nothing
    is committed; the connection is closed in either case.

    Resets the module-level counter ``n`` to 0.

    Args:
        args: command line arguments; currently not used.
    """
    global n
    n = 0

    conn = sqlite3.connect(DB)
    try:
        dbc = conn.cursor()
        dbc.execute("""
PRAGMA cache_size = -1048576
""")
        dbc.execute("""
DELETE FROM so_needed_res""")
        dbc.execute("""
DELETE FROM obj_symbols_res""")
        dbc.execute("""
ANALYZE""")

        repodirs_processed = []
        # Process binary rpms
        _process_binary_repodirs(dbc, repodirs_processed)
        # Process SRPMS
        _process_source_repodirs(dbc, repodirs_processed)

        in_repodirs = ','.join(str(done_id) for done_id in repodirs_processed)
        repodirs_not_processed = dbc.execute("""
SELECT id, name, sources, path FROM repodirs rd WHERE id NOT IN (%s)
""" % in_repodirs).fetchall()
        if len(repodirs_not_processed) > 0:
            print('Repodirs not processed due to dependencies:')
            for rdna in repodirs_not_processed:
                print(rdna[1])

        dbc.execute("""
ANALYZE""")
        conn.commit()
    finally:
        conn.close()
# Run main() with the command line arguments when executed as a script.
if __name__ == "__main__":
    main(sys.argv)