#!/usr/bin/python # -*- coding: utf-8 -*- import os import sys import gettext import argparse import sqlite3 import string import re from versutils import * DB = 'repo.db' def process_repodir_requires(dbc, repodir_id, repodir_name, repodir_depends, requires_build_arch): global n print 'Processing repo %d: %s (with depends: %s)' % (repodir_id, repodir_name, str(repodir_depends)) package_requires = dbc.execute(""" SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra, requires.id, requires.name, flags, requires.version FROM packages, requires WHERE repodir_id = ? AND requires.package_id = packages.id %s ORDER BY packages.name, requires.name """ % ((" AND build_arch = '%s'" % requires_build_arch) if requires_build_arch is not None else ""), [repodir_id]).fetchall() search_repodirs = [repodir_id] search_repodirs.extend(repodir_depends) in_repodirs = ','.join(str(id) for id in search_repodirs) # print 'package requires count: ', len(package_requires) broken_dep = 0 requires_cache = {} # TODO: Reuse the cache for dependent repositories??? for packreq in package_requires: (cpackage_id, package_nvra, requires_id, requires_name, requires_flags, requires_version) = \ (packreq[0], packreq[2], packreq[3], packreq[4], packreq[5], packreq[6]) # if requires_name != '/usr/sbin/glibc-post-wrapper': # continue # print packreq requirement_uid = requires_name + '\0' + str(requires_flags) + '\0' + requires_version pkg_id = requires_cache.get(requirement_uid) if pkg_id is None: if (re.match(r'\A(rpmlib|executable)\(.+\)\Z', requires_name)): # see if($N=~/\A(rpmlib|executable)\(.+\)\Z/) in urpm_repoclosure.pl pkg_id = -1 if pkg_id is None: depend_candidates = dbc.execute(""" SELECT packages.id AS package_id, packages.name AS package_name, provides.id, provides.name, flags, provides.version FROM packages, provides WHERE provides.package_id = packages.id AND repodir_id IN (%s) AND provides.name = ? """ % in_repodirs, [requires_name]).fetchall() provides_found_pkg_id = None provides_found_id = None provides_found_version = None for dep_cand in depend_candidates: (provides_pkg_id, provides_id, provides_flags, provides_version) = \ (dep_cand[0], dep_cand[2], dep_cand[4], dep_cand[5]) #print 'provides_version: ', provides_flags, ' ', provides_version if provides_flags & RPMSENSE_SENSEMASK == 0: if not provides_version: provides_version = '*' else: raise Exception('Invalid provides version (flags = %d, version = %s)!' % (provides_flags, provides_version)) if version_ok(requires_version, requires_flags & RPMSENSE_SENSEMASK, provides_version): better_version = provides_found_version is None or \ provides_version == '*' if not better_version: better_version = version_ok(provides_version, RPMSENSE_GREATER, provides_found_version) if better_version: (provides_found_pkg_id, provides_found_id, provides_found_version) = \ (provides_pkg_id, provides_id, provides_version) pkg_id = provides_found_pkg_id #print "->", provides_found_version, ' ->', provides_found_pkg_id if pkg_id is None and (requires_flags & RPMSENSE_MISSINGOK) != 0: pkg_id = -1 # TODO: Fix for valid foreign key if pkg_id is None and requires_name.startswith('/'): # file dependency if (requires_flags & (RPMSENSE_SCRIPT_POST | RPMSENSE_SCRIPT_PREUN | RPMSENSE_SCRIPT_POSTUN)) != 0: internal_files = dbc.execute(""" SELECT 1 FROM package_files WHERE package_id = ? AND path = ? """, [cpackage_id, requires_name]).fetchall() if len(internal_files) > 0: pkg_id = cpackage_id else: #TODO: Check file dependencies (/usr/bin/python (required by ant-scripts-1.7.1-7.0.6.noarch), /usr/sbin/useradd (required by tomcat5-5.5.28-0.5.2.noarch))? files_dependency = dbc.execute(""" SELECT package_id FROM package_files WHERE path = ? AND package_id in (SELECT id FROM packages WHERE repodir_id IN (%s)) """ % in_repodirs, [requires_name]).fetchall() if len(files_dependency) > 0: if len(files_dependency) == 1: pkg_id = files_dependency[0][0] else: print "File dependency (%s) has multiple resolutions (%d)." % (requires_name, len(files_dependency)) if pkg_id is not None: dbc.execute(""" UPDATE requires SET dep_package_id = ? WHERE id = ? """, [pkg_id, requires_id]) requires_cache[requirement_uid] = pkg_id else: print requires_name, ' ', requires_version, ' (required by %s)' % package_nvra, ' not found!!!' broken_dep += 1 n = n + 1 #print "n = ", n # if n == 60000: # break print 'broken_deps: ', broken_dep print '' def extract_arch(arch_template, repo_name): arch_sign = '$arch' spos = arch_template.find(arch_sign) if spos >= 0: repo_prefix= arch_template[:spos] repo_postfix = arch_template[spos + len(arch_sign):] if repo_name.startswith(repo_prefix) and \ repo_name.endswith(repo_postfix): return repo_name[len(repo_prefix) : len(repo_name) - len(repo_postfix)] return None def process_repodir_file_links(dbc, repodir_id, repodir_name, repodir_depends): package_files_links = dbc.execute(""" SELECT packages.id AS package_id, packages.name AS package_name, packages.nvra, package_files.id AS object_id, package_files.path, package_files.link_to_path FROM packages, package_files WHERE repodir_id = ? AND package_files.package_id = packages.id AND link_to_path IS NOT NULL ORDER BY packages.name, link_to_path """, [repodir_id]).fetchall() for file_link in package_files_links: pkg_id = file_link[0] pkg_name = file_link[1] object_id = file_link[3] target_path = os.path.normpath(file_link[5]) target_obj_id = None tofile = dbc.execute(""" SELECT id FROM package_files WHERE path = ? AND package_id = ? """, [target_path, pkg_id]).fetchone() if tofile: target_obj_id = tofile[0] if not target_obj_id: # Just two level of dependency recursion - TODO: Full depth recursion? tofile = dbc.execute(""" SELECT id FROM package_files WHERE path = ? AND package_id IN ( SELECT dep_package_id FROM requires WHERE package_id = ? UNION SELECT dep_package_id FROM requires WHERE package_id IN (SELECT dep_package_id FROM requires WHERE package_id = ?) ) """, [target_path, pkg_id, pkg_id]).fetchone() if tofile: target_obj_id = tofile[0] if target_obj_id: dbc.execute(""" UPDATE package_files SET link_to_file_id = ? WHERE id = ? """, [target_obj_id, object_id]) else: # print 'target %s not found (%d: %s)' % (target_path, pkg_id, pkg_name) pass def process_repodir_so_needed(dbc, repodir_id, repodir_name, repodir_depends): print 'Searching object files resolutions (1)...' dbc.execute(""" INSERT INTO so_needed_res(so_needed_id, dep_obj_file_id, res_type) SELECT so_needed.id, tpf.id, 1 FROM packages CROSS JOIN package_files spf CROSS JOIN so_needed CROSS JOIN requires CROSS JOIN package_files tpf WHERE so_needed.obj_file_id = spf.id AND spf.package_id = packages.id AND packages.repodir_id = ? AND spf.package_id = requires.package_id AND (so_needed.name = requires.name OR so_needed.name || '()(64bit)' = requires.name) AND requires.dep_package_id = tpf.package_id AND so_needed.name = tpf.basename """, [repodir_id]) search_repodirs = [repodir_id] search_repodirs.extend(repodir_depends) in_repodirs = ','.join(str(id) for id in search_repodirs) objects_not_resolved1 = dbc.execute(""" SELECT packages.id AS package_id, packages.nvra, package_files.id AS object_id, package_files.basename AS object_name, so_needed.id AS so_needed_id, so_needed.name AS so_needed_name FROM packages CROSS JOIN package_files CROSS JOIN so_needed WHERE repodir_id = ? AND package_files.package_id = packages.id AND so_needed.obj_file_id = package_files.id AND NOT EXISTS (SELECT 1 FROM so_needed_res WHERE so_needed_res.so_needed_id = so_needed.id) ORDER BY packages.nvra, package_files.basename, so_needed.name """, [repodir_id]).fetchall() print 'Object files not resolved by rpm requires-provides: ', len(objects_not_resolved1) if objects_not_resolved1: print 'Searching object files resolutions (2)...' in_so_needed = ','.join(str(obj_rec[4]) for obj_rec in objects_not_resolved1) dbc.execute(""" INSERT INTO so_needed_res(so_needed_id, dep_obj_file_id, res_type) SELECT so_needed.id, tpf.id, 2 FROM packages, package_files tpf, so_needed WHERE packages.repodir_id IN (%s) AND packages.id = tpf.package_id AND so_needed.id IN (%s) AND tpf.basename = so_needed.name """ % (in_repodirs, in_so_needed)) objects_not_resolved2 = dbc.execute(""" SELECT packages.id AS package_id, packages.nvra, package_files.id AS object_id, package_files.basename AS object_name, so_needed.id AS so_needed_id, so_needed.name AS so_needed_name FROM packages, package_files, so_needed WHERE repodir_id = ? AND package_files.package_id = packages.id AND so_needed.obj_file_id = package_files.id AND NOT EXISTS (SELECT 1 FROM so_needed_res WHERE so_needed_res.so_needed_id = so_needed.id) ORDER BY packages.nvra, package_files.basename, so_needed.name """, [repodir_id]).fetchall() print 'Object files not resolved: ', len(objects_not_resolved2) def process_repodir_obj_symbols(dbc, repodir_id, repodir_name, repodir_depends): print 'Searching symbols resolutions (1)...' # EXPLAIN QUERY PLAN dbc.execute(""" INSERT INTO obj_symbols_res(obj_sym_id, dep_obj_sym_id, res_type) SELECT sos.id, tos.id, 1 FROM packages CROSS JOIN package_files CROSS JOIN obj_symbols sos CROSS JOIN so_needed CROSS JOIN so_needed_res CROSS JOIN obj_symbols tos WHERE packages.repodir_id = ? AND packages.id = package_files.package_id AND package_files.id = sos.obj_file_id AND sos.sym_type = 0 AND sos.obj_file_id = so_needed.obj_file_id AND so_needed.id = so_needed_res.so_needed_id AND so_needed_res.res_type = 1 AND so_needed_res.dep_obj_file_id = tos.obj_file_id AND tos.sym_type = 1 AND tos.name = sos.name """, [repodir_id]) print 'Searching symbols resolutions (2)...' dbc.execute(""" INSERT INTO obj_symbols_res(obj_sym_id, dep_obj_sym_id, res_type) SELECT sos.id, tos.id, 2 FROM packages CROSS JOIN package_files CROSS JOIN obj_symbols sos CROSS JOIN so_needed CROSS JOIN so_needed_res CROSS JOIN obj_symbols tos WHERE packages.repodir_id = ? AND packages.id = package_files.package_id AND package_files.id = sos.obj_file_id AND sos.sym_type = 0 AND sos.obj_file_id = so_needed.obj_file_id AND so_needed.id = so_needed_res.so_needed_id AND so_needed_res.res_type = 2 AND so_needed_res.dep_obj_file_id = tos.obj_file_id AND tos.sym_type = 1 AND tos.name = sos.name """, [repodir_id]) print 'Searching symbols resolutions (3)...' search_repodirs = [repodir_id] search_repodirs.extend(repodir_depends) in_repodirs = ','.join(str(id) for id in search_repodirs) dbc.execute(""" INSERT INTO obj_symbols_res(obj_sym_id, dep_obj_sym_id, res_type) SELECT sos.id, tos.id, 3 FROM packages CROSS JOIN package_files CROSS JOIN obj_symbols sos CROSS JOIN obj_symbols tos CROSS JOIN package_files tpf WHERE repodir_id = ? AND packages.id = package_files.package_id AND package_files.id = sos.obj_file_id AND sos.sym_type = 0 AND NOT EXISTS (SELECT 1 FROM obj_symbols_res WHERE obj_sym_id = sos.id) AND sos.name = tos.name AND tos.sym_type = 1 AND tos.obj_file_id = tpf.id AND tpf.package_id IN (SELECT id FROM packages WHERE repodir_id IN (%s)) """ % in_repodirs, [repodir_id]) def process_repodir(dbc, repo_id, repo_name, repo_sources, depend_repodir_list, repodirs_processed, dep_arch): all_depends_ready = True repodir_depends = [] in_repodirs = ','.join(str(id) for id in repodirs_processed) for dr_name in depend_repodir_list: repodir_depend_found = dbc.execute(""" SELECT id, name FROM repodirs WHERE id IN (%s) AND name = ? """ % in_repodirs, [dr_name]).fetchall() if len(repodir_depend_found) == 0: all_depends_ready = False break else: for rdf in repodir_depend_found: repodir_depends.append(rdf[0]) if not all_depends_ready: return False print repo_name, ' ', depend_repodir_list, ' ', dep_arch process_repodir_requires(dbc, repo_id, repo_name, repodir_depends, dep_arch) process_repodir_file_links(dbc, repo_id, repo_name, repodir_depends) process_repodir_so_needed(dbc, repo_id, repo_name, repodir_depends) process_repodir_obj_symbols(dbc, repo_id, repo_name, repodir_depends) if repo_sources: print 'Searching source rpms...' dbc.execute(""" UPDATE packages SET sourcerpm_package = NULL WHERE repodir_id = ?""", [repo_id]) dbc.execute(""" UPDATE packages SET sourcerpm_package = (SELECT id FROM packages ps WHERE repodir_id IN (SELECT id FROM repodirs WHERE name = ?) AND ps.nvra = substr(packages.sourcerpm, 1, length(packages.sourcerpm)-4) ) WHERE repodir_id = ? AND sourcerpm LIKE '%.rpm' """, [repo_sources, repo_id]) return True def main(args): conn = sqlite3.connect(DB) dbc = conn.cursor() global n n = 0 dbc.execute(""" PRAGMA cache_size = -1048576 """) dbc.execute(""" DELETE FROM so_needed_res""") dbc.execute(""" DELETE FROM obj_symbols_res""") dbc.execute(""" ANALYZE""") repodirs_processed = [] #Process binary rpms repodirs_processed_cnt = -1 while repodirs_processed_cnt < len(repodirs_processed): in_repodirs = ','.join(str(id) for id in repodirs_processed) repodirs = dbc.execute(""" SELECT id, name, sources, path FROM repodirs WHERE sources <> '.' AND id NOT IN (%s) """ % in_repodirs).fetchall() for repodir in repodirs: (repo_id, repo_name, repo_sources) = (repodir[0], repodir[1], repodir[2]) depend_repodir_names = dbc.execute( """ SELECT depend_repodir_name FROM repodir_depends WHERE repodir_id = ? """, [repo_id]).fetchall() depend_repodir_list = [drn[0] for drn in depend_repodir_names] if process_repodir(dbc, repo_id, repo_name, repo_sources, depend_repodir_list, repodirs_processed, None): repodirs_processed.append(repo_id) repodirs_processed_cnt = len(repodirs_processed) #Process SRPMS repodirs_processed_cnt = -1 while repodirs_processed_cnt < len(repodirs_processed): repodirs = dbc.execute(""" SELECT id, name, sources, path FROM repodirs WHERE sources = '.' """).fetchall() for repodir in repodirs: (repo_id, repo_name, repo_sources) = (repodir[0], repodir[1], repodir[2]) src_build_archs = [] depend_repodir_names = dbc.execute( """ SELECT depend_repodir_name FROM repodir_depends WHERE repodir_id = ? """, [repo_id]).fetchall() for drn in depend_repodir_names: dr_name = drn[0] if '$arch' in dr_name: depend_repodir_found = dbc.execute( """ SELECT id, name FROM repodirs WHERE name LIKE ? """, [dr_name.replace('$arch', '%')]).fetchall() if len(depend_repodir_found) == 0: raise Exception('Dependancy repositories not found!') for drf in depend_repodir_found: arch = extract_arch(dr_name, drf[1]) if arch: if arch == 'SRPMS': continue src_build_archs.append(arch) else: raise Exception('Source repository should depend on */$arch/* repo.') for arch in src_build_archs: depend_repodir_list = [drn[0].replace('$arch', arch) for drn in depend_repodir_names] if not process_repodir(dbc, repo_id, repo_name, None, depend_repodir_list, repodirs_processed, arch): raise Exception('Couldn\'t process SRPMS repository!') repodirs_processed.append(repo_id) repodirs_processed_cnt = len(repodirs_processed) in_repodirs = ','.join(str(id) for id in repodirs_processed) repodirs_not_processed = dbc.execute(""" SELECT id, name, sources, path FROM repodirs rd WHERE id NOT IN (%s) """ % in_repodirs).fetchall() if len(repodirs_not_processed) > 0: print 'Repodirs not processed due to dependencies:' for rdna in repodirs_not_processed: print rdna[1] dbc.execute(""" ANALYZE""") conn.commit() if __name__ == "__main__": main(sys.argv)