Mirror of https://abf.rosa.ru/djam/repo-analyzer.git (synced 2025-02-23 10:02:54 +00:00)
Allow for partial RPM analysis
Commit 3712dda39c (parent 52059b73d8)
3 changed files with 49 additions and 22 deletions
README
@@ -3,7 +3,8 @@
 1. Set up the repository structure and the paths in repo-analyze-config.xml
 2. Populate the database with information from the repositories:
     fill-repodb.py repo-analyze-config.xml
-* The script creates a repo.db database about 1 GB in size in the current directory
+* To speed things up, the processing of .so files and of their symbols can be disabled with the -O and -S switches, respectively.
+** The script creates a repo.db database about 2 GB in size in the current directory (with full processing).
 3. Prepare the database for analysis:
     prepare-repodb.py
 4. Run the analysis/checks:
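A usage sketch for the new partial mode: the -O and -S spellings are the ones this commit adds to fill-repodb.py, and step 4's analysis script then runs as usual.

    fill-repodb.py -O -S repo-analyze-config.xml
    prepare-repodb.py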
[second changed file: the analysis script; its filename is not preserved in this capture]
@@ -232,8 +232,8 @@ SELECT packages.id, packages.name, nvra
     print 'Total: %d' % len(singles)
 
 def detect_lost_object_files(dbc):
-    print '==='
-    print 'Lost object (executable) files (provided but not found):'
+    header = '===\n' \
+             'Lost object (executable) files (provided but not found):'
     repodirs = dbc.execute("""
 SELECT id, name, sources, path FROM repodirs ORDER BY id
 """).fetchall()

@@ -246,14 +246,17 @@ SELECT nvra, package_files.path, mark
 ORDER BY packages.name, package_files.path
 """, [rd_id]).fetchall()
         if lost_object_files:
+            if header:
+                print header
+                header = None
             print '%d) %s' % (rd_id, rd_name)
             for lof in lost_object_files:
                 print '\t%s: %s' % (lof[0], lof[1])
             print 'Total: %d' % len(lost_object_files)
 
 def detect_broken_object_links(dbc):
-    print '==='
-    print 'Invalid object (executable) file links:'
+    header = '===\n' \
+             'Invalid object (executable) file links:'
     repodirs = dbc.execute("""
 SELECT id, name, sources, path FROM repodirs ORDER BY id
 """).fetchall()
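The edits above, and the analogous ones below, all apply one small pattern: the section banner moves from an unconditional print into a header variable that is printed lazily, once, before the first finding, so a check that finds nothing in a partially filled database prints nothing at all. A minimal sketch of the pattern (Python 2, matching the codebase; find_lost_files is a hypothetical stand-in for the SQL queries):

    def report_lost_files(repodirs, find_lost_files):
        # Banner is prepared up front but only emitted on the first hit.
        header = '===\n' \
                 'Lost object (executable) files (provided but not found):'
        for rd_id, rd_name in repodirs:
            lost = find_lost_files(rd_id)
            if not lost:
                continue              # silent when a repodir has no findings
            if header:
                print header          # first finding: emit the banner...
                header = None         # ...and never again
            print '%d) %s: %d lost' % (rd_id, rd_name, len(lost))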
@@ -267,6 +270,9 @@ SELECT nvra, package_files.path, link_to_path, mark
 ORDER BY packages.name, package_files.path
 """, [rd_id]).fetchall()
         if broken_object_links:
+            if header:
+                print header
+                header = None
             print '%d) %s' % (rd_id, rd_name)
             for bol in broken_object_links:
                 print '\t%s: %s -/-> %s' % \

@@ -279,7 +285,7 @@ SELECT depend_repodir_name FROM repodir_depends WHERE repodir_id = ?
 """, [repodir_id]).fetchall()
     return ', '.join([dep_repo[0] for dep_repo in dep_repos])
 
-def detect_so_needed_not_found(dbc):
+def detect_so_needed_not_resolved(dbc):
     repodirs = dbc.execute("""
 SELECT id, name, sources, path FROM repodirs ORDER BY id
 """).fetchall()

@@ -305,8 +311,8 @@ SELECT COUNT(1) FROM packages CROSS JOIN package_files CROSS JOIN so_needed CROS
 """, [rd_id]).fetchone()
         print '%d) %s: %d' % (rd_id, rd_name, objects_needed_resolved2[0])
 
-    print '==='
-    print 'Objects needed but not resolved:'
+    header = '===\n' \
+             'Objects needed but not resolved:'
     for repodir in repodirs:
         (rd_id, rd_name) = (repodir[0], repodir[1])
         objects_needed_not_resolved = dbc.execute("""

@@ -318,6 +324,9 @@ SELECT packages.nvra, package_files.path, so_needed.name
 """, [rd_id]).fetchall()
         if objects_needed_not_resolved:
             repodir_depends = get_repodir_depends(dbc, rd_id)
+            if header:
+                print header
+                header = None
             print ('%d) %s' % (rd_id, rd_name)) + \
                   ('' if repodir_depends == '' else
                    (' (depends on: %s)' % repodir_depends))

@@ -325,7 +334,7 @@ SELECT packages.nvra, package_files.path, so_needed.name
                 print '\t%s: %s -?-> %s' % (obj_nr[0], obj_nr[1], obj_nr[2])
             print 'Total: %d' % len(objects_needed_not_resolved)
 
-def detect_symbols_not_found(dbc):
+def detect_symbols_not_resolved(dbc):
     repodirs = dbc.execute("""
 SELECT id, name, sources, path FROM repodirs ORDER BY id
 """).fetchall()

@@ -355,8 +364,8 @@ SELECT COUNT(1) FROM packages CROSS JOIN package_files CROSS JOIN obj_symbols CR
 """, [rd_id]).fetchone()
         print '%d) %s: %d' % (rd_id, rd_name, symbols_resolved3[0])
 
-    print '==='
-    print 'Symbols not resolved:'
+    header = '===\n' \
+             'Symbols not resolved:'
     for repodir in repodirs:
         (rd_id, rd_name) = (repodir[0], repodir[1])
         symbols_not_resolved = dbc.execute("""

@@ -368,6 +377,9 @@ SELECT packages.nvra, package_files.path, obj_symbols.name
 """, [rd_id]).fetchall()
         if symbols_not_resolved:
             repodir_depends = get_repodir_depends(dbc, rd_id)
+            if header:
+                print header
+                header = None
             print ('%d) %s' % (rd_id, rd_name)) + \
                   ('' if repodir_depends == '' else
                    (' (depends on: %s)' % repodir_depends))

@@ -384,8 +396,8 @@ def main(args):
     analyze_partitioning(dbc)
     detect_lost_object_files(dbc)
     detect_broken_object_links(dbc)
-    detect_so_needed_not_found(dbc)
-    detect_symbols_not_found(dbc)
+    detect_so_needed_not_resolved(dbc)
+    detect_symbols_not_resolved(dbc)
     conn.close()
 
 if __name__ == "__main__":
[third changed file: apparently fill-repodb.py, the metadata extractor]
@@ -40,9 +40,14 @@ def getFileList(path, ext, filelist):
     return filelist
 
 def parseargs(args):
-    parser = argparse.ArgumentParser(description=_('extract packages metadata from RPM repositories'))
-    parser.add_argument("config", metavar="config",
+    parser = argparse.ArgumentParser(description=_('extract packages metadata'
+                                                   ' from RPM repositories'))
+    parser.add_argument('config', metavar='config',
                         help=_('path to repo-analyze-config.xml'))
+    parser.add_argument('-O', '--no-shared-objects', action='store_true',
+                        help=_('don\'t process shared objects'))
+    parser.add_argument('-S', '--no-so-symbols', action='store_true',
+                        help=_('don\'t process shared object symbols'))
     opts = parser.parse_args()
     return opts
 
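With the two new store_true flags in place, option parsing behaves roughly as below. This is a self-contained sketch, not the script itself; the real parser also wraps its help strings in the gettext _() marker.

    import argparse

    parser = argparse.ArgumentParser(
        description='extract packages metadata from RPM repositories')
    parser.add_argument('config', metavar='config',
                        help='path to repo-analyze-config.xml')
    parser.add_argument('-O', '--no-shared-objects', action='store_true',
                        help="don't process shared objects")
    parser.add_argument('-S', '--no-so-symbols', action='store_true',
                        help="don't process shared object symbols")

    # e.g. fill-repodb.py -O repo-analyze-config.xml
    opts = parser.parse_args(['-O', 'repo-analyze-config.xml'])
    print opts.no_shared_objects   # True:  skip the shared-object pipeline
    print opts.no_so_symbols       # False: symbols would still be collected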
@@ -169,7 +174,7 @@ FILE_REC_PATH_IDX = 3
 FILE_REC_LINK_IDX = 6
 FILE_REC_MARK_IDX = 7
 
-def register_object(data, pkg_id, pkg, object_file_record, temp_dir):
+def register_object(data, pkg_id, pkg, object_file_record, temp_dir, no_so_symbols):
     so_needed = data['so_needed']
     obj_symbols = data['obj_symbols']
     obj_id = object_file_record[0]

@@ -195,7 +200,7 @@ def register_object(data, pkg_id, pkg, object_file_record, temp_dir):
     od_out = p.communicate()[0]
     if p.returncode != 0:
         file_mark = 'invalid-format'
-    else:
+    elif not(no_so_symbols):
         p = subprocess.Popen(['nm', '-p', '-D', '--undefined-only',
                               temp_obj_file],
                              stdout=subprocess.PIPE,
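The else-to-elif change above is what implements -S inside register_object: the nm pass that harvests undefined symbols now runs only when the objdump step succeeded and symbols were not switched off. A condensed sketch of the resulting control flow (mark_and_harvest is a hypothetical helper; the nm command line is the one in the diff):

    import subprocess

    def mark_and_harvest(temp_obj_file, objdump_rc, no_so_symbols):
        if objdump_rc != 0:
            return 'invalid-format', None    # objdump could not read the file
        elif not no_so_symbols:
            p = subprocess.Popen(['nm', '-p', '-D', '--undefined-only',
                                  temp_obj_file], stdout=subprocess.PIPE)
            return None, p.communicate()[0]  # undefined symbols, one per line
        return None, None                    # -S given: symbol pass skipped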
@@ -268,7 +273,9 @@ def extract_files(pkg, files_list, obj_so_files_idx, temp_dir):
         return False
     return True
 
-def process_package_worker(num, queue_in, generator, gen_lock, db_struct, repodir_id, build_archs, temp_dir):
+def process_package_worker(num, queue_in, generator, gen_lock, db_struct,
+                           repodir_id, build_archs, temp_dir,
+                           no_shared_objects, no_so_symbols):
 
     rpm_ts = rpm.TransactionSet()
     rpm_ts.setVSFlags(~(rpm.RPMVSF_NEEDPAYLOAD))

@@ -340,6 +347,8 @@ def process_package_worker(num, queue_in, generator, gen_lock, db_struct, repodi
                 dir_name = os.path.dirname(file_path)
                 if dir_name not in files_dirs:
                     files_dirs[dir_name] = True
+                if no_shared_objects:
+                    continue
                 if os.path.splitext(file_name)[1] in \
                     ['.debug', '.xz', '.conf', '.py', '.c', '.h', '.hpp', '.png',
                      '.cc', '.cpp', '.sh', '.java', '.pl', '.patch', '.desktop']:
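Note the placement of the new early continue: directory bookkeeping still happens for every file, so -O prunes only the shared-object work, not the file records themselves. A distilled sketch of the loop body (classify_file is a hypothetical condensation of the worker's logic):

    import os

    SKIP_EXTS = ['.debug', '.xz', '.conf', '.py', '.c', '.h', '.hpp', '.png',
                 '.cc', '.cpp', '.sh', '.java', '.pl', '.patch', '.desktop']

    def classify_file(file_path, files_dirs, no_shared_objects):
        dir_name = os.path.dirname(file_path)
        if dir_name not in files_dirs:
            files_dirs[dir_name] = True      # still recorded with -O
        if no_shared_objects:
            return False                     # -O: skip objdump/nm entirely
        if os.path.splitext(file_path)[1] in SKIP_EXTS:
            return False                     # plainly not an ELF object
        return True                          # candidate for extraction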
@@ -370,7 +379,8 @@ def process_package_worker(num, queue_in, generator, gen_lock, db_struct, repodi
                 os.makedirs(pkg_temp_dir)
                 if extract_files(pkg, files_list, obj_so_files_idx, pkg_temp_dir):
                     for i in obj_so_files_idx:
-                        register_object(data, pkg_id, pkg, files_list[i], pkg_temp_dir)
+                        register_object(data, pkg_id, pkg, files_list[i],
+                                        pkg_temp_dir, no_so_symbols)
 
                 shutil.rmtree(pkg_temp_dir, True)
 

@@ -405,7 +415,8 @@ INSERT INTO obj_symbols(obj_file_id, name, sym_type) VALUES(?, ?, ?)
 
 generator_value = 0
 
-def process_repodir(repodir_path, repodir_id, build_archs, conn, db_struct, tempdir):
+def process_repodir(repodir_path, repodir_id, build_archs, conn, db_struct,
+                    tempdir, no_shared_objects, no_so_symbols):
 
     rpm_list = []
     rpm_list = getFileList(repodir_path, '.rpm', rpm_list)

@@ -506,7 +517,8 @@ CREATE TABLE IF NOT EXISTS %s (id INTEGER PRIMARY KEY NOT NULL,
     for i in xrange(NUM_PROCESSES):
         worker = mp.Process(target = process_package_worker,
                             args = (i, queue_in, id_generator, generator_lock, db_struct,
-                                    repodir_id, build_archs, tempdir))
+                                    repodir_id, build_archs, tempdir,
+                                    no_shared_objects, no_so_symbols))
         workers.append(worker)
         worker.start()
     queue_in.join()
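For context on where those extra args land: the script fans packages out to worker processes over a joinable queue, and the two flags travel to each worker as ordinary positional arguments. A minimal self-contained model of that fan-out (the stop-sentinel protocol here is an assumption made for the sketch, not taken from the diff):

    import multiprocessing as mp

    def worker(num, queue_in, no_shared_objects, no_so_symbols):
        while True:
            pkg = queue_in.get()
            if pkg is None:                  # assumed stop sentinel
                queue_in.task_done()
                break
            # ... process one package, honouring both flags ...
            queue_in.task_done()

    if __name__ == '__main__':
        queue_in = mp.JoinableQueue()
        workers = [mp.Process(target=worker, args=(i, queue_in, True, False))
                   for i in xrange(4)]
        for w in workers:
            w.start()
        for pkg in ['a.rpm', 'b.rpm']:
            queue_in.put(pkg)
        for _ in workers:
            queue_in.put(None)               # one sentinel per worker
        queue_in.join()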
@@ -543,7 +555,9 @@ def main(args):
         build_archs = [None] if xrepodir.get('sources') != '.' else \
             get_build_archs(xrepodir,
                             config_root.find('repositories'))
-        process_repodir(xrepodir.get('path'), repodir_id, build_archs, conn, rpm_db_struct, tempdir)
+        process_repodir(xrepodir.get('path'), repodir_id, build_archs, conn,
+                        rpm_db_struct, tempdir, options.no_shared_objects,
+                        options.no_so_symbols)
     shutil.rmtree(tempdir, True)
     index_database(conn)
 