Implement support for remote repositories

This commit is contained in:
Alexander Lakhin 2014-02-12 16:08:24 +04:00
parent 4e4efb831d
commit 865dff2a86
3 changed files with 217 additions and 60 deletions

3
README
View file

@ -4,7 +4,8 @@
2. Заполнить базу данных информацией из репозиториев:
fill-repodb.py repo-analyze-config.xml
* Для ускорения можно отключить обработку .so и их символов ключами -O -S соответственно.
** Скрипт создаёт в текущем каталоге базу данных repo.db размером около 2 Гб (при полной обработке).
** При использовании удалённых репозиториев рекомендуется применять кеш: -c cache-dir
*** Скрипт создаёт в текущем каталоге базу данных repo.db размером около 2 Гб (при полной обработке, для репозиториев Chrome).
3. Подготовить базу данных к анализу:
prepare-repodb.py
4. Выполнить анализ/проверки:

View file

@ -14,6 +14,8 @@ import shutil
import time
import multiprocessing as mp
import gc
import urllib
from urlparse import urlparse, urlunparse
gettext.install('urpm-tools')
@ -24,17 +26,16 @@ NUM_PROCESSES = 4 # number of CPU's (evaluated automatically)
RPMFILEMODE_DIRECTORY = 0x4000
RPMFILEMODE_EXECUTE = 0111
def get_files(url, ext):
    """Return the files with extension *ext* found directly in a repository.

    url -- either an http(s) URL of a directory index page or a path to a
           local directory.
    ext -- file name suffix to select (for example '.rpm').

    For http(s) URLs the index page is downloaded with wget_url() and its
    links are filtered by parse_index_html(). For local directories only
    the directory itself is listed (subdirectories are not descended into)
    and normalized paths are returned.
    """
    urlp = urlparse(url)
    if urlp.scheme in ['http', 'https']:
        # Honour the requested extension instead of a hard-coded '.rpm'.
        return parse_index_html(wget_url(url, None), url, ext)
    return [os.path.normpath(os.path.join(url, name))
            for name in os.listdir(url)
            if name.endswith(ext)]
def parseargs():
@ -42,6 +43,8 @@ def parseargs():
' from RPM repositories'))
parser.add_argument('config', metavar='config',
help=_('path to repo-analyze-config.xml'))
parser.add_argument('-c', '--cache-dir',
help=_('path to cache directory'))
parser.add_argument('-O', '--no-shared-objects', action='store_true',
help=_('don\'t process shared objects'))
parser.add_argument('-S', '--no-so-symbols', action='store_true',
@ -301,17 +304,19 @@ def process_package_worker(num, queue_in, generator, gen_lock, db_struct,
(pkg, ) = job
pkg_id = generate_new_id(generator, gen_lock)
local_pkg = get_local_file(pkg, temp_dir)
hdr = get_rpm_header(rpm_ts, pkg)
hdr = get_rpm_header(rpm_ts, local_pkg)
package_values = []
package_values.append(pkg_id)
for tag in db_struct['packages_tags']:
hval = hdr[tag]
package_values.append(
sqlite3.Binary(hval) if tag in db_struct['blob_tags'] else \
to_string(pkg, tag, hval) if type(hval) in [type([]), type('')] else \
hval
(sqlite3.Binary(hval) if len(hval)>0 else None)
if tag in db_struct['blob_tags'] else \
to_string(pkg, tag, hval) if \
type(hval) in [type([]), type('')] else hval
)
package_values.append(repodir_id)
package_values.append(pkg)
@ -328,9 +333,9 @@ def process_package_worker(num, queue_in, generator, gen_lock, db_struct,
dep_flags[i],
dep_version[i],
pkg_id, build_arch])
# fonts-ttf-decoratives-1.3-27-rosa.lts2012.0.noarch.rpm provides font(derdämonschriftkegel)
(pkg_file_paths, pkg_file_names, pkg_file_sizes, pkg_file_modes) = \
(hdr['RPMTAG_FILEPATHS'], hdr['RPMTAG_BASENAMES'], hdr['RPMTAG_FILESIZES'], hdr['RPMTAG_FILEMODES'])
(hdr['RPMTAG_FILEPATHS'], hdr['RPMTAG_BASENAMES'],
hdr['RPMTAG_FILESIZES'], hdr['RPMTAG_FILEMODES'])
files_list = data['package_files']
files_dirs = {}
obj_so_files_idx = []
@ -368,7 +373,8 @@ def process_package_worker(num, queue_in, generator, gen_lock, db_struct,
for fdir in sorted(files_dirs.keys()):
if files_dirs[fdir]:
# Add parent directories as implicit files # TODO: recursive processing?
# Add parent directories as implicit files
# TODO: recursive processing?
pkg_file_id = generate_new_id(generator, gen_lock)
files_list.append([pkg_file_id, #FILE_REC_ID_IDX = 0
pkg_id,
@ -381,15 +387,17 @@ def process_package_worker(num, queue_in, generator, gen_lock, db_struct,
])
if obj_so_files_idx:
pkg_temp_dir = os.path.join(temp_dir, os.path.basename(pkg))
pkg_temp_dir = os.path.join(temp_dir, os.path.basename(local_pkg))
os.makedirs(pkg_temp_dir)
if extract_files(pkg, files_list, obj_so_files_idx, pkg_temp_dir):
if extract_files(local_pkg, files_list,
obj_so_files_idx, pkg_temp_dir):
for i in obj_so_files_idx:
register_object(data, files_list[i], pkg_temp_dir,
no_so_symbols)
shutil.rmtree(pkg_temp_dir, True)
remove_cached_file(pkg)
queue_in.task_done()
conn = sqlite3.connect(DB, timeout=30)
@ -419,25 +427,164 @@ INSERT INTO obj_symbols(obj_file_id, name, sym_type) VALUES(?, ?, ?)
conn.commit()
queue_in.task_done()
# Maps the URL of a remote package to the path of its downloaded copy.
# The dict is per process: every worker process keeps its own instance.
local_cache = {}


def get_local_file(url, temp_dir):
    """Return a local path for *url*, downloading the file when needed.

    url      -- http(s) URL or a local path.
    temp_dir -- directory under which the 'cache' subdirectory for
                downloaded files is created.

    Anything that is not an http(s) URL is returned unchanged. A remote
    file is fetched with wget_url() into <temp_dir>/cache/<basename> and
    remembered in local_cache, so a repeated request for the same URL
    reuses the existing copy while it is still present on disk.
    """
    import errno

    urlp = urlparse(url)
    if urlp.scheme not in ['http', 'https']:
        return url

    cached_file_name = local_cache.get(url)
    if cached_file_name and os.path.isfile(cached_file_name):
        return cached_file_name

    cache_dir = os.path.join(temp_dir, 'cache')
    # Several worker processes may reach this point at once, so create the
    # directory unconditionally and tolerate "already exists" instead of
    # checking first (the check-then-create sequence is racy).
    try:
        os.makedirs(cache_dir)
    except OSError as exc:
        if exc.errno != errno.EEXIST or not os.path.isdir(cache_dir):
            raise

    temp_file = os.path.join(cache_dir, os.path.basename(url))
    wget_url(url, temp_file)
    local_cache[url] = temp_file
    return temp_file


def remove_cached_file(url):
    """Delete the downloaded copy of *url* (if any) and forget it.

    URLs that were never downloaded by get_local_file() are ignored. A copy
    that has already disappeared from disk is not treated as an error; the
    cache entry is dropped in either case.
    """
    import errno

    cached_file_name = local_cache.pop(url, None)
    if not cached_file_name:
        return
    try:
        os.unlink(cached_file_name)
    except OSError as exc:
        if exc.errno != errno.ENOENT:
            raise
def wget_url(url, target_file):
    """Download *url* with the external 'wget' utility.

    url         -- URL to fetch; may embed credentials (user[:password]@).
    target_file -- path to store the data in, or None to get the data back
                   as the return value.

    Embedded credentials are handed to wget through --http-user and
    --http-password (with --auth-no-challenge) and are stripped from the
    URL that is printed and passed to wget.

    Returns the downloaded data (wget's stdout) when target_file is None,
    otherwise None.
    Raises Exception when wget exits with a non-zero code; in that case a
    partially written target_file is removed.
    """
    urlp = urlparse(url)
    wget_params = []
    site = urlp.netloc
    if urlp.username:
        # NOTE(review): no shell is involved here, so a missing password is
        # passed to wget as the two literal characters "" -- kept as is;
        # confirm that this is what the server expects.
        wget_params += ['--auth-no-challenge',
                        '--http-user=%s' % urlp.username,
                        '--http-password=%s' %
                        ('""' if not urlp.password else urlp.password)]
        # Credentials end at the LAST '@' (a password may contain '@').
        site = site.rpartition('@')[2]
    url = urlunparse((urlp.scheme, site, urlp.path, urlp.params,
                      urlp.query, urlp.fragment))
    print('Downloading %s...' % url)
    if target_file is None:
        wget_params += ['-nv', '-O-', url]
    else:
        wget_params += ['-nv', '-O', target_file, url]
    p = subprocess.Popen(['wget'] + wget_params,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    wget_out, wget_err = p.communicate()
    if p.returncode != 0:
        if target_file is not None:
            # 'wget -O' creates the output file even when the transfer
            # fails; do not leave a truncated file behind for callers
            # that treat an existing file as a finished download.
            try:
                os.unlink(target_file)
            except OSError:
                pass  # best effort: the file may not have been created
        sys.stderr.write(('Unable to get data from the url: %s '
                          '(error: %d).\n%s\n%s') %
                         (url, p.returncode, wget_out, wget_err) + '\n')
        raise Exception('Unable to download data (%d).' % p.returncode)
    if target_file is None:
        return wget_out
def parse_index_html(index_html, base_url, filter_ext):
    """Extract URLs of files listed on an HTML directory index page.

    index_html -- text of the index page.
    base_url   -- URL of the directory the page describes.
    filter_ext -- suffix a link must end with to be selected.

    Only links that point to a file located directly in base_url are
    accepted: links to subdirectories, parent directories, absolute paths
    and other sites are skipped. Returns a list of full URLs in page order;
    a link that occurs on the page several times is reported once.
    """
    # URLs always use '/', so join them with posixpath rather than with
    # the platform-dependent os.path.
    import posixpath

    base_dir = base_url.rstrip('/')
    file_list = []
    seen = set()
    for match in re.finditer(r'href="([^"]+)"', index_html, re.M):
        filename = match.group(1)
        if not filename.endswith(filter_ext):
            continue
        filepath = posixpath.join(base_url, filename)
        if posixpath.dirname(filepath) != base_dir or \
           posixpath.basename(filepath) != filename:
            continue
        if filepath in seen:
            continue
        seen.add(filepath)
        file_list.append(filepath)
    return file_list
def download_repodir(source_urlp, cache_dir):
    """Mirror the .rpm files of a remote repository directory into a cache.

    source_urlp -- parsed URL (result of urlparse) of the repository
                   directory.
    cache_dir   -- root directory of the local cache.

    The files are stored in <cache_dir>/<host>/<url path>. For http(s)
    sources, files that are already present are not downloaded again, and
    cached .rpm files that are no longer listed on the remote index page
    are removed. Returns the path of the local directory.
    """
    # Drop embedded credentials (everything up to the last '@') so that
    # they do not end up in the cache directory name.
    site = source_urlp.netloc.rpartition('@')[2]
    target_dir = os.path.join(cache_dir,
                              site,
                              source_urlp.path.lstrip('/'))
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)

    if source_urlp.scheme in ['http', 'https']:
        source_url = source_urlp.geturl()
        remote_dir_contents = parse_index_html(wget_url(source_url, None),
                                               source_url, '.rpm')
        remote_filenames = set()
        for remote_file in remote_dir_contents:
            remote_filename = urllib.unquote(os.path.basename(remote_file))
            # After unquoting, a name may contain path separators (for
            # example '..%2F'); never write outside of target_dir.
            if os.path.basename(remote_filename) != remote_filename:
                sys.stderr.write('Skipping suspicious file name: %s\n' %
                                 remote_filename)
                continue
            remote_filenames.add(remote_filename)
            target_file = os.path.join(target_dir, remote_filename)
            if os.path.isfile(target_file):
                continue
            # Download under a temporary name and rename afterwards, so an
            # interrupted download is never mistaken for a cached package.
            partial_file = target_file + '.part'
            wget_url(remote_file, partial_file)
            os.rename(partial_file, target_file)

        # Purge stale packages. This is done only after the remote index
        # has been read, so an unsupported scheme cannot empty the cache.
        for local_filename in os.listdir(target_dir):
            if local_filename.endswith('.rpm') and \
               local_filename not in remote_filenames:
                print('Removing local file: %s.' % local_filename)
                os.unlink(os.path.join(target_dir, local_filename))
    return target_dir
def urpm_get_packages(media):
    """Query urpmq for the packages of the given media (unfinished).

    media -- name of the urpmi media to query.

    Runs 'urpmq --ignorearch --list --media <media>' twice, first with
    '-r' and then with '-f'. A non-zero exit code or any output on stderr
    is reported and raises an Exception. Processing of the results is not
    written yet, so the function always ends with
    Exception('Not implemented.').
    """
    # Possible way to fetch a package later on:
    # urpmi --no-install --allow-nodeps --force
    # --download-all=/tmp/ xine-wavpack-1.2.4-1plf --media Desktop2012.1-8
    for list_flag in ('-r', '-f'):
        urpmq = subprocess.Popen(['urpmq', list_flag, '--ignorearch',
                                  '--list', '--media', media],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        urpmq_out, urpmq_err = urpmq.communicate()
        failed = urpmq.returncode != 0 or len(urpmq_err) > 0
        if failed:
            message = ('Unable to get a list of packages '
                       'from the media: %s.\n'
                       '%s\n%s') % (media, urpmq_out, urpmq_err)
            sys.stderr.write(message + '\n')
            raise Exception('Unable to get a list of packages (%d).' %
                            urpmq.returncode)
    # TODO: split the output of the last query into lines and build the
    # package list from it.
    raise Exception('Not implemented.')
def urpm_get_repodir(repodir_name, cache_dir):
    """Prepare a cache directory for an urpm media (unfinished).

    repodir_name -- name of the repository; it is also passed to
                    urpm_get_packages() as the media name.
    cache_dir    -- root directory of the local cache.

    Creates <cache_dir>/<repodir_name>/rpms when it is missing and asks
    urpm_get_packages() for the media contents. The rest is not written
    yet, so the function always ends with Exception('Not implemented.').
    """
    rpms_dir = os.path.join(cache_dir, repodir_name, 'rpms')
    if not os.path.isdir(rpms_dir):
        os.makedirs(rpms_dir)
    # Placeholders for the future implementation.
    known_files = {}
    media_contents = urpm_get_packages(repodir_name)
    raise Exception('Not implemented.')
generator_value = 0
def process_repodir(repodir_path, repodir_id, build_archs, conn, db_struct,
tempdir, no_shared_objects, no_so_symbols):
def process_repodir(xrepodir, repodir_id, cache_dir, build_archs, conn,
db_struct, temp_dir, no_shared_objects, no_so_symbols):
repodir_url = xrepodir.get('url')
urlp = urlparse(repodir_url)
working_url = repodir_url
if cache_dir is not None:
if urlp.scheme in ['http', 'https']:
working_url = download_repodir(urlp, cache_dir)
elif urlp.scheme == 'urpm':
working_url = urpm_get_repodir(xrepodir.get('name'), cache_dir)
elif urlp.scheme not in ['', 'file']:
raise Exception('Invalid scheme in the repository url: %s' %
repodir_url)
rpm_list = []
rpm_list = getFileList(repodir_path, '.rpm', rpm_list)
rpm_list = get_files(working_url, '.rpm')
if not rpm_list:
return
print repodir_path, ': ', len(rpm_list)
print urlp.netloc[urlp.netloc.find('@') + 1:] + urlp.path, ': ', \
len(rpm_list)
if not db_struct.get('defined'):
rpm_ts = rpm.TransactionSet()
rpm_ts.setVSFlags(~(rpm.RPMVSF_NEEDPAYLOAD))
# ts.setVSFlags(~(rpm.RPMVSF_NOMD5|rpm.RPMVSF_NEEDPAYLOAD))
hdr = get_rpm_header(rpm_ts, rpm_list[0])
hdr = get_rpm_header(rpm_ts, get_local_file(rpm_list[0], temp_dir))
packages_extra_fields = {'repodir_id': 'INTEGER',
'rpm_filepath': 'TEXT',
'package_url': 'TEXT',
'sourcerpm_package': 'TEXT'}
file_tags_re = r'^RPMTAG_(BASENAMES|FILE[\w\d]+)'
@ -446,8 +593,9 @@ def process_repodir(repodir_path, repodir_id, build_archs, conn, db_struct,
trigger_tags_re = r'^RPMTAG_TRIGGER\w+'
datetime_tags = ['RPMTAG_PACKAGETIME', 'RPMTAG_RPMLIBTIMESTAMP', ]
db_struct['blob_tags'] = ['RPMTAG_DSAHEADER', 'RPMTAG_HEADERIMMUTABLE',
'RPMTAG_PKGID', 'RPMTAG_SIGMD5']
db_struct['blob_tags'] = ['RPMTAG_RSAHEADER', 'RPMTAG_DSAHEADER',
'RPMTAG_HEADERIMMUTABLE', 'RPMTAG_SIGMD5',
'RPMTAG_PKGID', 'RPMTAG_SOURCEPKGID']
reserved_field_names = ['id', 'group']
skip_tags_re = '^RPMTAG_(C|D|E|N|P|R|V|HEADERIMMUTABLE)$'
@ -469,10 +617,11 @@ def process_repodir(repodir_path, repodir_id, build_archs, conn, db_struct,
rpmtags = [str(t) for t in dir(rpm) if t.startswith('RPMTAG_') ]
for tag in rpmtags:
if re.match(file_tags_re, tag) or re.match(dir_tags_re, tag) or \
re.match(changelog_tags_re, tag) or \
re.match(skip_tags_re, tag) or re.match(trigger_tags_re, tag) or \
re.match(dep_tags_re, tag):
if (re.match(file_tags_re, tag) or re.match(dir_tags_re, tag) or
re.match(changelog_tags_re, tag) or
re.match(skip_tags_re, tag) or
re.match(trigger_tags_re, tag) or
re.match(dep_tags_re, tag)):
continue
sqltype = "TIMESTAMP" if tag in datetime_tags else \
"BLOB" if tag in db_struct['blob_tags'] else \
@ -513,19 +662,21 @@ CREATE TABLE IF NOT EXISTS %s (id INTEGER PRIMARY KEY NOT NULL,
for i in xrange(NUM_PROCESSES):
queue_in.put(None)
# run workers
gc.collect() # Trying to prevent Exception AssertionError: AssertionError() in <Finalize object, dead> ignored
# Trying to prevent Exception AssertionError: AssertionError() in
# <Finalize object, dead> ignored
gc.collect()
time.sleep(1)
gc.disable()
global generator_value
id_generator = mp.Value('i', generator_value)
generator_lock = mp.Lock()
# run workers
workers = []
for i in xrange(NUM_PROCESSES):
worker = mp.Process(target = process_package_worker,
args = (i, queue_in, id_generator,
generator_lock, db_struct,
repodir_id, build_archs, tempdir,
repodir_id, build_archs, temp_dir,
no_shared_objects, no_so_symbols))
workers.append(worker)
worker.start()
@ -553,20 +704,25 @@ def main(args):
parser = ET.XMLParser()
tree = ET.parse(options.config, parser=parser)
config_root = tree.getroot()
tempdir = '/dev/shm/rt-tmp/'
shutil.rmtree(tempdir, True)
os.mkdir(tempdir)
temp_dir = '/dev/shm/rt-tmp/'
shutil.rmtree(temp_dir, True)
os.mkdir(temp_dir)
rpm_db_struct = {}
for xrepodir in config_root.find('repositories').findall('dir'):
repodir_id = add_repodir(xrepodir, conn)
build_archs = [None] if xrepodir.get('sources') != '.' else \
get_build_archs(xrepodir,
config_root.find('repositories'))
process_repodir(xrepodir.get('path'), repodir_id, build_archs, conn,
rpm_db_struct, tempdir, options.no_shared_objects,
options.no_so_symbols)
shutil.rmtree(tempdir, True)
index_database(conn)
process_repodir(xrepodir, repodir_id, options.cache_dir,
build_archs, conn, rpm_db_struct, temp_dir,
options.no_shared_objects, options.no_so_symbols)
shutil.rmtree(temp_dir, True)
if rpm_db_struct.get('defined'):
index_database(conn)
else:
print 'Database was not initialized ' \
'(check whether repositories are empty).'
os.unlink(DB)
if __name__ == "__main__":

View file

@ -4,77 +4,77 @@
<!-- <dir name="rosa-dx-chrome-1.0/armv7hl/debug_main/release"
arch="armv7hl"
sources="rosa-dx-chrome-1.0/SRPMS/main/release"
path="/mnt/7/repo/abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/armv7hl/debug_main/release/"></dir>
url="/tmp/repo/abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/armv7hl/debug_main/release/"></dir>
<dir name="rosa-dx-chrome-1.0/armv7hl/main/release"
arch="armv7hl"
sources="rosa-dx-chrome-1.0/SRPMS/main/release"
path="/mnt/7/repo/abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/armv7hl/main/release/"></dir>
url="/tmp/repo/abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/armv7hl/main/release/"></dir>
<dir name="rosa-dx-chrome-1.0/armv7l/debug_main/release"
arch="armv7l"
sources="rosa-dx-chrome-1.0/SRPMS/main/release"
path="/mnt/7/repo/abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/armv7l/debug_main/release/"></dir>
url="/tmp/repo/abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/armv7l/debug_main/release/"></dir>
<dir name="rosa-dx-chrome-1.0/armv7l/main/release"
arch="armv7l"
sources="rosa-dx-chrome-1.0/SRPMS/main/release"
path="/mnt/7/repo/abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/armv7l/main/release/"></dir>
url="/tmp/repo/abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/armv7l/main/release/"></dir>
-->
<dir name="rosa-dx-chrome-1.0/i586/debug_main/release"
sources="rosa-dx-chrome-1.0/SRPMS/main/release"
path="/mnt/7/repo/abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/i586/debug_main/release/"></dir>
url="http://{token}@abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/i586/debug_main/release/"></dir>
<dir name="rosa-dx-chrome-1.0/i586/main/release"
sources="rosa-dx-chrome-1.0/SRPMS/main/release"
path="/mnt/7/repo/abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/i586/main/release/"></dir>
url="http://{token}@abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/i586/main/release/"></dir>
<dir name="rosa-dx-chrome-1.0/i586/main/updates"
sources="rosa-dx-chrome-1.0/SRPMS/main/updates"
path="/mnt/7/repo/abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/i586/main/updates/">
url="http://{token}@abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/i586/main/updates/">
<dependency>rosa-dx-chrome-1.0/i586/main/release</dependency>
</dir>
<dir name="dx_rc_personal/i586/debug_main/release"
sources="dx_rc_personal/SRPMS/main/release"
path="/mnt/7/repo/abf-downloads.rosalinux.ru/dx_rc_personal/repository/rosa-dx-chrome-1.0/i586/debug_main/release/">
url="http://{token}@abf-downloads.rosalinux.ru/dx_rc_personal/repository/rosa-dx-chrome-1.0/i586/debug_main/release/">
<dependency>rosa-dx-chrome-1.0/i586/main/release</dependency>
</dir>
<dir name="dx_rc_personal/i586/main/release"
sources="dx_rc_personal/SRPMS/main/release"
path="/mnt/7/repo/abf-downloads.rosalinux.ru/dx_rc_personal/repository/rosa-dx-chrome-1.0/i586/main/release/">
url="http://{token}@abf-downloads.rosalinux.ru/dx_rc_personal/repository/rosa-dx-chrome-1.0/i586/main/release/">
<dependency>rosa-dx-chrome-1.0/i586/main/release</dependency>
</dir>
<dir name="rosa-dx-chrome-1.0/x86_64/debug_main/release"
sources="rosa-dx-chrome-1.0/SRPMS/main/release"
path="/mnt/7/repo/abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/x86_64/debug_main/release/"></dir>
url="http://{token}@abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/x86_64/debug_main/release/"></dir>
<dir name="rosa-dx-chrome-1.0/x86_64/main/release"
sources="rosa-dx-chrome-1.0/SRPMS/main/release"
path="/mnt/7/repo/abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/x86_64/main/release/"></dir>
url="http://{token}@abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/x86_64/main/release/"></dir>
<dir name="rosa-dx-chrome-1.0/x86_64/main/updates"
sources="rosa-dx-chrome-1.0/SRPMS/main/updates"
path="/mnt/7/repo/abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/x86_64/main/updates/">
url="http://{token}@abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/x86_64/main/updates/">
<dependency>rosa-dx-chrome-1.0/x86_64/main/release</dependency>
</dir>
<dir name="dx_rc_personal/x86_64/debug_main/release"
sources="dx_rc_personal/SRPMS/main/release"
path="/mnt/7/repo/abf-downloads.rosalinux.ru/dx_rc_personal/repository/rosa-dx-chrome-1.0/x86_64/debug_main/release/">
url="http://{token}@abf-downloads.rosalinux.ru/dx_rc_personal/repository/rosa-dx-chrome-1.0/x86_64/debug_main/release/">
<dependency>rosa-dx-chrome-1.0/x86_64/main/release</dependency>
</dir>
<dir name="dx_rc_personal/x86_64/main/release"
sources="dx_rc_personal/SRPMS/main/release"
path="/mnt/7/repo/abf-downloads.rosalinux.ru/dx_rc_personal/repository/rosa-dx-chrome-1.0/x86_64/main/release/">
url="http://{token}@abf-downloads.rosalinux.ru/dx_rc_personal/repository/rosa-dx-chrome-1.0/x86_64/main/release/">
<dependency>rosa-dx-chrome-1.0/x86_64/main/release</dependency>
</dir>
<dir name="rosa-dx-chrome-1.0/SRPMS/main/release"
sources="."
path="/mnt/7/repo/abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/SRPMS/main/release/">
url="http://{token}@abf-downloads.rosalinux.ru/rosa-dx-chrome-1.0/repository/SRPMS/main/release/">
<dependency>rosa-dx-chrome-1.0/$arch/main/release</dependency>
</dir>
<dir name="dx_rc_personal/SRPMS/main/release"
sources="."
path="/mnt/7/repo/abf-downloads.rosalinux.ru/dx_rc_personal/repository/rosa-dx-chrome-1.0/SRPMS/main/release/">
url="http://{token}@abf-downloads.rosalinux.ru/dx_rc_personal/repository/rosa-dx-chrome-1.0/SRPMS/main/release/">
<dependency>rosa-dx-chrome-1.0/$arch/main/release</dependency>
<dependency>dx_rc_personal/$arch/main/release</dependency>
</dir>