diff --git a/fill-repodb.py b/fill-repodb.py
index cfc2bbb..06f8935 100755
--- a/fill-repodb.py
+++ b/fill-repodb.py
@@ -29,7 +29,7 @@ RPMFILEMODE_EXECUTE = 0111
 def get_files(url, ext):
     filelist = []
     urlp = urlparse(url)
-    if urlp.scheme in ['http', 'https']:
+    if urlp.scheme in ['ftp', 'http', 'https']:
         return parse_index_html(wget_url(url, None), url, '.rpm')
     dir_list = os.listdir(url)
     for d in dir_list:
@@ -303,7 +303,7 @@ def process_package_worker(num, queue_in, generator, gen_lock, db_struct,
     while True:
         job = queue_in.get()
 
-        if job == None:
+        if job is None:
             break
 
         (pkg, ) = job
@@ -434,7 +434,7 @@ INSERT INTO obj_symbols(obj_file_id, name, sym_type) VALUES(?, ?, ?)
 local_cache = {}
 def get_local_file(url, temp_dir):
     urlp = urlparse(url)
-    if urlp.scheme in ['http', 'https']:
+    if urlp.scheme in ['ftp', 'http', 'https']:
         cached_file_name = local_cache.get(url)
         if cached_file_name and os.path.isfile(cached_file_name):
             return cached_file_name
@@ -487,6 +487,9 @@ def parse_index_html(index_html, base_url, filter_ext):
     for match in re.finditer(r'href="([^"]+)"', index_html, re.M):
         filename = match.group(1)
         if filename.endswith(filter_ext):
+            if '://' in filename[:8]:
+                file_list.append(filename)
+                continue
             filepath = os.path.join(base_url, filename)
             if os.path.dirname(filepath) == base_url.rstrip('/') and \
                os.path.basename(filepath) == filename:
@@ -502,7 +505,7 @@ def download_repodir(source_urlp, cache_dir):
     if not os.path.isdir(target_dir):
         os.makedirs(target_dir)
     remote_files = {}
-    if source_urlp.scheme in ['http', 'https']:
+    if source_urlp.scheme in ['ftp', 'http', 'https']:
         source_url = source_urlp.geturl()
         remote_dir_contents = parse_index_html(wget_url(source_url, None),
                                                source_url, '.rpm')
@@ -523,31 +526,75 @@
     return target_dir
 
 def urpm_get_packages(media):
+    extra_params = []
+    if not media.endswith(' update'):
+        extra_params = ['--exclude-media', media + ' update']
     p = subprocess.Popen(['urpmq', '-r', '--ignorearch',
-                          '--list', '--media', media],
+                          '--list', '--media', media] +
+                         extra_params,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
-    urpmq_out, urpmq_err = p.communicate()
-    if p.returncode != 0 or len(urpmq_err) > 0:
+    urpmqr_out, urpmqr_err = p.communicate()
+    if p.returncode != 0 or len(urpmqr_err) > 0:
         print >> sys.stderr, ('Unable to get a list of packages '
                               'from the media: %s.\n'
-                              '%s\n%s') % (media, urpmq_out, urpmq_err)
+                              '%s\n%s') % (media, urpmqr_out, urpmqr_err)
         raise Exception('Unable to get a list of packages (%d).'
                         % p.returncode)
     # urpmi --no-install --allow-nodeps --force
     # --download-all=/tmp/ xine-wavpack-1.2.4-1plf --media Desktop2012.1-8
     p = subprocess.Popen(['urpmq', '-f', '--ignorearch',
-                          '--list', '--media', media],
+                          '--list', '--media', media] +
+                         extra_params,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
-    urpmq_out, urpmq_err = p.communicate()
-    if p.returncode != 0 or len(urpmq_err) > 0:
+    urpmqf_out, urpmqf_err = p.communicate()
+    if p.returncode != 0 or len(urpmqf_err) > 0:
         print >> sys.stderr, ('Unable to get a list of packages '
                               'from the media: %s.\n'
-                              '%s\n%s') % (media, urpmq_out, urpmq_err)
+                              '%s\n%s') % (media, urpmqf_out, urpmqf_err)
         raise Exception('Unable to get a list of packages (%d).'
                         % p.returncode)
-    #qr_lines = urpmq_out.split('\n')
-    raise Exception('Not implemented.')
+    rpm_list = []
+    qr_lines = urpmqr_out.split('\n')
+    qf_lines = urpmqf_out.split('\n')
+    if len(qr_lines) != len(qf_lines):
+        raise Exception('Not consistent urpmq -r and urpmq -f outputs '
+                        '(%d and %d lines).' % (len(qr_lines), len(qf_lines)))
+    for i in xrange(0, len(qf_lines)):
+        qf_line = qf_lines[i]
+        if qf_line.strip() == '':
+            continue
+        if not qf_line.startswith(qr_lines[i]):
+            raise Exception('Not consistent urpmq -r and urpmq -f outputs: '
+                            '%s and %s' % (qr_lines[i], qf_line))
+        rpm_list.append('urpm://%s/%s.rpm#%s' % (urllib.quote(media),
+                                                 urllib.quote(qf_line),
+                                                 urllib.quote(qr_lines[i])))
+    return rpm_list
+
+
+def get_urpmi(urpm_package, target_dir):
+    urlp = urlparse(urpm_package)
+    package_name = urllib.unquote(urlp.fragment)
+    print package_name
+    p = subprocess.Popen(['urpmi', '--no-install',
+                          '--force', '--no-suggests',
+                          '--allow-nodeps',
+                          '--no-download-all',
+                          '--media', urlp.netloc,
+                          package_name])
+                          #stdout=subprocess.PIPE,
+                          #stderr=subprocess.PIPE)
+    urpmi_out, urpmi_err = p.communicate()
+    if p.returncode != 0:
+        print >> sys.stderr, ('Unable to get the package %s '
+                              'from the media %s.\n'
+                              '%s\n%s') % (
+                                  package_name, urlp.netloc,
+                                  urpmi_out, urpmi_err)
+        raise Exception('Unable to get the package %s (%d).' %
+                        (package_name, p.returncode))
+
 def urpm_get_repodir(repodir_name, cache_dir):
     target_dir = os.path.join(cache_dir,
@@ -557,6 +604,12 @@ def urpm_get_repodir(repodir_name, cache_dir):
         os.makedirs(target_dir)
     urpm_files = {}
     urpm_media_contents = urpm_get_packages(repodir_name)
+    for urpm_package in urpm_media_contents:
+        remote_filename = urllib.unquote(os.path.basename(urpm_package))
+        target_file = os.path.join(target_dir, remote_filename)
+        get_urpmi(urpm_package, os.path.join(cache_dir,
+                                             repodir_name))
+        print target_file
     raise Exception('Not implemented.')
 
 generator_value = 0
@@ -568,7 +621,7 @@ def process_repodir(xrepodir, repodir_id, cache_dir, build_archs, conn,
     urlp = urlparse(repodir_url)
     working_url = repodir_url
     if cache_dir is not None:
-        if urlp.scheme in ['http', 'https']:
+        if urlp.scheme in ['ftp', 'http', 'https']:
            working_url = download_repodir(urlp, cache_dir)
         elif urlp.scheme == 'urpm':
             working_url = urpm_get_repodir(xrepodir.get('name'), cache_dir)
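
For reference, a minimal standalone sketch (not part of the patch above) of the urpm:// pseudo-URL round trip that the new urpm_get_packages() and get_urpmi() code relies on. The package names are made-up examples based on the xine-wavpack comment in the patch, and the sketch assumes Python 2.7, where urlparse splits the fragment for arbitrary schemes, the same assumption the patched code makes.

# Sketch only: mirrors the URL encoding in urpm_get_packages() and the
# decoding in get_urpmi(); the media and package names are hypothetical.
import os
import urllib
from urlparse import urlparse

media = 'Desktop2012.1-8'                      # hypothetical urpmi media name
full_name = 'xine-wavpack-1.2.4-1plf.x86_64'   # one line of `urpmq -f` output
short_name = 'xine-wavpack-1.2.4-1plf'         # matching `urpmq -r` output line

# Encoding, as built at the end of urpm_get_packages():
urpm_url = 'urpm://%s/%s.rpm#%s' % (urllib.quote(media),
                                    urllib.quote(full_name),
                                    urllib.quote(short_name))

# Decoding, along the lines of get_urpmi():
urlp = urlparse(urpm_url)
assert urllib.unquote(urlp.netloc) == media         # media passed to urpmi
assert urllib.unquote(urlp.fragment) == short_name  # package name passed to urpmi
assert urllib.unquote(os.path.basename(urlp.path)) == full_name + '.rpm'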