mirror of
https://github.com/release-engineering/repo-autoindex.git
synced 2025-02-23 21:52:52 +00:00

The Fetcher type was designed to return a 'str'. That wasn't a good idea because it implies that every fetched file must be loaded into memory completely. On certain large yum repos, decompressed primary XML can be hundreds of MB, and it's not appropriate to require loading all of that into memory at once. Make it support a file-like object (stream of bytes). Since the SAX XML parser supports reading from a stream, this makes it possible to avoid loading everything into memory at once. A test of the repo-autoindex CLI against /content/dist/rhel/server/7/7Server/x86_64/os showed a major improvement: before, ~1200MiB; after, ~80MiB. Note that achieving the full improvement requires any downstream users of the library (e.g. exodus-gw) to update their Fetcher implementation as well, to stop returning a 'str'.
71 lines
2.5 KiB
Python
71 lines
2.5 KiB
Python
from typing import Optional, Type
|
|
from collections.abc import AsyncGenerator
|
|
import logging
|
|
|
|
from .base import (
|
|
IOFetcher,
|
|
Repo,
|
|
GeneratedIndex,
|
|
IndexEntry,
|
|
ICON_OPTICAL,
|
|
ICON_QCOW,
|
|
)
|
|
from .template import TemplateContext
|
|
from .tree import treeify
|
|
|
|
# Logger used by this module; registered under the name "repo-autoindex".
LOG = logging.getLogger("repo-autoindex")
|
|
|
|
|
|
class PulpFileRepo(Repo):
    """Repository whose entry point is a ``PULP_MANIFEST`` file.

    The decoded manifest text is read from ``self.entry_point_content``.
    Each usable manifest line has exactly three comma-separated fields:
    ``<name>,<checksum>,<size>``.
    """

    async def render_index(
        self, index_href_suffix: str
    ) -> AsyncGenerator[GeneratedIndex, None]:
        """Yield the generated index pages for this repository.

        Args:
            index_href_suffix: Passed through unchanged to ``treeify``.

        Yields:
            One ``GeneratedIndex`` per node of the tree returned by
            ``treeify``, starting with the root node.
        """
        all_entries: list[IndexEntry] = [
            IndexEntry(
                href="PULP_MANIFEST",
                text="PULP_MANIFEST",
                # Report the size in bytes, like the sizes taken from the
                # manifest lines below. The content is text decoded from a
                # byte stream, so len() of the str itself would undercount
                # whenever the manifest contains non-ASCII characters.
                size=str(len(self.entry_point_content.encode())),
            )
        ]

        # PULP_MANIFEST is a series of lines like this:
        # rhel-workstation-7.2-snapshot-2-x86_64-boot.iso,fa687b8f847b5301b6da817fdbe612558aa69c65584ec5781f3feb0c19ff8f24,379584512
        # rhel-workstation-7.3-rc-2-x86_64-dvd.iso,eab749310c95b4751ef9df7d7906ae0b8021c8e0dbc280c3efc8e967d5e60e71,4324327424
        # rhel-workstation-7.3-rc-1-x86_64-dvd.iso,e165919d6977e02e493605dda6a30d2d80c3f16ee3f4c3ab946d256b815dd5db,4323278848
        # rhel-server-7.3-rc-1-x86_64-boot.iso,f760611401fd928c2840eba85a7a80653fe2dc9dc94f3cef8ec1f3e7880d4102,427819008

        for line in sorted(self.entry_point_content.splitlines()):
            components = line.split(",")
            if len(components) != 3:
                # Best effort: a malformed line is reported and skipped
                # rather than aborting the whole index.
                LOG.warning("Ignoring bad line in PULP_MANIFEST: %s", line)
                continue

            # The checksum (middle field) is not shown in the index.
            name, _checksum, size = components
            entry = IndexEntry(href=name, text=name, size=size)
            if entry.href.endswith(".iso"):
                entry.icon = ICON_OPTICAL
            elif entry.href.endswith(".qcow2"):
                entry.icon = ICON_QCOW
            all_entries.append(entry)

        ctx = TemplateContext()

        # Walk the tree with an explicit stack so that every node,
        # including nested children, gets its own index page.
        nodes = [treeify(all_entries, index_href_suffix=index_href_suffix)]
        while nodes:
            node = nodes.pop()
            yield GeneratedIndex(
                content=ctx.render_index(index_entries=node.entries),
                relative_dir=node.relative_dir,
            )
            nodes.extend(node.children)

    @classmethod
    async def probe(
        cls: Type["PulpFileRepo"], fetcher: IOFetcher, url: str
    ) -> Optional["PulpFileRepo"]:
        """Try to fetch ``{url}/PULP_MANIFEST`` and build a repo from it.

        Args:
            fetcher: Awaitable callable used to fetch the manifest URL.
            url: Base URL of the candidate repository.

        Returns:
            A new instance built from the decoded manifest, or ``None``
            when the fetcher returns ``None`` for the manifest URL.
        """
        manifest_url = f"{url}/PULP_MANIFEST"
        manifest_content = await fetcher(manifest_url)

        if manifest_content is None:
            return None

        # The stream is fully consumed here and not handed to anyone else,
        # so release it even if reading or decoding fails.
        try:
            manifest_text = manifest_content.read().decode()
        finally:
            manifest_content.close()

        return cls(url, manifest_text, fetcher)
|