Mirror of https://github.com/release-engineering/repo-autoindex.git (synced 2025-02-23 13:42:52 +00:00)
Implement error handling
Ultimately, all errors are propagated in some way, but it's important to differentiate between "the content was invalid" and "failed to fetch the content". A caller-side sketch follows the changed-files summary below.
parent 74f657c79e
commit 293f5887b7
5 changed files with 209 additions and 21 deletions
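
The distinction matters most to callers. A minimal sketch of the consumer-side picture (the URL is a placeholder, and the aiohttp.ClientError clause assumes the default aiohttp-based fetcher is in use):

import asyncio

import aiohttp

from repo_autoindex import autoindex, ContentError


async def main() -> None:
    try:
        async for page in autoindex("https://example.com/some/repo"):
            ...  # each page is a GeneratedIndex; render or store it
    except ContentError:
        # A repository was found, but its metadata is invalid.
        raise
    except aiohttp.ClientError:
        # The fetcher could not retrieve content at all; such errors
        # propagate unchanged from the fetcher.
        raise


asyncio.run(main())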
repo_autoindex/__init__.py
@@ -1,4 +1,7 @@
 from ._impl.api import autoindex
-from ._impl.base import Fetcher, GeneratedIndex
+from ._impl.base import Fetcher, GeneratedIndex, ContentError
 
-__all__ = ["autoindex", "Fetcher", "GeneratedIndex"]
+ContentError.__module__ = "repo_autoindex"
+
+__all__ = ["autoindex", "ContentError", "Fetcher", "GeneratedIndex"]
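
Re-exporting ContentError and pointing its __module__ at the package root makes the class present itself under its public import path rather than the private one; a quick illustration:

from repo_autoindex import ContentError

# Without the __module__ assignment this would render as
# <class 'repo_autoindex._impl.base.ContentError'>.
print(ContentError)  # <class 'repo_autoindex.ContentError'>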
repo_autoindex/_impl/api.py
@@ -5,7 +5,7 @@ from typing import Optional, Type
 
 import aiohttp
 
-from .base import Fetcher, GeneratedIndex, Repo
+from .base import Fetcher, GeneratedIndex, Repo, ContentError, FetcherError
 from .yum import YumRepo
 from .pulp import PulpFileRepo
 
@@ -40,6 +40,17 @@ def http_fetcher(session: aiohttp.ClientSession) -> Fetcher:
     return get_content_with_session
 
 
+def with_error_handling(fetcher: Fetcher) -> Fetcher:
+    # wraps a fetcher such that any raised exceptions are wrapped into FetcherError
+    async def new_fetcher(url: str) -> Optional[str]:
+        try:
+            return await fetcher(url)
+        except Exception as exc:
+            raise FetcherError from exc
+
+    return new_fetcher
+
+
 async def autoindex(
     url: str,
     *,
@@ -91,6 +102,15 @@ async def autoindex(
 
     Zero indexes may be produced if the given URL doesn't represent a repository
     of any supported type.
+
+    Raises:
+        :class:`ContentError`
+            Raised if indexed content appears to be invalid (for example, a yum repository
+            has invalid repodata).
+
+        :class:`Exception`
+            Any exception raised by ``fetcher`` will propagate (for example, I/O errors or
+            HTTP request failures).
     """
     if fetcher is None:
         async with aiohttp.ClientSession() as session:
@@ -103,8 +123,23 @@ async def autoindex(
     while url.endswith("/"):
         url = url[:-1]
 
-    for repo_type in REPO_TYPES:
-        repo = await repo_type.probe(fetcher, url)
-        if repo:
-            async for page in repo.render_index(index_href_suffix=index_href_suffix):
-                yield page
+    fetcher = with_error_handling(fetcher)
+
+    try:
+        for repo_type in REPO_TYPES:
+            repo = await repo_type.probe(fetcher, url)
+            if repo:
+                async for page in repo.render_index(
+                    index_href_suffix=index_href_suffix
+                ):
+                    yield page
+    except FetcherError as exc:
+        # FetcherErrors are unwrapped to propagate whatever was the original error
+        assert exc.__cause__
+        raise exc.__cause__ from None
+    except ContentError:
+        # explicitly raised ContentErrors are allowed to propagate
+        raise
+    except Exception as exc:
+        # Any other errors are treated as a ContentError
+        raise ContentError(f"Invalid content found at {url}") from exc
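
The wrap/unwrap dance above lets fetcher failures travel through probe() and render_index() without being mistaken for content problems, then hands the caller back the original exception. A self-contained sketch of the mechanism (bad_fetcher is a made-up failing fetcher, and the helper is restated here so the snippet runs on its own):

import asyncio


class FetcherError(Exception):
    pass


def with_error_handling(fetcher):
    # same wrapping as in api.py above
    async def new_fetcher(url):
        try:
            return await fetcher(url)
        except Exception as exc:
            raise FetcherError from exc

    return new_fetcher


async def bad_fetcher(url):
    raise IOError("connection refused")


async def demo():
    try:
        await with_error_handling(bad_fetcher)("https://example.com/repodata/repomd.xml")
    except FetcherError as exc:
        # autoindex does `raise exc.__cause__ from None`: the caller gets the
        # original IOError back, with the internal FetcherError suppressed.
        assert isinstance(exc.__cause__, IOError)
        raise exc.__cause__ from None


try:
    asyncio.run(demo())
except IOError as exc:
    print(exc)  # connection refused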
repo_autoindex/_impl/base.py
@@ -15,6 +15,22 @@ ICON_QCOW = "🐮"
 ICON_OTHER = " "
 
 
+class ContentError(Exception):
+    """An error raised when indexed content appears to be invalid.
+
+    Errors of this type are raised when repo-autoindex is able to successfully
+    retrieve content and determine a repository type but fails to parse
+    repository metadata. For example, a corrupt yum repository may cause this
+    error to be raised.
+    """
+
+
+class FetcherError(Exception):
+    # Internal-only error used to separate exceptions raised by fetchers from
+    # exceptions raised by anything else.
+    pass
+
+
 @dataclass
 class GeneratedIndex:
     """A single HTML index page generated by repo-autoindex."""
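
One consequence of the split worth spelling out: a ContentError raised from a parser failure keeps that failure chained as its __cause__, which the new tests below assert on. A small sketch of the chaining:

from repo_autoindex import ContentError

try:
    try:
        raise ValueError("bad XML")
    except ValueError as original:
        raise ContentError("Invalid content found at https://example.com") from original
except ContentError as exc:
    assert isinstance(exc.__cause__, ValueError)  # the parser failure is preserved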
repo_autoindex/_impl/yum.py
@@ -3,30 +3,35 @@ import logging
 import os
 from collections.abc import AsyncGenerator, Generator, Iterable
 from dataclasses import dataclass
-from typing import Optional, Type
+from typing import Optional, Type, Any, TypeVar, NoReturn, overload
 from xml.dom.minidom import Element
-from xml.dom.pulldom import END_ELEMENT, START_ELEMENT, DOMEventStream
+from xml.dom.pulldom import END_ELEMENT, START_ELEMENT
 
 from defusedxml import pulldom  # type: ignore
 
-from .base import ICON_FOLDER, ICON_PACKAGE, Fetcher, GeneratedIndex, IndexEntry, Repo
+from .base import ICON_PACKAGE, Fetcher, GeneratedIndex, IndexEntry, Repo, ContentError
 from .template import TemplateContext
 from .tree import treeify
 
 LOG = logging.getLogger("autoindex")
 
 
+def assert_repodata_ok(condition: Any, msg: str):
+    if not condition:
+        raise ContentError(msg)
+
+
 def get_tag(elem: Element, name: str) -> Element:
     elems: list[Element] = elem.getElementsByTagName(name)  # type: ignore
+    assert_repodata_ok(len(elems) == 1, f"expected exactly one {name} tag")
     return elems[0]
 
 
 def get_text_tag(elem: Element, name: str) -> str:
     tagnode = get_tag(elem, name)
     child = tagnode.firstChild
-    # TODO: raise proper error if missing
-    assert child
-    return str(child.toxml())
+    assert_repodata_ok(child, f"missing text {name} tag")
+    return str(child.toxml())  # type: ignore
 
 
 @dataclass
@@ -39,7 +44,6 @@ class Package:
     def from_element(cls, elem: Element) -> "Package":
         return cls(
             href=get_tag(elem, "location").attributes["href"].value,
-            # TODO: tolerate some of these being absent or wrong.
            time=get_tag(elem, "time").attributes["file"].value,
            size=get_tag(elem, "size").attributes["package"].value,
        )
@@ -104,10 +108,9 @@ class YumRepo(Repo):
         )
         if len(revision_nodes) == 1:
             timestamp_node = revision_nodes[0].firstChild
-            # TODO: raise proper error
-            assert timestamp_node
+            assert_repodata_ok(timestamp_node, "missing timestamp node")
             time = datetime.datetime.utcfromtimestamp(
-                int(timestamp_node.toxml())
+                int(timestamp_node.toxml())  # type: ignore
             ).isoformat()
 
             out.append(
@@ -165,11 +168,10 @@ class YumRepo(Repo):
         primary_url = "/".join([self.base_url, href])
         primary_xml = await self.fetcher(primary_url)
 
-        # TODO: raise proper error if missing
-        assert primary_xml
+        assert_repodata_ok(primary_xml, f"missing primary XML at {primary_url}")
 
         return sorted(
-            [p.index_entry for p in self.__packages_from_primary(primary_xml)],
+            [p.index_entry for p in self.__packages_from_primary(primary_xml)],  # type: ignore
             key=lambda e: e.text,
         )
 
tests/test_yum_render_corrupt.py
Normal file
132
tests/test_yum_render_corrupt.py
Normal file
|
@ -0,0 +1,132 @@
|
||||||
|
from typing import Optional
|
||||||
|
import textwrap
|
||||||
|
|
||||||
|
from repo_autoindex import autoindex, ContentError
|
||||||
|
|
||||||
|
REPOMD_XML = textwrap.dedent(
|
||||||
|
"""
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<repomd xmlns="http://linux.duke.edu/metadata/repo" xmlns:rpm="http://linux.duke.edu/metadata/rpm">
|
||||||
|
<revision>1657165688</revision>
|
||||||
|
<data type="primary">
|
||||||
|
<checksum type="sha256">d4888f04f95ac067af4d997d35c6d345cbe398563d777d017a3634c9ed6148cf</checksum>
|
||||||
|
<open-checksum type="sha256">6fc4eddd4e9de89246efba3815b8a9dec9dfe168e4fd3104cc792dff908a0f62</open-checksum>
|
||||||
|
<location href="repodata/d4888f04f95ac067af4d997d35c6d345cbe398563d777d017a3634c9ed6148cf-primary.xml.gz"/>
|
||||||
|
<timestamp>1657165688</timestamp>
|
||||||
|
<size>2932</size>
|
||||||
|
<open-size>16585</open-size>
|
||||||
|
</data>
|
||||||
|
<data type="filelists">
|
||||||
|
<checksum type="sha256">284769ec79daa9e0a3b0129bb6260cc6271c90c4fe02b43dfa7cdf7635fb803f</checksum>
|
||||||
|
<open-checksum type="sha256">72f89223c8b0f6c7a2ee6ed7fbd16ee0bb395ca68260038bb3895265af84c29f</open-checksum>
|
||||||
|
<location href="repodata/284769ec79daa9e0a3b0129bb6260cc6271c90c4fe02b43dfa7cdf7635fb803f-filelists.xml.gz"/>
|
||||||
|
<timestamp>1657165688</timestamp>
|
||||||
|
<size>4621</size>
|
||||||
|
<open-size>36911</open-size>
|
||||||
|
</data>
|
||||||
|
<data type="other">
|
||||||
|
<checksum type="sha256">36c2195bbee0c39ee080969abc6fd59d943c3471114cfd43c6e776ac20d7ed21</checksum>
|
||||||
|
<open-checksum type="sha256">39f52cf295db14e863abcd7b2eede8e6c5e39ac9b2f194349459d29cd492c90f</open-checksum>
|
||||||
|
<location href="repodata/36c2195bbee0c39ee080969abc6fd59d943c3471114cfd43c6e776ac20d7ed21-other.xml.gz"/>
|
||||||
|
<timestamp>1657165688</timestamp>
|
||||||
|
<size>1408</size>
|
||||||
|
<open-size>8432</open-size>
|
||||||
|
</data>
|
||||||
|
<data type="primary_db">
|
||||||
|
<checksum type="sha256">55e6bfd00e889c5c1f9a3c9fb35a660158bc5d975ae082d434f3cf81cc2c0c21</checksum>
|
||||||
|
<open-checksum type="sha256">b2692c49d1d98d68e764e29108d8a81a3dfd9e04fa7665115853a029396d118d</open-checksum>
|
||||||
|
<location href="repodata/55e6bfd00e889c5c1f9a3c9fb35a660158bc5d975ae082d434f3cf81cc2c0c21-primary.sqlite.bz2"/>
|
||||||
|
<timestamp>1657165688</timestamp>
|
||||||
|
<size>7609</size>
|
||||||
|
<open-size>114688</open-size>
|
||||||
|
<database_version>10</database_version>
|
||||||
|
</data>
|
||||||
|
<data type="filelists_db">
|
||||||
|
<checksum type="sha256">de63a509812c37f7736fcef0b79e9c55dfe67a2d77006f74fdc442935103e9e6</checksum>
|
||||||
|
<open-checksum type="sha256">40eb5d53fe547c98d470813256c9bfc8a239b13697d8eb824a1485c9e186a0e3</open-checksum>
|
||||||
|
<location href="repodata/de63a509812c37f7736fcef0b79e9c55dfe67a2d77006f74fdc442935103e9e6-filelists.sqlite.bz2"/>
|
||||||
|
<timestamp>1657165688</timestamp>
|
||||||
|
<size>10323</size>
|
||||||
|
<open-size>65536</open-size>
|
||||||
|
<database_version>10</database_version>
|
||||||
|
</data>
|
||||||
|
<data type="other_db">
|
||||||
|
<checksum type="sha256">9aa39b62df200cb3784dea24092d0c1c686afff0cd0990c2ec7a61afe8896e1c</checksum>
|
||||||
|
<open-checksum type="sha256">3e5cefb10ce805b827e12ca3b4839bba873dc9403fd92b60a364bf6f312bd972</open-checksum>
|
||||||
|
<location href="repodata/9aa39b62df200cb3784dea24092d0c1c686afff0cd0990c2ec7a61afe8896e1c-other.sqlite.bz2"/>
|
||||||
|
<timestamp>1657165688</timestamp>
|
||||||
|
<size>2758</size>
|
||||||
|
<open-size>32768</open-size>
|
||||||
|
<database_version>10</database_version>
|
||||||
|
</data>
|
||||||
|
</repomd>
|
||||||
|
"""
|
||||||
|
).strip()
|
||||||
|
|
||||||
|
PRIMARY_XML = textwrap.dedent(
|
||||||
|
"""
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<metadata xmlns="http://linux.duke.edu/metadata/common" xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="5">
|
||||||
|
<package type="rpm">
|
||||||
|
<name>
|
||||||
|
"""
|
||||||
|
).strip()
|
||||||
|
|
||||||
|
|
||||||
|
class StaticFetcher:
|
||||||
|
def __init__(self):
|
||||||
|
self.content: dict[str, str] = {}
|
||||||
|
|
||||||
|
async def __call__(self, url: str) -> Optional[str]:
|
||||||
|
return self.content.get(url)
|
||||||
|
|
||||||
|
|
||||||
|
async def test_corrupt_repodata():
|
||||||
|
fetcher = StaticFetcher()
|
||||||
|
|
||||||
|
fetcher.content["https://example.com/repodata/repomd.xml"] = REPOMD_XML
|
||||||
|
fetcher.content[
|
||||||
|
"https://example.com/repodata/d4888f04f95ac067af4d997d35c6d345cbe398563d777d017a3634c9ed6148cf-primary.xml.gz"
|
||||||
|
] = PRIMARY_XML
|
||||||
|
|
||||||
|
error = None
|
||||||
|
try:
|
||||||
|
async for _ in autoindex("https://example.com", fetcher=fetcher):
|
||||||
|
pass
|
||||||
|
except ContentError as exc:
|
||||||
|
error = exc
|
||||||
|
|
||||||
|
# It should have raised a ContentError
|
||||||
|
assert error
|
||||||
|
|
||||||
|
# It should summarize
|
||||||
|
assert "Invalid content found at https://example.com" in str(error)
|
||||||
|
|
||||||
|
# We don't want the test to depend on precise details, but it should have
|
||||||
|
# some cause coming from the XML parser
|
||||||
|
assert "xml" in error.__cause__.__module__
|
||||||
|
|
||||||
|
|
||||||
|
async def test_missing_primary():
|
||||||
|
fetcher = StaticFetcher()
|
||||||
|
|
||||||
|
fetcher.content["https://example.com/repodata/repomd.xml"] = REPOMD_XML
|
||||||
|
|
||||||
|
error = None
|
||||||
|
try:
|
||||||
|
async for _ in autoindex("https://example.com", fetcher=fetcher):
|
||||||
|
pass
|
||||||
|
except ContentError as exc:
|
||||||
|
error = exc
|
||||||
|
|
||||||
|
# It should have raised a ContentError
|
||||||
|
assert error
|
||||||
|
|
||||||
|
# It should state the reason
|
||||||
|
assert (
|
||||||
|
"missing primary XML at https://example.com/repodata/d4888f04f95ac067af4d997d35c6d345cbe398563d777d017a3634c9ed6148cf-primary.xml.gz"
|
||||||
|
in str(error)
|
||||||
|
)
|
||||||
|
|
||||||
|
# This one doesn't have a separate cause as it was raised explicitly by our code
|
||||||
|
assert not error.__cause__
|
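
As an aside, the collect-then-assert pattern in these tests could also be written with pytest.raises; a sketch reusing the file's StaticFetcher and fixtures, and assuming an async-capable pytest setup (e.g. pytest-asyncio):

import pytest

from repo_autoindex import autoindex, ContentError


async def test_corrupt_repodata_compact():
    fetcher = StaticFetcher()  # prepared with REPOMD_XML/PRIMARY_XML as above
    fetcher.content["https://example.com/repodata/repomd.xml"] = REPOMD_XML
    fetcher.content[
        "https://example.com/repodata/d4888f04f95ac067af4d997d35c6d345cbe398563d777d017a3634c9ed6148cf-primary.xml.gz"
    ] = PRIMARY_XML

    with pytest.raises(ContentError) as excinfo:
        async for _ in autoindex("https://example.com", fetcher=fetcher):
            pass
    assert "Invalid content found at https://example.com" in str(excinfo.value)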