unstructured/typings/lxml/etree/_nsclasses.pyi
Steve Canny 6fe1c9980e
rfctr(html): prepare for new html parser (#3257)
**Summary**
Extract as much mechanical refactoring from the HTML parser change-over
into this PR as possible. This leaves the next PR focused on installing
the new parser and the ingest-test impact.

**Reviewers:** Commits are well-groomed, so reviewing commit-by-commit
is probably easier.

**Additional Context**
This PR introduces the rewritten HTML parser. Its general design is
recursive, consistent with the recursive structure of HTML (tree of
elements). It also adds the unit tests for that parser but it does not
_install_ the parser. So the behavior of `partition_html()` is unchanged
by this PR. The next PR in this series will do that and handle the
ingest and other unit test changes required to reflect the dozen or so
bug-fixes the new parser provides.
2024-06-21 20:59:48 +00:00

32 lines
1.2 KiB
Python

# pyright: reportPrivateUsage=false
from __future__ import annotations
from typing import Iterable, Iterator, MutableMapping, TypeVar
from .._types import SupportsLaxedItems
from ._classlookup import ElementBase, ElementClassLookup, FallbackElementClassLookup
# Type parameters for the generic `_NamespaceRegistry` mapping declared in this
# stub: `_KT` is the key type, `_VT` is the value type.
_KT = TypeVar("_KT")
_VT = TypeVar("_VT")
class _NamespaceRegistry(MutableMapping[_KT, _VT]):
    # Generic mutable-mapping stub, parameterized by key type `_KT` and value
    # type `_VT`. Being a stub, every method body is `...`; only the signatures
    # carry meaning for the type checker.

    # -- core `MutableMapping` protocol --
    def __delitem__(self, __key: _KT) -> None: ...
    def __getitem__(self, __key: _KT) -> _VT: ...
    def __setitem__(self, __key: _KT, __value: _VT) -> None: ...
    def __iter__(self) -> Iterator[_KT]: ...
    def __len__(self) -> int: ...

    # Takes exactly one required argument: either a `SupportsLaxedItems` object
    # or an iterable of `(key, value)` tuples. No keyword pairs are accepted, so
    # the signature is incompatible with `MutableMapping.update()` and the
    # override check is suppressed on purpose.
    # NOTE(review): `SupportsLaxedItems` is declared in `.._types`; presumably a
    # protocol for objects exposing `.items()` -- confirm there.
    def update(  # type: ignore[override]
        self,
        class_dict_iterable: SupportsLaxedItems[_KT, _VT] | Iterable[tuple[_KT, _VT]],
    ) -> None: ...

    # Declared to return a concrete `list` of pairs rather than the `ItemsView`
    # the base class declares, hence the suppressed override check.
    def items(self) -> list[tuple[_KT, _VT]]: ...  # type: ignore[override]

    # Iterator counterpart of `items()`, yielding `(key, value)` tuples.
    def iteritems(self) -> Iterator[tuple[_KT, _VT]]: ...
    def clear(self) -> None: ...
# Specialization of `_NamespaceRegistry` whose keys are `str | None` and whose
# values are `ElementBase` subclasses (the classes themselves, not instances).
# NOTE(review): keys are presumably element tag names, with `None` as a
# namespace-wide default -- confirm against the lxml documentation.
class _ClassNamespaceRegistry(_NamespaceRegistry[str | None, type[ElementBase]]): ...
class ElementNamespaceClassLookup(FallbackElementClassLookup):
    # Class-lookup stub that hands out one `_ClassNamespaceRegistry` per
    # namespace URI. It extends `FallbackElementClassLookup`.

    # `fallback` is optional; omitting it is the same as passing `None`.
    def __init__(self, fallback: ElementClassLookup | None = None) -> None: ...

    # Returns the registry associated with `ns_uri`. `None` is an accepted
    # value.
    # NOTE(review): `None` presumably selects the empty (no-namespace)
    # registry -- confirm against the lxml documentation.
    def get_namespace(self, ns_uri: str | None) -> _ClassNamespaceRegistry: ...