fix(ingest): fix metadata for custom python packages (#9391)

This commit is contained in:
Harshal Sheth 2023-12-08 13:13:49 -05:00 committed by GitHub
parent 0e40d38f4c
commit d52f0305eb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 60 additions and 29 deletions

View File

@ -256,7 +256,7 @@ to deploy during development. This will allow Datahub to read and write your new
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
<Tabs>
<Tabs queryString="python-custom-models">
<TabItem value="local" label="Local CLI" default>
If you're purely using the custom models locally, you can use a local development-mode install of the DataHub CLI.
@ -273,12 +273,21 @@ If you want to use your custom models beyond your local machine without forking
This package should be installed alongside the base `acryl-datahub` package, and its metadata models will take precedence over the default ones.
```bash
cd metadata-ingestion
../gradlew customPackageGenerate -Ppackage_name=my-company-datahub-models -Ppackage_version="0.0.1"
$ cd metadata-ingestion
$ ../gradlew customPackageGenerate -Ppackage_name=my-company-datahub-models -Ppackage_version="0.0.1"
<bunch of log lines>
Successfully built my-company-datahub-models-0.0.1.tar.gz and my_company_datahub_models-0.0.1-py3-none-any.whl
Generated package at custom-package/my-company-datahub-models
This package should be installed alongside the main acryl-datahub package.
Install the custom package locally with `pip install custom-package/my-company-datahub-models`
To enable others to use it, share the file at custom-package/my-company-datahub-models/dist/*.whl and have them install it with `pip install <wheel file>.whl`
Alternatively, publish it to PyPI with `twine upload custom-package/my-company-datahub-models/dist/*`
```
This will generate some Python build artifacts, which you can distribute within your team or publish to PyPI.
The command output will contain additional details and exact CLI commands you can use.
The command output contains additional details and exact CLI commands you can use.
</TabItem>
</Tabs>

View File

@ -252,34 +252,12 @@ def annotate_aspects(aspects: List[dict], schema_class_file: Path) -> None:
schema_classes_lines = schema_class_file.read_text().splitlines()
line_lookup_table = {line: i for i, line in enumerate(schema_classes_lines)}
# Create the Aspect class.
# We ensure that it cannot be instantiated directly, as
# per https://stackoverflow.com/a/7989101/5004662.
# Import the _Aspect class.
schema_classes_lines[
line_lookup_table["__SCHEMAS: Dict[str, RecordSchema] = {}"]
] += """
class _Aspect(DictWrapper):
ASPECT_NAME: ClassVar[str] = None # type: ignore
ASPECT_TYPE: ClassVar[str] = "default"
ASPECT_INFO: ClassVar[dict] = None # type: ignore
def __init__(self):
if type(self) is _Aspect:
raise TypeError("_Aspect is an abstract class, and cannot be instantiated directly.")
super().__init__()
@classmethod
def get_aspect_name(cls) -> str:
return cls.ASPECT_NAME # type: ignore
@classmethod
def get_aspect_type(cls) -> str:
return cls.ASPECT_TYPE
@classmethod
def get_aspect_info(cls) -> dict:
return cls.ASPECT_INFO
from datahub._codegen.aspect import _Aspect
"""
for aspect in aspects:
@ -776,6 +754,7 @@ def generate(
import importlib
from typing import TYPE_CHECKING
from datahub._codegen.aspect import _Aspect
from datahub.utilities.docs_build import IS_SPHINX_BUILD
from datahub.utilities._custom_package_loader import get_custom_models_package
@ -785,7 +764,7 @@ if TYPE_CHECKING or not _custom_package_path:
from ._schema_classes import *
# Required explicitly because __all__ doesn't include _ prefixed names.
from ._schema_classes import _Aspect, __SCHEMA_TYPES
from ._schema_classes import __SCHEMA_TYPES
if IS_SPHINX_BUILD:
# Set __module__ to the current module so that Sphinx will document the

View File

@ -73,6 +73,8 @@ __version__ = "{package_version}"
"""
)
(src_path / "py.typed").write_text("")
(package_path / "setup.py").write_text(
f"""{autogen_header}
from setuptools import setup
@ -87,6 +89,11 @@ setup(
"avro-gen3=={_avrogen_version}",
"acryl-datahub",
],
package_data={{
"{python_package_name}": ["py.typed"],
"{python_package_name}.models": ["schema.avsc"],
"{python_package_name}.models.schemas": ["*.avsc"],
}},
entry_points={{
"datahub.custom_packages": [
"models={python_package_name}.models.schema_classes",

View File

@ -0,0 +1,36 @@
from typing import ClassVar
from avrogen.dict_wrapper import DictWrapper
class _Aspect(DictWrapper):
    """Common base class for every aspect type.

    Every code-generated type derives from DictWrapper, directly or
    indirectly; the ones that represent aspects derive directly from
    _Aspect instead.
    """

    # Class-level metadata. The None values are placeholders on the base
    # class (presumably overridden by each generated aspect subclass).
    ASPECT_NAME: ClassVar[str] = None  # type: ignore
    ASPECT_TYPE: ClassVar[str] = "default"
    ASPECT_INFO: ClassVar[dict] = None  # type: ignore

    def __init__(self):
        """Initialise the wrapper, refusing to construct a bare _Aspect.

        Raises:
            TypeError: if the object being built is exactly _Aspect rather
                than one of its subclasses.
        """
        # Emulate an abstract class with an exact-type check, as per
        # https://stackoverflow.com/a/7989101/5004662.
        is_base_class = type(self) is _Aspect
        if is_base_class:
            raise TypeError(
                "_Aspect is an abstract class, and cannot be instantiated directly."
            )

        super().__init__()

    @classmethod
    def get_aspect_name(cls) -> str:
        """Return the class-level ASPECT_NAME."""
        return cls.ASPECT_NAME  # type: ignore

    @classmethod
    def get_aspect_type(cls) -> str:
        """Return the class-level ASPECT_TYPE ("default" unless overridden)."""
        return cls.ASPECT_TYPE

    @classmethod
    def get_aspect_info(cls) -> dict:
        """Return the class-level ASPECT_INFO."""
        return cls.ASPECT_INFO