diff --git a/docs/modeling/extending-the-metadata-model.md b/docs/modeling/extending-the-metadata-model.md index ba101be16b..293688a8b8 100644 --- a/docs/modeling/extending-the-metadata-model.md +++ b/docs/modeling/extending-the-metadata-model.md @@ -256,7 +256,7 @@ to deploy during development. This will allow Datahub to read and write your new import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - + If you're purely using the custom models locally, you can use a local development-mode install of the DataHub CLI. @@ -273,12 +273,21 @@ If you want to use your custom models beyond your local machine without forking This package should be installed alongside the base `acryl-datahub` package, and its metadata models will take precedence over the default ones. ```bash -cd metadata-ingestion -../gradlew customPackageGenerate -Ppackage_name=my-company-datahub-models -Ppackage_version="0.0.1" +$ cd metadata-ingestion +$ ../gradlew customPackageGenerate -Ppackage_name=my-company-datahub-models -Ppackage_version="0.0.1" + +Successfully built my-company-datahub-models-0.0.1.tar.gz and my_company_datahub_models-0.0.1-py3-none-any.whl + +Generated package at custom-package/my-company-datahub-models +This package should be installed alongside the main acryl-datahub package. + +Install the custom package locally with `pip install custom-package/my-company-datahub-models` +To enable others to use it, share the file at custom-package/my-company-datahub-models/dist/*.whl and have them install it with `pip install <wheel file>.whl` +Alternatively, publish it to PyPI with `twine upload custom-package/my-company-datahub-models/dist/*` ``` This will generate some Python build artifacts, which you can distribute within your team or publish to PyPI. -The command output will contain additional details and exact CLI commands you can use. +The command output contains additional details and exact CLI commands you can use. 
diff --git a/metadata-ingestion/scripts/avro_codegen.py b/metadata-ingestion/scripts/avro_codegen.py index c6f6bac128..bd4988f990 100644 --- a/metadata-ingestion/scripts/avro_codegen.py +++ b/metadata-ingestion/scripts/avro_codegen.py @@ -252,34 +252,12 @@ def annotate_aspects(aspects: List[dict], schema_class_file: Path) -> None: schema_classes_lines = schema_class_file.read_text().splitlines() line_lookup_table = {line: i for i, line in enumerate(schema_classes_lines)} - # Create the Aspect class. - # We ensure that it cannot be instantiated directly, as - # per https://stackoverflow.com/a/7989101/5004662. + # Import the _Aspect class. schema_classes_lines[ line_lookup_table["__SCHEMAS: Dict[str, RecordSchema] = {}"] ] += """ -class _Aspect(DictWrapper): - ASPECT_NAME: ClassVar[str] = None # type: ignore - ASPECT_TYPE: ClassVar[str] = "default" - ASPECT_INFO: ClassVar[dict] = None # type: ignore - - def __init__(self): - if type(self) is _Aspect: - raise TypeError("_Aspect is an abstract class, and cannot be instantiated directly.") - super().__init__() - - @classmethod - def get_aspect_name(cls) -> str: - return cls.ASPECT_NAME # type: ignore - - @classmethod - def get_aspect_type(cls) -> str: - return cls.ASPECT_TYPE - - @classmethod - def get_aspect_info(cls) -> dict: - return cls.ASPECT_INFO +from datahub._codegen.aspect import _Aspect """ for aspect in aspects: @@ -776,6 +754,7 @@ def generate( import importlib from typing import TYPE_CHECKING +from datahub._codegen.aspect import _Aspect from datahub.utilities.docs_build import IS_SPHINX_BUILD from datahub.utilities._custom_package_loader import get_custom_models_package @@ -785,7 +764,7 @@ if TYPE_CHECKING or not _custom_package_path: from ._schema_classes import * # Required explicitly because __all__ doesn't include _ prefixed names. 
- from ._schema_classes import _Aspect, __SCHEMA_TYPES + from ._schema_classes import __SCHEMA_TYPES if IS_SPHINX_BUILD: # Set __module__ to the current module so that Sphinx will document the diff --git a/metadata-ingestion/scripts/custom_package_codegen.py b/metadata-ingestion/scripts/custom_package_codegen.py index a5883c9ae9..8582e16598 100644 --- a/metadata-ingestion/scripts/custom_package_codegen.py +++ b/metadata-ingestion/scripts/custom_package_codegen.py @@ -73,6 +73,8 @@ __version__ = "{package_version}" """ ) + (src_path / "py.typed").write_text("") + (package_path / "setup.py").write_text( f"""{autogen_header} from setuptools import setup @@ -87,6 +89,11 @@ setup( "avro-gen3=={_avrogen_version}", "acryl-datahub", ], + package_data={{ + "{python_package_name}": ["py.typed"], + "{python_package_name}.models": ["schema.avsc"], + "{python_package_name}.models.schemas": ["*.avsc"], + }}, entry_points={{ "datahub.custom_packages": [ "models={python_package_name}.models.schema_classes", diff --git a/metadata-ingestion/src/datahub/_codegen/__init__.py b/metadata-ingestion/src/datahub/_codegen/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/metadata-ingestion/src/datahub/_codegen/aspect.py b/metadata-ingestion/src/datahub/_codegen/aspect.py new file mode 100644 index 0000000000..28fa3f1536 --- /dev/null +++ b/metadata-ingestion/src/datahub/_codegen/aspect.py @@ -0,0 +1,36 @@ +from typing import ClassVar + +from avrogen.dict_wrapper import DictWrapper + + +class _Aspect(DictWrapper): + """Base class for all aspect types. + + All codegened types inherit from DictWrapper, either directly or indirectly. + Types that are aspects inherit directly from _Aspect. 
+ """ + + ASPECT_NAME: ClassVar[str] = None # type: ignore + ASPECT_TYPE: ClassVar[str] = "default" + ASPECT_INFO: ClassVar[dict] = None # type: ignore + + def __init__(self): + if type(self) is _Aspect: + # Ensure that it cannot be instantiated directly, as + # per https://stackoverflow.com/a/7989101/5004662. + raise TypeError( + "_Aspect is an abstract class, and cannot be instantiated directly." + ) + super().__init__() + + @classmethod + def get_aspect_name(cls) -> str: + return cls.ASPECT_NAME # type: ignore + + @classmethod + def get_aspect_type(cls) -> str: + return cls.ASPECT_TYPE + + @classmethod + def get_aspect_info(cls) -> dict: + return cls.ASPECT_INFO