mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-19 06:15:49 +00:00
fix(ingest): fix metadata for custom python packages (#9391)
This commit is contained in:
parent
0e40d38f4c
commit
d52f0305eb
@ -256,7 +256,7 @@ to deploy during development. This will allow Datahub to read and write your new
|
|||||||
import Tabs from '@theme/Tabs';
|
import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
<Tabs>
|
<Tabs queryString="python-custom-models">
|
||||||
<TabItem value="local" label="Local CLI" default>
|
<TabItem value="local" label="Local CLI" default>
|
||||||
|
|
||||||
If you're purely using the custom models locally, you can use a local development-mode install of the DataHub CLI.
|
If you're purely using the custom models locally, you can use a local development-mode install of the DataHub CLI.
|
||||||
@ -273,12 +273,21 @@ If you want to use your custom models beyond your local machine without forking
|
|||||||
This package should be installed alongside the base `acryl-datahub` package, and its metadata models will take precedence over the default ones.
|
This package should be installed alongside the base `acryl-datahub` package, and its metadata models will take precedence over the default ones.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd metadata-ingestion
|
$ cd metadata-ingestion
|
||||||
../gradlew customPackageGenerate -Ppackage_name=my-company-datahub-models -Ppackage_version="0.0.1"
|
$ ../gradlew customPackageGenerate -Ppackage_name=my-company-datahub-models -Ppackage_version="0.0.1"
|
||||||
|
<bunch of log lines>
|
||||||
|
Successfully built my-company-datahub-models-0.0.1.tar.gz and my_company_datahub_models-0.0.1-py3-none-any.whl
|
||||||
|
|
||||||
|
Generated package at custom-package/my-company-datahub-models
|
||||||
|
This package should be installed alongside the main acryl-datahub package.
|
||||||
|
|
||||||
|
Install the custom package locally with `pip install custom-package/my-company-datahub-models`
|
||||||
|
To enable others to use it, share the file at custom-package/my-company-datahub-models/dist/*.whl and have them install it with `pip install <wheel file>.whl`
|
||||||
|
Alternatively, publish it to PyPI with `twine upload custom-package/my-company-datahub-models/dist/*`
|
||||||
```
|
```
|
||||||
|
|
||||||
This will generate some Python build artifacts, which you can distribute within your team or publish to PyPI.
|
This will generate some Python build artifacts, which you can distribute within your team or publish to PyPI.
|
||||||
The command output will contain additional details and exact CLI commands you can use.
|
The command output contains additional details and exact CLI commands you can use.
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|||||||
@ -252,34 +252,12 @@ def annotate_aspects(aspects: List[dict], schema_class_file: Path) -> None:
|
|||||||
schema_classes_lines = schema_class_file.read_text().splitlines()
|
schema_classes_lines = schema_class_file.read_text().splitlines()
|
||||||
line_lookup_table = {line: i for i, line in enumerate(schema_classes_lines)}
|
line_lookup_table = {line: i for i, line in enumerate(schema_classes_lines)}
|
||||||
|
|
||||||
# Create the Aspect class.
|
# Import the _Aspect class.
|
||||||
# We ensure that it cannot be instantiated directly, as
|
|
||||||
# per https://stackoverflow.com/a/7989101/5004662.
|
|
||||||
schema_classes_lines[
|
schema_classes_lines[
|
||||||
line_lookup_table["__SCHEMAS: Dict[str, RecordSchema] = {}"]
|
line_lookup_table["__SCHEMAS: Dict[str, RecordSchema] = {}"]
|
||||||
] += """
|
] += """
|
||||||
|
|
||||||
class _Aspect(DictWrapper):
|
from datahub._codegen.aspect import _Aspect
|
||||||
ASPECT_NAME: ClassVar[str] = None # type: ignore
|
|
||||||
ASPECT_TYPE: ClassVar[str] = "default"
|
|
||||||
ASPECT_INFO: ClassVar[dict] = None # type: ignore
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
if type(self) is _Aspect:
|
|
||||||
raise TypeError("_Aspect is an abstract class, and cannot be instantiated directly.")
|
|
||||||
super().__init__()
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_aspect_name(cls) -> str:
|
|
||||||
return cls.ASPECT_NAME # type: ignore
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_aspect_type(cls) -> str:
|
|
||||||
return cls.ASPECT_TYPE
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_aspect_info(cls) -> dict:
|
|
||||||
return cls.ASPECT_INFO
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
for aspect in aspects:
|
for aspect in aspects:
|
||||||
@ -776,6 +754,7 @@ def generate(
|
|||||||
import importlib
|
import importlib
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from datahub._codegen.aspect import _Aspect
|
||||||
from datahub.utilities.docs_build import IS_SPHINX_BUILD
|
from datahub.utilities.docs_build import IS_SPHINX_BUILD
|
||||||
from datahub.utilities._custom_package_loader import get_custom_models_package
|
from datahub.utilities._custom_package_loader import get_custom_models_package
|
||||||
|
|
||||||
@ -785,7 +764,7 @@ if TYPE_CHECKING or not _custom_package_path:
|
|||||||
from ._schema_classes import *
|
from ._schema_classes import *
|
||||||
|
|
||||||
# Required explicitly because __all__ doesn't include _ prefixed names.
|
# Required explicitly because __all__ doesn't include _ prefixed names.
|
||||||
from ._schema_classes import _Aspect, __SCHEMA_TYPES
|
from ._schema_classes import __SCHEMA_TYPES
|
||||||
|
|
||||||
if IS_SPHINX_BUILD:
|
if IS_SPHINX_BUILD:
|
||||||
# Set __module__ to the current module so that Sphinx will document the
|
# Set __module__ to the current module so that Sphinx will document the
|
||||||
|
|||||||
@ -73,6 +73,8 @@ __version__ = "{package_version}"
|
|||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
|
(src_path / "py.typed").write_text("")
|
||||||
|
|
||||||
(package_path / "setup.py").write_text(
|
(package_path / "setup.py").write_text(
|
||||||
f"""{autogen_header}
|
f"""{autogen_header}
|
||||||
from setuptools import setup
|
from setuptools import setup
|
||||||
@ -87,6 +89,11 @@ setup(
|
|||||||
"avro-gen3=={_avrogen_version}",
|
"avro-gen3=={_avrogen_version}",
|
||||||
"acryl-datahub",
|
"acryl-datahub",
|
||||||
],
|
],
|
||||||
|
package_data={{
|
||||||
|
"{python_package_name}": ["py.typed"],
|
||||||
|
"{python_package_name}.models": ["schema.avsc"],
|
||||||
|
"{python_package_name}.models.schemas": ["*.avsc"],
|
||||||
|
}},
|
||||||
entry_points={{
|
entry_points={{
|
||||||
"datahub.custom_packages": [
|
"datahub.custom_packages": [
|
||||||
"models={python_package_name}.models.schema_classes",
|
"models={python_package_name}.models.schema_classes",
|
||||||
|
|||||||
0
metadata-ingestion/src/datahub/_codegen/__init__.py
Normal file
0
metadata-ingestion/src/datahub/_codegen/__init__.py
Normal file
36
metadata-ingestion/src/datahub/_codegen/aspect.py
Normal file
36
metadata-ingestion/src/datahub/_codegen/aspect.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
from typing import ClassVar
|
||||||
|
|
||||||
|
from avrogen.dict_wrapper import DictWrapper
|
||||||
|
|
||||||
|
|
||||||
|
class _Aspect(DictWrapper):
    """Common parent class for every aspect type.

    Every codegened type derives from DictWrapper, whether directly or
    indirectly; the types that are aspects derive directly from _Aspect.
    """

    # Class-level metadata. The None placeholders are presumably overridden
    # by each concrete aspect class (not visible from this file).
    ASPECT_NAME: ClassVar[str] = None  # type: ignore
    ASPECT_TYPE: ClassVar[str] = "default"
    ASPECT_INFO: ClassVar[dict] = None  # type: ignore

    def __init__(self):
        """Initialize the wrapper, refusing direct use of the base class.

        Raises:
            TypeError: if the object being created is exactly an _Aspect
                rather than one of its subclasses.
        """
        # Only the base class itself is rejected; subclasses fall through.
        # Technique taken from https://stackoverflow.com/a/7989101/5004662.
        instantiating_base = type(self) is _Aspect
        if instantiating_base:
            raise TypeError(
                "_Aspect is an abstract class, and cannot be instantiated directly."
            )
        super().__init__()

    @classmethod
    def get_aspect_name(cls) -> str:
        """Return the class-level ASPECT_NAME."""
        return cls.ASPECT_NAME  # type: ignore

    @classmethod
    def get_aspect_type(cls) -> str:
        """Return the class-level ASPECT_TYPE ("default" unless overridden)."""
        return cls.ASPECT_TYPE

    @classmethod
    def get_aspect_info(cls) -> dict:
        """Return the class-level ASPECT_INFO."""
        return cls.ASPECT_INFO
|
||||||
Loading…
x
Reference in New Issue
Block a user