mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-01 19:25:56 +00:00
fix(ingest): fix metadata for custom python packages (#9391)
This commit is contained in:
parent
0e40d38f4c
commit
d52f0305eb
@ -256,7 +256,7 @@ to deploy during development. This will allow DataHub to read and write your new
|
||||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
|
||||
<Tabs>
|
||||
<Tabs queryString="python-custom-models">
|
||||
<TabItem value="local" label="Local CLI" default>
|
||||
|
||||
If you're purely using the custom models locally, you can use a local development-mode install of the DataHub CLI.
|
||||
@ -273,12 +273,21 @@ If you want to use your custom models beyond your local machine without forking
|
||||
This package should be installed alongside the base `acryl-datahub` package, and its metadata models will take precedence over the default ones.
|
||||
|
||||
```bash
|
||||
cd metadata-ingestion
|
||||
../gradlew customPackageGenerate -Ppackage_name=my-company-datahub-models -Ppackage_version="0.0.1"
|
||||
$ cd metadata-ingestion
|
||||
$ ../gradlew customPackageGenerate -Ppackage_name=my-company-datahub-models -Ppackage_version="0.0.1"
|
||||
<bunch of log lines>
|
||||
Successfully built my-company-datahub-models-0.0.1.tar.gz and my_company_datahub_models-0.0.1-py3-none-any.whl
|
||||
|
||||
Generated package at custom-package/my-company-datahub-models
|
||||
This package should be installed alongside the main acryl-datahub package.
|
||||
|
||||
Install the custom package locally with `pip install custom-package/my-company-datahub-models`
|
||||
To enable others to use it, share the file at custom-package/my-company-datahub-models/dist/*.whl and have them install it with `pip install <wheel file>.whl`
|
||||
Alternatively, publish it to PyPI with `twine upload custom-package/my-company-datahub-models/dist/*`
|
||||
```
|
||||
|
||||
This will generate some Python build artifacts, which you can distribute within your team or publish to PyPI.
|
||||
The command output will contain additional details and exact CLI commands you can use.
|
||||
The command output contains additional details and exact CLI commands you can use.
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
@ -252,34 +252,12 @@ def annotate_aspects(aspects: List[dict], schema_class_file: Path) -> None:
|
||||
schema_classes_lines = schema_class_file.read_text().splitlines()
|
||||
line_lookup_table = {line: i for i, line in enumerate(schema_classes_lines)}
|
||||
|
||||
# Create the Aspect class.
|
||||
# We ensure that it cannot be instantiated directly, as
|
||||
# per https://stackoverflow.com/a/7989101/5004662.
|
||||
# Import the _Aspect class.
|
||||
schema_classes_lines[
|
||||
line_lookup_table["__SCHEMAS: Dict[str, RecordSchema] = {}"]
|
||||
] += """
|
||||
|
||||
class _Aspect(DictWrapper):
|
||||
ASPECT_NAME: ClassVar[str] = None # type: ignore
|
||||
ASPECT_TYPE: ClassVar[str] = "default"
|
||||
ASPECT_INFO: ClassVar[dict] = None # type: ignore
|
||||
|
||||
def __init__(self):
|
||||
if type(self) is _Aspect:
|
||||
raise TypeError("_Aspect is an abstract class, and cannot be instantiated directly.")
|
||||
super().__init__()
|
||||
|
||||
@classmethod
|
||||
def get_aspect_name(cls) -> str:
|
||||
return cls.ASPECT_NAME # type: ignore
|
||||
|
||||
@classmethod
|
||||
def get_aspect_type(cls) -> str:
|
||||
return cls.ASPECT_TYPE
|
||||
|
||||
@classmethod
|
||||
def get_aspect_info(cls) -> dict:
|
||||
return cls.ASPECT_INFO
|
||||
from datahub._codegen.aspect import _Aspect
|
||||
"""
|
||||
|
||||
for aspect in aspects:
|
||||
@ -776,6 +754,7 @@ def generate(
|
||||
import importlib
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from datahub._codegen.aspect import _Aspect
|
||||
from datahub.utilities.docs_build import IS_SPHINX_BUILD
|
||||
from datahub.utilities._custom_package_loader import get_custom_models_package
|
||||
|
||||
@ -785,7 +764,7 @@ if TYPE_CHECKING or not _custom_package_path:
|
||||
from ._schema_classes import *
|
||||
|
||||
# Required explicitly because __all__ doesn't include _ prefixed names.
|
||||
from ._schema_classes import _Aspect, __SCHEMA_TYPES
|
||||
from ._schema_classes import __SCHEMA_TYPES
|
||||
|
||||
if IS_SPHINX_BUILD:
|
||||
# Set __module__ to the current module so that Sphinx will document the
|
||||
|
||||
@ -73,6 +73,8 @@ __version__ = "{package_version}"
|
||||
"""
|
||||
)
|
||||
|
||||
(src_path / "py.typed").write_text("")
|
||||
|
||||
(package_path / "setup.py").write_text(
|
||||
f"""{autogen_header}
|
||||
from setuptools import setup
|
||||
@ -87,6 +89,11 @@ setup(
|
||||
"avro-gen3=={_avrogen_version}",
|
||||
"acryl-datahub",
|
||||
],
|
||||
package_data={{
|
||||
"{python_package_name}": ["py.typed"],
|
||||
"{python_package_name}.models": ["schema.avsc"],
|
||||
"{python_package_name}.models.schemas": ["*.avsc"],
|
||||
}},
|
||||
entry_points={{
|
||||
"datahub.custom_packages": [
|
||||
"models={python_package_name}.models.schema_classes",
|
||||
|
||||
0
metadata-ingestion/src/datahub/_codegen/__init__.py
Normal file
0
metadata-ingestion/src/datahub/_codegen/__init__.py
Normal file
36
metadata-ingestion/src/datahub/_codegen/aspect.py
Normal file
36
metadata-ingestion/src/datahub/_codegen/aspect.py
Normal file
@ -0,0 +1,36 @@
|
||||
from typing import ClassVar
|
||||
|
||||
from avrogen.dict_wrapper import DictWrapper
|
||||
|
||||
|
||||
class _Aspect(DictWrapper):
|
||||
"""Base class for all aspect types.
|
||||
|
||||
All codegened types inherit from DictWrapper, either directly or indirectly.
|
||||
Types that are aspects inherit directly from _Aspect.
|
||||
"""
|
||||
|
||||
ASPECT_NAME: ClassVar[str] = None # type: ignore
|
||||
ASPECT_TYPE: ClassVar[str] = "default"
|
||||
ASPECT_INFO: ClassVar[dict] = None # type: ignore
|
||||
|
||||
def __init__(self):
|
||||
if type(self) is _Aspect:
|
||||
# Ensure that it cannot be instantiated directly, as
|
||||
# per https://stackoverflow.com/a/7989101/5004662.
|
||||
raise TypeError(
|
||||
"_Aspect is an abstract class, and cannot be instantiated directly."
|
||||
)
|
||||
super().__init__()
|
||||
|
||||
@classmethod
|
||||
def get_aspect_name(cls) -> str:
|
||||
return cls.ASPECT_NAME # type: ignore
|
||||
|
||||
@classmethod
|
||||
def get_aspect_type(cls) -> str:
|
||||
return cls.ASPECT_TYPE
|
||||
|
||||
@classmethod
|
||||
def get_aspect_info(cls) -> dict:
|
||||
return cls.ASPECT_INFO
|
||||
Loading…
x
Reference in New Issue
Block a user