feat(ingest): rename custom package path from models to metadata (#9502)

This commit is contained in:
Harshal Sheth 2023-12-21 20:30:36 -05:00 committed by GitHub
parent ca518d6c78
commit be329986ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 18 additions and 5 deletions

View File

@ -289,6 +289,14 @@ Alternatively, publish it to PyPI with `twine upload custom-package/my-company-d
This will generate some Python build artifacts, which you can distribute within your team or publish to PyPI.
The command output contains additional details and exact CLI commands you can use.
Once this package is installed, you can use the DataHub CLI as normal, and it will use your custom models.
You'll also be able to import those models directly, with IDE support, by changing your imports to point at your custom package.
```diff
- from datahub.metadata.schema_classes import DatasetPropertiesClass
+ from my_company_datahub_models.metadata.schema_classes import DatasetPropertiesClass
```
</TabItem>
</Tabs>

View File

@ -62,7 +62,7 @@ def generate(
entity_registry=entity_registry,
pdl_path=pdl_path,
schemas_path=schemas_path,
outdir=str(src_path / "models"),
outdir=str(src_path / "metadata"),
enable_custom_loader=False,
)
@ -91,13 +91,13 @@ setup(
],
package_data={{
"{python_package_name}": ["py.typed"],
"{python_package_name}.models": ["schema.avsc"],
"{python_package_name}.models.schemas": ["*.avsc"],
"{python_package_name}.metadata": ["schema.avsc"],
"{python_package_name}.metadata.schemas": ["*.avsc"],
}},
entry_points={{
"datahub.custom_packages": [
"models={python_package_name}.models.schema_classes",
"urns={python_package_name}.models._urns.urn_defs",
"models={python_package_name}.metadata.schema_classes",
"urns={python_package_name}.metadata._urns.urn_defs",
],
}},
)

View File

@ -16,6 +16,7 @@ import datahub as datahub_package
from datahub.cli.cli_utils import DATAHUB_ROOT_FOLDER, get_boolean_env_variable
from datahub.configuration.common import ExceptionWithProps
from datahub.ingestion.graph.client import DataHubGraph
from datahub.metadata.schema_classes import _custom_package_path
from datahub.utilities.perf_timer import PerfTimer
logger = logging.getLogger(__name__)
@ -89,6 +90,10 @@ CI_ENV_VARS = {
if any(var in os.environ for var in CI_ENV_VARS):
ENV_ENABLED = False
# Also disable telemetry if a custom metadata model package is in use.
if _custom_package_path:
ENV_ENABLED = False
TIMEOUT = int(os.environ.get("DATAHUB_TELEMETRY_TIMEOUT", "10"))
MIXPANEL_ENDPOINT = "track.datahubproject.io/mp"
MIXPANEL_TOKEN = "5ee83d940754d63cacbf7d34daa6f44a"