mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-24 08:28:12 +00:00
feat(ingest): use entry point for registering transformers (#6628)
This commit is contained in:
parent
f0f0355a83
commit
05e18a0ae7
@ -1145,7 +1145,7 @@ def transform_one(self, mce: MetadataChangeEventClass) -> MetadataChangeEventCla
|
||||
Now that we've defined the transformer, we need to make it visible to DataHub. The easiest way to do this is to just place it in the same directory as your recipe, so that the module name matches the file name without the `.py` extension – here, `custom_transform_example`.
|
||||
|
||||
<details>
|
||||
<summary>Advanced: installing as a package</summary>
|
||||
<summary>Advanced: Installing as a package and enabling discoverability</summary>
|
||||
Alternatively, create a `setup.py` in the same directory as our transform script to make it visible globally. After installing this package (e.g. with `python setup.py install` or `pip install -e .`), our module will be installed and importable as `custom_transform_example`.
|
||||
|
||||
```python
|
||||
@ -1156,17 +1156,25 @@ setup(
|
||||
version="1.0",
|
||||
packages=find_packages(),
|
||||
# if you don't already have DataHub installed, add it under install_requires
|
||||
# install_requires=["acryl-datahub"]
|
||||
# install_requires=["acryl-datahub"],
|
||||
entry_points={
|
||||
"datahub.ingestion.transformer.plugins": [
|
||||
"custom_transform_example_alias = custom_transform_example:AddCustomOwnership",
|
||||
],
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
Additionally, declare the transformer under the `entry_points` argument of the `setup()` call in the setup script. This enables the transformer to be
|
||||
listed when running `datahub check plugins`, and sets up the transformer's shortened alias for use in recipes.
|
||||
|
||||
</details>
|
||||
|
||||
### Running the transform
|
||||
|
||||
```yaml
|
||||
transformers:
|
||||
- type: "custom_transform_example.AddCustomOwnership"
|
||||
- type: "custom_transform_example_alias"
|
||||
config:
|
||||
owners_json: "<path_to_owners_json>" # the JSON file mentioned at the start
|
||||
```
|
||||
|
||||
@ -545,6 +545,27 @@ entry_points = {
|
||||
"demo-data = datahub.ingestion.source.demo_data.DemoDataSource",
|
||||
"unity-catalog = datahub.ingestion.source.unity.source:UnityCatalogSource",
|
||||
],
|
||||
"datahub.ingestion.transformer.plugins": [
|
||||
"simple_remove_dataset_ownership = datahub.ingestion.transformer.remove_dataset_ownership:SimpleRemoveDatasetOwnership",
|
||||
"mark_dataset_status = datahub.ingestion.transformer.mark_dataset_status:MarkDatasetStatus",
|
||||
"set_dataset_browse_path = datahub.ingestion.transformer.add_dataset_browse_path:AddDatasetBrowsePathTransformer",
|
||||
"add_dataset_ownership = datahub.ingestion.transformer.add_dataset_ownership:AddDatasetOwnership",
|
||||
"simple_add_dataset_ownership = datahub.ingestion.transformer.add_dataset_ownership:SimpleAddDatasetOwnership",
|
||||
"pattern_add_dataset_ownership = datahub.ingestion.transformer.add_dataset_ownership:PatternAddDatasetOwnership",
|
||||
"add_dataset_domain = datahub.ingestion.transformer.dataset_domain:AddDatasetDomain",
|
||||
"simple_add_dataset_domain = datahub.ingestion.transformer.dataset_domain:SimpleAddDatasetDomain",
|
||||
"pattern_add_dataset_domain = datahub.ingestion.transformer.dataset_domain:PatternAddDatasetDomain",
|
||||
"add_dataset_tags = datahub.ingestion.transformer.add_dataset_tags:AddDatasetTags",
|
||||
"simple_add_dataset_tags = datahub.ingestion.transformer.add_dataset_tags:SimpleAddDatasetTags",
|
||||
"pattern_add_dataset_tags = datahub.ingestion.transformer.add_dataset_tags:PatternAddDatasetTags",
|
||||
"add_dataset_terms = datahub.ingestion.transformer.add_dataset_terms:AddDatasetTerms",
|
||||
"simple_add_dataset_terms = datahub.ingestion.transformer.add_dataset_terms:SimpleAddDatasetTerms",
|
||||
"pattern_add_dataset_terms = datahub.ingestion.transformer.add_dataset_terms:PatternAddDatasetTerms",
|
||||
"add_dataset_properties = datahub.ingestion.transformer.add_dataset_properties:AddDatasetProperties",
|
||||
"simple_add_dataset_properties = datahub.ingestion.transformer.add_dataset_properties:SimpleAddDatasetProperties",
|
||||
"pattern_add_dataset_schema_terms = datahub.ingestion.transformer.add_dataset_schema_terms:PatternAddDatasetSchemaTerms",
|
||||
"pattern_add_dataset_schema_tags = datahub.ingestion.transformer.add_dataset_schema_tags:PatternAddDatasetSchemaTags",
|
||||
],
|
||||
"datahub.ingestion.sink.plugins": [
|
||||
"file = datahub.ingestion.sink.file:FileSink",
|
||||
"console = datahub.ingestion.sink.console:ConsoleSink",
|
||||
|
||||
@ -1,74 +1,26 @@
|
||||
from datahub.ingestion.api.registry import PluginRegistry
|
||||
from datahub.ingestion.api.transform import Transformer
|
||||
from datahub.ingestion.transformer import dataset_domain
|
||||
from datahub.ingestion.transformer.add_dataset_browse_path import (
|
||||
AddDatasetBrowsePathTransformer,
|
||||
)
|
||||
from datahub.ingestion.transformer.add_dataset_ownership import (
|
||||
AddDatasetOwnership,
|
||||
PatternAddDatasetOwnership,
|
||||
SimpleAddDatasetOwnership,
|
||||
)
|
||||
from datahub.ingestion.transformer.add_dataset_properties import (
|
||||
AddDatasetProperties,
|
||||
SimpleAddDatasetProperties,
|
||||
)
|
||||
from datahub.ingestion.transformer.add_dataset_schema_tags import (
|
||||
PatternAddDatasetSchemaTags,
|
||||
)
|
||||
from datahub.ingestion.transformer.add_dataset_schema_terms import (
|
||||
PatternAddDatasetSchemaTerms,
|
||||
)
|
||||
from datahub.ingestion.transformer.add_dataset_tags import (
|
||||
AddDatasetTags,
|
||||
PatternAddDatasetTags,
|
||||
SimpleAddDatasetTags,
|
||||
)
|
||||
from datahub.ingestion.transformer.add_dataset_terms import (
|
||||
AddDatasetTerms,
|
||||
PatternAddDatasetTerms,
|
||||
SimpleAddDatasetTerms,
|
||||
)
|
||||
from datahub.ingestion.transformer.mark_dataset_status import MarkDatasetStatus
|
||||
from datahub.ingestion.transformer.remove_dataset_ownership import (
|
||||
SimpleRemoveDatasetOwnership,
|
||||
)
|
||||
|
||||
transform_registry = PluginRegistry[Transformer]()
|
||||
transform_registry.register_from_entrypoint("datahub.ingestion.transformer.plugins")
|
||||
|
||||
transform_registry.register(
|
||||
"simple_remove_dataset_ownership", SimpleRemoveDatasetOwnership
|
||||
)
|
||||
transform_registry.register("mark_dataset_status", MarkDatasetStatus)
|
||||
transform_registry.register("set_dataset_browse_path", AddDatasetBrowsePathTransformer)
|
||||
|
||||
transform_registry.register("add_dataset_ownership", AddDatasetOwnership)
|
||||
transform_registry.register("simple_add_dataset_ownership", SimpleAddDatasetOwnership)
|
||||
transform_registry.register("pattern_add_dataset_ownership", PatternAddDatasetOwnership)
|
||||
|
||||
transform_registry.register("add_dataset_domain", dataset_domain.AddDatasetDomain)
|
||||
transform_registry.register(
|
||||
"simple_add_dataset_domain", dataset_domain.SimpleAddDatasetDomain
|
||||
)
|
||||
transform_registry.register(
|
||||
"pattern_add_dataset_domain", dataset_domain.PatternAddDatasetDomain
|
||||
)
|
||||
|
||||
|
||||
transform_registry.register("add_dataset_tags", AddDatasetTags)
|
||||
transform_registry.register("simple_add_dataset_tags", SimpleAddDatasetTags)
|
||||
transform_registry.register("pattern_add_dataset_tags", PatternAddDatasetTags)
|
||||
|
||||
transform_registry.register("add_dataset_terms", AddDatasetTerms)
|
||||
transform_registry.register("simple_add_dataset_terms", SimpleAddDatasetTerms)
|
||||
transform_registry.register("pattern_add_dataset_terms", PatternAddDatasetTerms)
|
||||
|
||||
transform_registry.register("add_dataset_properties", AddDatasetProperties)
|
||||
transform_registry.register("simple_add_dataset_properties", SimpleAddDatasetProperties)
|
||||
|
||||
transform_registry.register(
|
||||
"pattern_add_dataset_schema_terms", PatternAddDatasetSchemaTerms
|
||||
)
|
||||
transform_registry.register(
|
||||
"pattern_add_dataset_schema_tags", PatternAddDatasetSchemaTags
|
||||
)
|
||||
# These transformers are always enabled
|
||||
assert transform_registry.get("simple_remove_dataset_ownership")
|
||||
assert transform_registry.get("mark_dataset_status")
|
||||
assert transform_registry.get("set_dataset_browse_path")
|
||||
assert transform_registry.get("add_dataset_ownership")
|
||||
assert transform_registry.get("simple_add_dataset_ownership")
|
||||
assert transform_registry.get("pattern_add_dataset_ownership")
|
||||
assert transform_registry.get("add_dataset_domain")
|
||||
assert transform_registry.get("simple_add_dataset_domain")
|
||||
assert transform_registry.get("pattern_add_dataset_domain")
|
||||
assert transform_registry.get("add_dataset_tags")
|
||||
assert transform_registry.get("simple_add_dataset_tags")
|
||||
assert transform_registry.get("pattern_add_dataset_tags")
|
||||
assert transform_registry.get("add_dataset_terms")
|
||||
assert transform_registry.get("simple_add_dataset_terms")
|
||||
assert transform_registry.get("pattern_add_dataset_terms")
|
||||
assert transform_registry.get("add_dataset_properties")
|
||||
assert transform_registry.get("simple_add_dataset_properties")
|
||||
assert transform_registry.get("pattern_add_dataset_schema_terms")
|
||||
assert transform_registry.get("pattern_add_dataset_schema_tags")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user