Fixes #17747: dbt update owners (#19144)

* Fixes 17747: dbt update owners

* update messages

* addressed comments

* py_format

* py_format

* Added tests
This commit is contained in:
Suman Maharana 2025-01-07 16:37:30 +05:30 committed by GitHub
parent 7f7cfb1e8c
commit 00be51f299
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 111 additions and 9 deletions

View File

@ -122,6 +122,11 @@ class DbtServiceTopology(ServiceTopology):
processor="process_dbt_descriptions",
nullable=True,
),
NodeStage(
type_=DataModelLink,
processor="process_dbt_owners",
nullable=True,
),
],
)
process_dbt_tests: Annotated[
@ -293,6 +298,12 @@ class DbtServiceSource(TopologyRunnerMixin, Source, ABC):
Method to process DBT descriptions using patch APIs
"""
@abstractmethod
def process_dbt_owners(self, data_model_link: DataModelLink):
    """
    Apply the owners of a dbt data model to its linked entity
    through patch APIs. Concrete dbt sources must implement this.
    """
def get_dbt_tests(self) -> dict:
"""
Prepare the DBT tests

View File

@ -61,6 +61,7 @@ from metadata.ingestion.api.models import Either
from metadata.ingestion.lineage.models import ConnectionTypeDialectMapper
from metadata.ingestion.lineage.sql_lineage import get_lineage_by_query
from metadata.ingestion.models.ometa_classification import OMetaTagAndClassification
from metadata.ingestion.models.patch_request import PatchedEntity, PatchRequest
from metadata.ingestion.models.table_metadata import ColumnDescription
from metadata.ingestion.ometa.client import APIError
from metadata.ingestion.ometa.ometa_api import OpenMetadata
@ -890,6 +891,47 @@ class DbtSource(DbtServiceSource):
f"to update dbt description: {exc}"
)
def process_dbt_owners(
    self, data_model_link: DataModelLink
) -> Iterable[Either[PatchedEntity]]:
    """
    Method to process DBT owners

    Yields a patch request that replaces the owners of the linked table
    with the owners of the dbt data model. Nothing is yielded when the
    link has no table entity, when the data model is a dbt source, when
    `dbtUpdateOwners` is disabled, or when the data model has no owners.
    Any exception raised while building the request is yielded as a
    StackTraceError instead of being propagated.
    """
    table_entity: Table = data_model_link.table_entity
    if not table_entity:
        return

    table_fqn = table_entity.fullyQualifiedName.root
    logger.debug(f"Processing DBT owners for: {table_fqn}")
    try:
        data_model = data_model_link.datamodel
        if (
            data_model.resourceType != DbtCommonEnum.SOURCE.value
            and self.source_config.dbtUpdateOwners
            and data_model.owners
        ):
            # Only announce the overwrite when a patch is actually produced
            logger.debug(f"Overwriting owners with DBT owners: {table_fqn}")
            # Patch against a copy so the original entity stays untouched
            new_entity = deepcopy(table_entity)
            new_entity.owners = data_model.owners
            yield Either(
                right=PatchRequest(
                    original_entity=table_entity,
                    new_entity=new_entity,
                    override_metadata=True,
                )
            )
    except Exception as exc:  # pylint: disable=broad-except
        yield Either(
            left=StackTraceError(
                name=str(table_fqn),
                # Trailing space keeps the node name separated from the text
                error=(
                    f"Failed to parse the node {table_fqn} "
                    f"to update dbt owner: {exc}"
                ),
                stackTrace=traceback.format_exc(),
            )
        )
def create_dbt_tests_definition(
self, dbt_test: dict
) -> Iterable[Either[CreateTestDefinitionRequest]]:

View File

@ -53,6 +53,7 @@ mock_dbt_config = {
"dbtRunResultsFilePath": "sample/dbt_files/run_results.json",
"dbtSourcesFilePath": "sample/dbt_files/sources.json",
},
"dbtUpdateOwners": True,
}
},
},
@ -675,6 +676,7 @@ class DbtUnitTest(TestCase):
data_model_link.right.table_entity.fullyQualifiedName.root,
EXPECTED_DATA_MODEL_FQNS,
)
self.check_process_dbt_owners(data_model_link.right)
data_model_list.append(data_model_link.right.datamodel)
for _, (expected, original) in enumerate(
@ -682,6 +684,12 @@ class DbtUnitTest(TestCase):
):
self.assertEqual(expected, original)
def check_process_dbt_owners(self, data_model_link):
    """
    Assert that every patch request yielded by `process_dbt_owners`
    for the given data model link carries the mocked owner.
    """
    for patch_result in self.dbt_source_obj.process_dbt_owners(data_model_link):
        self.assertEqual(patch_result.right.new_entity.owners, MOCK_OWNER)
@patch("metadata.ingestion.ometa.mixins.es_mixin.ESMixin.es_search_from_fqn")
def test_upstream_nodes_for_lineage(self, es_search_from_fqn):
expected_upstream_nodes = [

View File

@ -4,6 +4,8 @@
**dbtUpdateDescriptions**: Configuration to update the description from dbt or not. If set to true descriptions from dbt will override the already present descriptions on the entity. For more details visit [here](/connectors/ingestion/workflows/dbt/ingest-dbt-descriptions)
**dbtUpdateOwners**: Configuration to update the owner from dbt or not. If set to true owners from dbt will override the already present owners on the entity. For more details visit [here](/connectors/ingestion/workflows/dbt/ingest-dbt-owner)
**includeTags**: true or false, to ingest tags from dbt. Default is true.
**dbtClassificationName**: Custom OpenMetadata Classification name for dbt tags.

View File

@ -1,5 +1,6 @@
```yaml {% srNumber=120 %}
# dbtUpdateDescriptions: true or false
# dbtUpdateOwners: true or false
# includeTags: true or false
# dbtClassificationName: dbtTags
# databaseFilterPattern:

View File

@ -4,6 +4,8 @@
**dbtUpdateDescriptions**: Configuration to update the description from dbt or not. If set to true descriptions from dbt will override the already present descriptions on the entity. For more details visit [here](/connectors/ingestion/workflows/dbt/ingest-dbt-descriptions)
**dbtUpdateOwners**: Configuration to update the owner from dbt or not. If set to true owners from dbt will override the already present owners on the entity. For more details visit [here](/connectors/ingestion/workflows/dbt/ingest-dbt-owner)
**includeTags**: true or false, to ingest tags from dbt. Default is true.
**dbtClassificationName**: Custom OpenMetadata Classification name for dbt tags.

View File

@ -1,5 +1,6 @@
```yaml {% srNumber=120 %}
# dbtUpdateDescriptions: true or false
# dbtUpdateOwners: true or false
# includeTags: true or false
# dbtClassificationName: dbtTags
# databaseFilterPattern:

View File

@ -138,8 +138,16 @@ After running the ingestion workflow with dbt you can see the created user or te
{% note %}
## Overriding the existing table Owners
If a table already has an owner linked to it, the owner from dbt will not update the current owner.
To establish a unified and reliable system for owners, a single source of truth is necessary. It either is directly OpenMetadata, if individuals want to go there and keep updating, or if they prefer to keep it centralized in dbt, then we can always rely on that directly.
{% /note %}
When the `Update Owners` toggle is enabled during the configuration of dbt ingestion, existing owners of tables will be overwritten with the dbt owners.
If the toggle is disabled during the configuration of dbt ingestion, dbt owners will only be applied to tables in OpenMetadata that currently have no owners. Existing owners will remain unchanged and will not be overwritten with dbt owners.
{% image
src="/images/v1.6/features/ingestion/workflows/dbt/dbt-features/dbt-update-owners.webp"
alt="update-dbt-owners"
caption="Update dbt Owners"
/%}

View File

@ -12,6 +12,7 @@ slug: /main-concepts/metadata-standard/schemas/metadataingestion/dbtpipeline
- **`type`**: Pipeline type. Refer to *#/definitions/dbtConfigType*. Default: `DBT`.
- **`dbtConfigSource`**: Available sources to fetch DBT catalog and manifest files.
- **`dbtUpdateDescriptions`** *(boolean)*: Optional configuration to update the description from DBT or not. Default: `False`.
- **`dbtUpdateOwners`** *(boolean)*: Optional configuration to update the owners from DBT or not. Default: `False`.
- **`includeTags`** *(boolean)*: Optional configuration to toggle the tags ingestion. Default: `True`.
- **`dbtClassificationName`** *(string)*: Custom OpenMetadata Classification name for dbt tags. Default: `dbtTags`.
- **`schemaFilterPattern`**: Regex to only fetch tables or databases that matches the pattern. Refer to *../type/filterPattern.json#/definitions/filterPattern*.

View File

@ -138,8 +138,16 @@ After running the ingestion workflow with dbt you can see the created user or te
{% note %}
## Overriding the existing table Owners
If a table already has an owner linked to it, the owner from dbt will not update the current owner.
To establish a unified and reliable system for owners, a single source of truth is necessary. It either is directly OpenMetadata, if individuals want to go there and keep updating, or if they prefer to keep it centralized in dbt, then we can always rely on that directly.
{% /note %}
When the `Update Owners` toggle is enabled during the configuration of dbt ingestion, existing owners of tables will be overwritten with the dbt owners.
If the toggle is disabled during the configuration of dbt ingestion, dbt owners will only be applied to tables in OpenMetadata that currently have no owners. Existing owners will remain unchanged and will not be overwritten with dbt owners.
{% image
src="/images/v1.6/features/ingestion/workflows/dbt/dbt-features/dbt-update-owners.webp"
alt="update-dbt-owners"
caption="Update dbt Owners"
/%}

View File

@ -12,6 +12,7 @@ slug: /main-concepts/metadata-standard/schemas/metadataingestion/dbtpipeline
- **`type`**: Pipeline type. Refer to *#/definitions/dbtConfigType*. Default: `DBT`.
- **`dbtConfigSource`**: Available sources to fetch DBT catalog and manifest files.
- **`dbtUpdateDescriptions`** *(boolean)*: Optional configuration to update the description from DBT or not. Default: `False`.
- **`dbtUpdateOwners`** *(boolean)*: Optional configuration to update the owners from DBT or not. Default: `False`.
- **`includeTags`** *(boolean)*: Optional configuration to toggle the tags ingestion. Default: `True`.
- **`dbtClassificationName`** *(string)*: Custom OpenMetadata Classification name for dbt tags. Default: `dbtTags`.
- **`schemaFilterPattern`**: Regex to only fetch tables or databases that matches the pattern. Refer to *../type/filterPattern.json#/definitions/filterPattern*.

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

View File

@ -48,6 +48,11 @@
"type": "boolean",
"default": false
},
"dbtUpdateOwners": {
"description": "Optional configuration to update the owners from DBT or not",
"type": "boolean",
"default": false
},
"includeTags": {
"description": "Optional configuration to toggle the tags ingestion.",
"type": "boolean",

View File

@ -346,6 +346,16 @@ If the option is disabled, only tables and columns without any existing descript
However, if the option is enabled, descriptions for all tables and columns in the dbt manifest will be updated in OpenMetadata.
$$
$$section
### Update Owners $(id="dbtUpdateOwners")
This option updates the table owners in OpenMetadata with the owners from dbt.
If the option is disabled, only tables without any existing owners will have their owners updated based on the dbt manifest.
However, if the option is enabled, owners for all tables in the dbt manifest will be updated in OpenMetadata.
$$
$$section
### Include dbt Tags $(id="includeTags")

View File

@ -10,9 +10,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
/**
* DBT Pipeline Configuration.
*/
export interface DbtPipeline {
@ -32,6 +30,10 @@ export interface DbtPipeline {
* Optional configuration to update the description from DBT or not
*/
dbtUpdateDescriptions?: boolean;
/**
* Optional configuration to update the owners from DBT or not
*/
dbtUpdateOwners?: boolean;
/**
* Optional configuration to toggle the tags ingestion.
*/