From f9dd7b53cb7bec03742be87aa8d4e7dfd8a2be12 Mon Sep 17 00:00:00 2001 From: Pere Miquel Brull Date: Fri, 6 Sep 2024 12:12:44 +0200 Subject: [PATCH] MINOR - Fix lineage GET for names with `/` and standardize quote calls (#17748) * MINOR - Fix lineage GET for names with `/` and standardize quote calls * format * fix import --- .../ingestion/ometa/mixins/es_mixin.py | 2 +- .../ingestion/ometa/mixins/lineage_mixin.py | 4 +- .../ingestion/ometa/mixins/table_mixin.py | 9 ++-- .../ingestion/ometa/mixins/tests_mixin.py | 5 +- .../src/metadata/ingestion/ometa/ometa_api.py | 5 +- .../src/metadata/ingestion/ometa/utils.py | 11 ++++ .../ometa/test_ometa_lineage_api.py | 54 +++++++++++++++++-- .../integration/ometa/test_ometa_table_api.py | 19 +++++++ ingestion/tests/unit/test_fqn.py | 10 ++++ 9 files changed, 102 insertions(+), 17 deletions(-) diff --git a/ingestion/src/metadata/ingestion/ometa/mixins/es_mixin.py b/ingestion/src/metadata/ingestion/ometa/mixins/es_mixin.py index 1202162430e..2add6520d24 100644 --- a/ingestion/src/metadata/ingestion/ometa/mixins/es_mixin.py +++ b/ingestion/src/metadata/ingestion/ometa/mixins/es_mixin.py @@ -19,11 +19,11 @@ import traceback from typing import Generic, Iterable, List, Optional, Set, Type, TypeVar from pydantic import BaseModel -from requests.utils import quote from metadata.generated.schema.entity.data.container import Container from metadata.generated.schema.entity.data.query import Query from metadata.ingestion.ometa.client import REST, APIError +from metadata.ingestion.ometa.utils import quote from metadata.utils.elasticsearch import ES_INDEX_MAP from metadata.utils.logger import ometa_logger diff --git a/ingestion/src/metadata/ingestion/ometa/mixins/lineage_mixin.py b/ingestion/src/metadata/ingestion/ometa/mixins/lineage_mixin.py index a3e1da1870c..7cfca8c9f87 100644 --- a/ingestion/src/metadata/ingestion/ometa/mixins/lineage_mixin.py +++ b/ingestion/src/metadata/ingestion/ometa/mixins/lineage_mixin.py @@ -28,7 +28,7 @@ from metadata.ingestion.lineage.models import ConnectionTypeDialectMapper from metadata.ingestion.lineage.parser import LINEAGE_PARSING_TIMEOUT from metadata.ingestion.models.patch_request import build_patch from metadata.ingestion.ometa.client import REST, APIError -from metadata.ingestion.ometa.utils import get_entity_type +from metadata.ingestion.ometa.utils import get_entity_type, quote from metadata.utils.logger import ometa_logger from metadata.utils.lru_cache import LRU_CACHE_SIZE, LRUCache @@ -279,7 +279,7 @@ class OMetaLineageMixin(Generic[T]): """ return self._get_lineage( entity=entity, - path=f"name/{fqn}", + path=f"name/{quote(fqn)}", up_depth=up_depth, down_depth=down_depth, ) diff --git a/ingestion/src/metadata/ingestion/ometa/mixins/table_mixin.py b/ingestion/src/metadata/ingestion/ometa/mixins/table_mixin.py index e6a539b111e..caf3d00ff52 100644 --- a/ingestion/src/metadata/ingestion/ometa/mixins/table_mixin.py +++ b/ingestion/src/metadata/ingestion/ometa/mixins/table_mixin.py @@ -16,8 +16,7 @@ To be used by OpenMetadata class import traceback from typing import List, Optional, Type, TypeVar -from pydantic import BaseModel -from requests.utils import quote +from pydantic import BaseModel, validate_call from metadata.generated.schema.api.data.createTableProfile import ( CreateTableProfileRequest, @@ -39,7 +38,7 @@ from metadata.generated.schema.type.basic import FullyQualifiedEntityName, Uuid from metadata.generated.schema.type.usageRequest import UsageRequest from metadata.ingestion.ometa.client import REST from metadata.ingestion.ometa.models import EntityList -from metadata.ingestion.ometa.utils import model_str +from metadata.ingestion.ometa.utils import model_str, quote from metadata.utils.logger import ometa_logger logger = ometa_logger() @@ -227,6 +226,7 @@ class OMetaTableMixin: return None + @validate_call def get_profile_data( self, fqn: str, @@ -253,7 +253,6 @@ class OMetaTableMixin: Returns: EntityList: EntityList list object """ - url_after = f"&after={after}" if after else "" profile_type_url = profile_type.__name__[0].lower() + profile_type.__name__[1:] @@ -290,7 +289,7 @@ class OMetaTableMixin: Returns: Optional[Table]: OM table object """ - return self._get(Table, f"{quote(model_str(fqn), safe='')}/tableProfile/latest") + return self._get(Table, f"{quote(fqn)}/tableProfile/latest") def create_or_update_custom_metric( self, custom_metric: CreateCustomMetricRequest, table_id: str diff --git a/ingestion/src/metadata/ingestion/ometa/mixins/tests_mixin.py b/ingestion/src/metadata/ingestion/ometa/mixins/tests_mixin.py index 87d431ee35a..c76ddc89631 100644 --- a/ingestion/src/metadata/ingestion/ometa/mixins/tests_mixin.py +++ b/ingestion/src/metadata/ingestion/ometa/mixins/tests_mixin.py @@ -17,7 +17,6 @@ To be used by OpenMetadata class import traceback from datetime import datetime from typing import List, Optional, Type, Union -from urllib.parse import quote from uuid import UUID from metadata.generated.schema.api.tests.createLogicalTestCases import ( @@ -46,7 +45,7 @@ from metadata.generated.schema.tests.testDefinition import ( from metadata.generated.schema.tests.testSuite import TestSuite from metadata.generated.schema.type.entityReference import EntityReference from metadata.ingestion.ometa.client import REST -from metadata.ingestion.ometa.utils import model_str +from metadata.ingestion.ometa.utils import model_str, quote from metadata.utils.logger import ometa_logger logger = ometa_logger() @@ -76,7 +75,7 @@ class OMetaTestsMixin: _type_: _description_ """ resp = self.client.put( - f"{self.get_suffix(TestCase)}/{quote(test_case_fqn,safe='')}/testCaseResult", + f"{self.get_suffix(TestCase)}/{quote(test_case_fqn)}/testCaseResult", test_results.model_dump_json(), ) diff --git a/ingestion/src/metadata/ingestion/ometa/ometa_api.py b/ingestion/src/metadata/ingestion/ometa/ometa_api.py index b05c076d91a..24ce7f2248d 100644 --- a/ingestion/src/metadata/ingestion/ometa/ometa_api.py +++ b/ingestion/src/metadata/ingestion/ometa/ometa_api.py @@ -18,7 +18,6 @@ import traceback from typing import Dict, Generic, Iterable, List, Optional, Type, TypeVar, Union from pydantic import BaseModel -from requests.utils import quote from metadata.generated.schema.api.services.ingestionPipelines.createIngestionPipeline import ( CreateIngestionPipelineRequest, @@ -57,7 +56,7 @@ from metadata.ingestion.ometa.mixins.user_mixin import OMetaUserMixin from metadata.ingestion.ometa.mixins.version_mixin import OMetaVersionMixin from metadata.ingestion.ometa.models import EntityList from metadata.ingestion.ometa.routes import ROUTES -from metadata.ingestion.ometa.utils import get_entity_type, model_str +from metadata.ingestion.ometa.utils import get_entity_type, model_str, quote from metadata.utils.logger import ometa_logger from metadata.utils.secrets.secrets_manager_factory import SecretsManagerFactory from metadata.utils.ssl_registry import get_verify_ssl_fn @@ -293,7 +292,7 @@ class OpenMetadata( return self._get( entity=entity, - path=f"name/{quote(model_str(fqn), safe='')}", + path=f"name/{quote(fqn)}", fields=fields, nullable=nullable, ) diff --git a/ingestion/src/metadata/ingestion/ometa/utils.py b/ingestion/src/metadata/ingestion/ometa/utils.py index 0f534de0f5d..3468b87f380 100644 --- a/ingestion/src/metadata/ingestion/ometa/utils.py +++ b/ingestion/src/metadata/ingestion/ometa/utils.py @@ -17,6 +17,9 @@ import string from typing import Any, Type, TypeVar, Union from pydantic import BaseModel +from requests.utils import quote as url_quote + +from metadata.generated.schema.type.basic import FullyQualifiedEntityName T = TypeVar("T", bound=BaseModel) @@ -74,3 +77,11 @@ def model_str(arg: Any) -> str: return str(arg.root) return str(arg) + + +def quote(fqn: Union[FullyQualifiedEntityName, str]) -> str: + """ + Quote the FQN so that it's safe to pass to the API. + E.g., `"foo.bar/baz"` -> `%22foo.bar%2Fbaz%22` + """ + return url_quote(model_str(fqn), safe="") diff --git a/ingestion/tests/integration/ometa/test_ometa_lineage_api.py b/ingestion/tests/integration/ometa/test_ometa_lineage_api.py index e7264635a28..b661d6e2bad 100644 --- a/ingestion/tests/integration/ometa/test_ometa_lineage_api.py +++ b/ingestion/tests/integration/ometa/test_ometa_lineage_api.py @@ -25,6 +25,7 @@ from metadata.generated.schema.entity.data.table import Table from metadata.generated.schema.entity.services.dashboardService import DashboardService from metadata.generated.schema.entity.services.databaseService import DatabaseService from metadata.generated.schema.entity.services.pipelineService import PipelineService +from metadata.generated.schema.type.basic import EntityName from metadata.generated.schema.type.entityLineage import ( ColumnLineage, EntitiesEdge, @@ -85,7 +86,7 @@ class OMetaLineageTest(TestCase): ) ) - create_schema_entity = cls.metadata.create_or_update( + cls.create_schema_entity = cls.metadata.create_or_update( data=get_create_entity( entity=DatabaseSchema, reference=create_db_entity.fullyQualifiedName, @@ -96,14 +97,14 @@ class OMetaLineageTest(TestCase): cls.table1 = get_create_entity( name=generate_name(), entity=Table, - reference=create_schema_entity.fullyQualifiedName, + reference=cls.create_schema_entity.fullyQualifiedName, ) cls.table1_entity = cls.metadata.create_or_update(data=cls.table1) cls.table2 = get_create_entity( name=generate_name(), entity=Table, - reference=create_schema_entity.fullyQualifiedName, + reference=cls.create_schema_entity.fullyQualifiedName, ) cls.table2_entity = cls.metadata.create_or_update(data=cls.table2) @@ -337,3 +338,50 @@ class OMetaLineageTest(TestCase): ) entity_lineage = EntityLineage.model_validate(datamodel_lineage) self.assertEqual(from_id, str(entity_lineage.upstreamEdges[0].fromEntity.root)) + + def test_table_with_slash_in_name(self): + """E.g., `foo.bar/baz`""" + name = EntityName("foo.bar/baz") + new_table: Table = self.metadata.create_or_update( + data=get_create_entity( + entity=Table, + name=name, + reference=self.create_schema_entity.fullyQualifiedName, + ) + ) + + res: Table = self.metadata.get_by_name( + entity=Table, fqn=new_table.fullyQualifiedName + ) + + assert res.name == name + + self.metadata.add_lineage( + data=AddLineageRequest( + edge=EntitiesEdge( + fromEntity=EntityReference(id=self.table1_entity.id, type="table"), + toEntity=EntityReference(id=new_table.id, type="table"), + lineageDetails=LineageDetails( + columnsLineage=[ + ColumnLineage( + fromColumns=[ + self.table1_entity.columns[0].fullyQualifiedName + ], + toColumn=new_table.columns[0].fullyQualifiedName, + ) + ] + ), + ), + ) + ) + + # use the SDK to get the lineage + lineage = self.metadata.get_lineage_by_name( + entity=Table, + fqn=new_table.fullyQualifiedName.root, + ) + entity_lineage = EntityLineage.model_validate(lineage) + assert ( + entity_lineage.upstreamEdges[0].fromEntity.root + == self.table1_entity.id.root + ) diff --git a/ingestion/tests/integration/ometa/test_ometa_table_api.py b/ingestion/tests/integration/ometa/test_ometa_table_api.py index 8dc184741a9..ce68e4fba77 100644 --- a/ingestion/tests/integration/ometa/test_ometa_table_api.py +++ b/ingestion/tests/integration/ometa/test_ometa_table_api.py @@ -76,6 +76,8 @@ from metadata.generated.schema.type.entityReferenceList import EntityReferenceLi from metadata.generated.schema.type.usageRequest import UsageRequest from metadata.ingestion.ometa.client import REST +from ..integration_base import get_create_entity + BAD_RESPONSE = { "data": [ { @@ -643,3 +645,20 @@ class OMetaTableTest(TestCase): # We should have 2 tables, the 3rd one is broken and should be skipped assert len(list(res)) == 2 + + def test_table_with_slash_in_name(self): + """E.g., `foo.bar/baz`""" + name = EntityName("foo.bar/baz") + new_table: Table = self.metadata.create_or_update( + data=get_create_entity( + entity=Table, + name=name, + reference=self.create_schema_entity.fullyQualifiedName, + ) + ) + + res: Table = self.metadata.get_by_name( + entity=Table, fqn=new_table.fullyQualifiedName + ) + + assert res.name == name diff --git a/ingestion/tests/unit/test_fqn.py b/ingestion/tests/unit/test_fqn.py index 089efcd0431..f522b194b62 100644 --- a/ingestion/tests/unit/test_fqn.py +++ b/ingestion/tests/unit/test_fqn.py @@ -17,6 +17,8 @@ from unittest.mock import MagicMock import pytest from metadata.generated.schema.entity.data.table import Table +from metadata.generated.schema.type.basic import FullyQualifiedEntityName +from metadata.ingestion.ometa.utils import quote from metadata.utils import fqn @@ -145,3 +147,11 @@ class TestFqn(TestCase): with pytest.raises(ValueError): fqn.split_test_case_fqn("local_redshift.dev.dbt_jaffle.customers") + + def test_quote_fqns(self): + """We can properly quote FQNs for URL usage""" + assert quote(FullyQualifiedEntityName("a.b.c")) == "a.b.c" + # Works with strings directly + assert quote("a.b.c") == "a.b.c" + assert quote(FullyQualifiedEntityName('"foo.bar".baz')) == "%22foo.bar%22.baz" + assert quote('"foo.bar/baz".hello') == "%22foo.bar%2Fbaz%22.hello"