2022-05-23 09:51:34 +02:00
|
|
|
# Copyright 2021 Collate
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
"""
|
|
|
|
OMeta ES Mixin integration tests. The API needs to be up
|
|
|
|
"""
|
2022-07-20 12:44:50 +02:00
|
|
|
import logging
|
|
|
|
import time
|
2024-03-13 05:02:26 -07:00
|
|
|
import uuid
|
2024-10-10 17:14:22 +02:00
|
|
|
from copy import deepcopy
|
2022-05-23 09:51:34 +02:00
|
|
|
from unittest import TestCase
|
2024-09-12 07:14:56 +02:00
|
|
|
from unittest.mock import patch
|
2022-05-23 09:51:34 +02:00
|
|
|
|
2024-09-12 07:14:56 +02:00
|
|
|
import pytest
|
2023-09-19 07:37:47 +02:00
|
|
|
from requests.utils import quote
|
|
|
|
|
2022-05-23 09:51:34 +02:00
|
|
|
from metadata.generated.schema.api.data.createDatabase import CreateDatabaseRequest
|
|
|
|
from metadata.generated.schema.api.data.createDatabaseSchema import (
|
|
|
|
CreateDatabaseSchemaRequest,
|
|
|
|
)
|
2023-09-19 07:37:47 +02:00
|
|
|
from metadata.generated.schema.api.data.createQuery import CreateQueryRequest
|
2022-05-23 09:51:34 +02:00
|
|
|
from metadata.generated.schema.api.data.createTable import CreateTableRequest
|
|
|
|
from metadata.generated.schema.api.services.createDatabaseService import (
|
|
|
|
CreateDatabaseServiceRequest,
|
|
|
|
)
|
2023-09-19 07:37:47 +02:00
|
|
|
from metadata.generated.schema.entity.data.query import Query
|
2022-05-23 09:51:34 +02:00
|
|
|
from metadata.generated.schema.entity.data.table import Column, DataType, Table
|
2023-06-16 13:18:12 +05:30
|
|
|
from metadata.generated.schema.entity.services.connections.database.common.basicAuth import (
|
|
|
|
BasicAuth,
|
|
|
|
)
|
2022-05-23 09:51:34 +02:00
|
|
|
from metadata.generated.schema.entity.services.connections.database.mysqlConnection import (
|
|
|
|
MysqlConnection,
|
|
|
|
)
|
|
|
|
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
|
|
|
|
OpenMetadataConnection,
|
|
|
|
)
|
|
|
|
from metadata.generated.schema.entity.services.databaseService import (
|
|
|
|
DatabaseConnection,
|
|
|
|
DatabaseService,
|
|
|
|
DatabaseServiceType,
|
|
|
|
)
|
2022-09-26 16:19:47 +05:30
|
|
|
from metadata.generated.schema.security.client.openMetadataJWTClientConfig import (
|
|
|
|
OpenMetadataJWTClientConfig,
|
|
|
|
)
|
2024-09-12 07:14:56 +02:00
|
|
|
from metadata.generated.schema.type.basic import EntityName, SqlQuery
|
2022-05-23 09:51:34 +02:00
|
|
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
2022-06-21 18:02:50 +02:00
|
|
|
from metadata.utils import fqn
|
2022-05-23 09:51:34 +02:00
|
|
|
|
2024-10-10 17:14:22 +02:00
|
|
|
from ..integration_base import TIER1_TAG, get_create_entity
|
2024-09-12 07:14:56 +02:00
|
|
|
|
2022-05-23 09:51:34 +02:00
|
|
|
|
|
|
|
class OMetaESTest(TestCase):
    """
    Integration tests for the ES helpers exposed by the OpenMetadata client.

    Run this integration test with the local API available.
    Install the ingestion package before running the tests.
    """

    # NOTE(review): hard-coded token, presumably the local dev admin JWT that
    # ships with the default server setup — confirm before reusing elsewhere.
    server_config = OpenMetadataConnection(
        hostPort="http://localhost:8585/api",
        authProvider="openmetadata",
        securityConfig=OpenMetadataJWTClientConfig(
            jwtToken="eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
        ),
    )
    metadata = OpenMetadata(server_config)

    # Evaluated while the class body executes, i.e. at import/collection time.
    assert metadata.health_check()

    # Request for the main service that owns the test database/schema/table
    # and two of the three queries created in setUpClass.
    service = CreateDatabaseServiceRequest(
        name="test-service-es",
        serviceType=DatabaseServiceType.Mysql,
        connection=DatabaseConnection(
            config=MysqlConnection(
                username="username",
                authType=BasicAuth(
                    password="password",
                ),
                hostPort="http://localhost:1234",
            )
        ),
    )

    # Request for a second service that only owns a single query.
    another_service = CreateDatabaseServiceRequest(
        name="another-test-service-es",
        serviceType=DatabaseServiceType.Mysql,
        connection=DatabaseConnection(
            config=MysqlConnection(
                username="username",
                authType=BasicAuth(
                    password="password",
                ),
                hostPort="http://localhost:1234",
            )
        ),
    )
    service_type = "databaseService"

    @classmethod
    def check_es_index(cls) -> None:
        """
        Wait until the index has been updated with the test table and query.

        Polls ES once per second, for at most six attempts, until both the
        test table and the query identified by ``cls.checksum`` can be found.
        Returns silently on timeout so the individual tests report the
        failure themselves.
        """
        logging.info("Checking ES index status...")

        # The query FQN does not change between attempts: build it once.
        query_fqn = fqn.build(
            metadata=None,
            entity_type=Query,
            service_name="test-service-es",
            query_checksum=cls.checksum,
        )

        table_res = None
        query_res = None
        tries = 0
        while tries <= 5:  # Kill in ~5 seconds
            # Only re-query what has not been found yet.
            table_res = table_res or cls.metadata.es_search_from_fqn(
                entity_type=Table,
                fqn_search_string="test-service-es.test-db-es.test-schema-es.test-es",
            )
            query_res = query_res or cls.metadata.es_search_from_fqn(
                entity_type=Query,
                fqn_search_string=query_fqn,
            )
            # Both documents are needed by the tests; finding one is not enough.
            if table_res and query_res:
                return
            tries += 1
            time.sleep(1)

        logging.warning("ES index was not fully updated before the timeout")

    @classmethod
    def setUpClass(cls) -> None:
        """
        Prepare ingredients.

        Creates the service -> database -> schema -> table hierarchy, two
        queries on the main service (one of them with processed lineage), a
        second service with one processed-lineage query, and finally waits
        for the ES index to pick the new assets up.
        """
        cls.service_entity = cls.metadata.create_or_update(data=cls.service)

        create_db = CreateDatabaseRequest(
            name="test-db-es",
            service=cls.service_entity.fullyQualifiedName,
        )
        cls.create_db_entity = cls.metadata.create_or_update(data=create_db)

        create_schema = CreateDatabaseSchemaRequest(
            name="test-schema-es",
            database=cls.create_db_entity.fullyQualifiedName,
        )
        cls.create_schema_entity = cls.metadata.create_or_update(data=create_schema)

        create = CreateTableRequest(
            name="test-es",
            databaseSchema=cls.create_schema_entity.fullyQualifiedName,
            columns=[Column(name="id", dataType=DataType.BIGINT)],
        )
        cls.entity = cls.metadata.create_or_update(create)

        # A random query text keeps the checksum unique per test run.
        query_str = str(uuid.uuid4())
        cls.checksum = fqn.get_query_checksum(query_str)

        # Create queries for the given service
        query = CreateQueryRequest(
            query=SqlQuery(query_str),
            service=cls.service_entity.fullyQualifiedName,
            processedLineage=True,  # Only 1 with processed lineage
        )
        cls.metadata.create_or_update(query)

        query2 = CreateQueryRequest(
            query=SqlQuery(str(uuid.uuid4())),
            service=cls.service_entity.fullyQualifiedName,
        )
        cls.metadata.create_or_update(query2)

        # Create queries for another service
        cls.another_service_entity = cls.metadata.create_or_update(
            data=cls.another_service
        )

        another_query = CreateQueryRequest(
            query=SqlQuery(str(uuid.uuid4())),
            service=cls.another_service_entity.fullyQualifiedName,
            processedLineage=True,
        )
        cls.metadata.create_or_update(another_query)

        # Leave some time for indexes to get updated, otherwise this happens too fast
        cls.check_es_index()

    @classmethod
    def tearDownClass(cls) -> None:
        """
        Clean up.

        Hard-deletes both test services recursively, main service first.
        """
        for service_request in (cls.service, cls.another_service):
            service_id = str(
                cls.metadata.get_by_name(
                    entity=DatabaseService, fqn=service_request.name.root
                ).id.root
            )

            cls.metadata.delete(
                entity=DatabaseService,
                entity_id=service_id,
                recursive=True,
                hard_delete=True,
            )

    def test_es_search_from_service_table(self):
        """
        We can fetch tables from a service.

        The same table must be returned whether the database and schema
        parts of the FQN are wildcards or exact names.
        """
        service_name = self.service.name.root
        db_name = self.create_db_entity.name.root
        schema_name = self.create_schema_entity.name.root
        table_name = self.entity.name.root

        search_patterns = (
            (service_name, "*", "*", table_name),
            (service_name, db_name, "*", table_name),
            (service_name, db_name, schema_name, table_name),
        )

        for parts in search_patterns:
            fqn_search_string = fqn._build(*parts)
            with self.subTest(fqn_search_string=fqn_search_string):
                res = self.metadata.es_search_from_fqn(
                    entity_type=Table,
                    fqn_search_string=fqn_search_string,
                    size=100,
                    fields="owners",
                )

                # We get the created table back
                self.assertIsNotNone(res)
                self.assertIn(self.entity, res)

    def test_es_search_from_service_table_empty(self):
        """
        Wrong filters return none
        """
        res = self.metadata.es_search_from_fqn(
            entity_type=Table,
            fqn_search_string="random",
        )

        self.assertIsNone(res)

    def test_get_query_with_lineage_filter(self):
        """Check we are building the proper filter"""
        res = self.metadata.get_query_with_lineage_filter("my_service")
        expected = (
            '{"query": {"bool": {"must": [{"term": {"processedLineage": true}},'
            ' {"term": {"service.name.keyword": "my_service"}}]}}}'
        )
        # The client is expected to hand back the URL-quoted JSON filter.
        self.assertEqual(res, quote(expected))

    def test_get_queries_with_lineage(self):
        """Check the payload from ES"""
        res = self.metadata.es_get_queries_with_lineage(self.service.name.root)
        self.assertIn(self.checksum, res)

    def test_paginate_no_filter(self):
        """We can paginate all the data"""
        # Since the test can run in parallel with other tables being there, we just
        # want to check we are actually getting some results.
        # Pull only the first asset; an empty result must fail the test rather
        # than silently skip the assertion.
        first_asset = next(iter(self.metadata.paginate_es(entity=Table, size=2)), None)
        self.assertTrue(first_asset)

    def test_paginate_with_errors(self):
        """We don't want to stop the ES yields just because a single Entity has an error"""
        # 1. First, prepare some tables
        for idx in range(10):
            self.metadata.create_or_update(
                data=get_create_entity(
                    entity=Table,
                    name=EntityName(f"table_{idx}"),
                    reference=self.create_schema_entity.fullyQualifiedName,
                )
            )

        # 2. We'll fetch the entities, but we need to force a failure to ensure we can recover
        error_name = fqn._build(
            self.service_entity.name.root,
            self.create_db_entity.name.root,
            self.create_schema_entity.name.root,
            "table_5",
        )
        ok_name = fqn._build(
            self.service_entity.name.root,
            self.create_db_entity.name.root,
            self.create_schema_entity.name.root,
            "table_6",
        )

        rest_client = self.metadata.client
        # Keep a handle on the real method: inside the patch the attribute is the mock.
        original_get = rest_client.get
        with patch.object(rest_client, "get", wraps=original_get) as mock_get:

            def side_effect(path: str, data=None):
                # In case we pass filters as well, use `in path` rather than ==
                if f"/tables/name/{error_name}" in path:
                    raise RuntimeError("Error")
                return original_get(path, data)

            mock_get.side_effect = side_effect

            # Validate we are raising the error
            with pytest.raises(RuntimeError):
                self.metadata.get_by_name(entity=Table, fqn=error_name)

            # This works
            self.metadata.get_by_name(entity=Table, fqn=ok_name)

            query_filter = (
                '{"query":{"bool":{"must":[{"bool":{"should":[{"term":'
                f'{{"service.displayName.keyword":"{self.service_entity.name.root}"}}}}]}}}}]}}}}}}'
            )
            assets = list(
                self.metadata.paginate_es(
                    entity=Table, query_filter=query_filter, size=2
                )
            )
            assert len(assets) == 10

    def test_paginate_with_filters(self):
        """We can paginate only tier 1 tables"""
        # prepare some tables with tier 1 tags
        for idx in range(10):
            table = self.metadata.create_or_update(
                data=get_create_entity(
                    entity=Table,
                    name=EntityName(f"filtered_{idx}"),
                    reference=self.create_schema_entity.fullyQualifiedName,
                )
            )
            # Tag every other table (even indexes) so exactly 5 match the filter.
            if idx % 2 == 0:
                dest = deepcopy(table)
                dest.tags = [TIER1_TAG]
                self.metadata.patch(entity=Table, source=table, destination=dest)

        query_filter = (
            '{"query":{"bool":{"must":[{"bool":{"must":['
            '{"term":{"tier.tagFQN":"Tier.Tier1"}},'
            f'{{"term":{{"service.displayName.keyword":"{self.service_entity.name.root}"}}}}'
            "]}}]}}}"
        )
        assets = list(
            self.metadata.paginate_es(entity=Table, query_filter=query_filter, size=2)
        )
        assert len(assets) == 5
|