# Copyright 2025 Collate
# Licensed under the Collate Community License, Version 1.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
OpenMetadata high-level API Table test
"""
import uuid
from copy import deepcopy
from datetime import datetime
from typing import List
from unittest import TestCase
from unittest.mock import patch

import pytest
from pydantic import ValidationError

from _openmetadata_testutils.ometa import int_admin_ometa
from metadata.generated.schema.api.data.createDatabase import CreateDatabaseRequest
from metadata.generated.schema.api.data.createDatabaseSchema import (
    CreateDatabaseSchemaRequest,
)
from metadata.generated.schema.api.data.createQuery import CreateQueryRequest
from metadata.generated.schema.api.data.createTable import CreateTableRequest
from metadata.generated.schema.api.data.createTableProfile import (
    CreateTableProfileRequest,
)
from metadata.generated.schema.api.services.createDatabaseService import (
    CreateDatabaseServiceRequest,
)
from metadata.generated.schema.api.teams.createUser import CreateUserRequest
from metadata.generated.schema.entity.data.query import Query
from metadata.generated.schema.entity.data.table import (
    Column,
    ColumnJoins,
    ColumnName,
    ColumnProfile,
    DataType,
    DmlOperationType,
    JoinedWith,
    SystemProfile,
    Table,
    TableData,
    TableJoins,
    TableProfile,
    TableProfilerConfig,
)
from metadata.generated.schema.entity.services.connections.database.common.basicAuth import (
    BasicAuth,
)
from metadata.generated.schema.entity.services.connections.database.mysqlConnection import (
    MysqlConnection,
)
from metadata.generated.schema.entity.services.databaseService import (
    DatabaseConnection,
    DatabaseService,
    DatabaseServiceType,
)
from metadata.generated.schema.entity.teams.user import User
from metadata.generated.schema.type.basic import (
    Date,
    EntityName,
    FullyQualifiedEntityName,
    SqlQuery,
    Timestamp,
)
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.generated.schema.type.entityReferenceList import EntityReferenceList
from metadata.generated.schema.type.usageRequest import UsageRequest
from metadata.ingestion.ometa.client import REST

from ..integration_base import get_create_entity

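# Mocked `GET /tables` payload used by the skip_on_failure tests below: the first
# two entries are valid, while the third carries a non-string `tagFQN` so its
# validation is expected to fail.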
BAD_RESPONSE = {
    "data": [
        {
            "id": "cb149dd4-f4c2-485e-acd3-74b7dca1015e",
            "name": "my.fake.good.tableOne",
            "columns": [
                {
                    "name": "col1",
                    "dataType": "BIGINT",
                }
            ],
        },
        {
            "id": "5d76676c-8e94-4e7e-97b8-294f4c16d0aa",
            "name": "my.fake.good.tableTwo",
            "columns": [
                {
                    "name": "col1",
                    "dataType": "BIGINT",
                }
            ],
        },
        {
            "id": "f063ff4e-99a3-4d42-8678-c484c2556e8d",
            "name": "my.fake.bad.tableOne",
            "columns": [
                {
                    "name": "col1",
                    "dataType": "BIGINT",
                }
            ],
            "tags": [
                {
                    # Certain test cases are expected to fail because this
                    # tagFQN value is not a string; it exercises skip_on_failure
                    "tagFQN": 123,
                    "source": "Classification",
                    "labelType": "Manual",
                    "state": "Confirmed",
                }
            ],
        },
    ],
    "paging": {
        "total": 3,
    },
}


class OMetaTableTest(TestCase):
    """
    Run this integration test with the local API available
    Install the ingestion package before running the tests
    """

    service_entity_id = None

    metadata = int_admin_ometa()

    user: User = metadata.create_or_update(
        data=CreateUserRequest(name="random-user", email="random@user.com"),
    )
    owners = EntityReferenceList(
        root=[
            EntityReference(
                id=user.id, type="user", fullyQualifiedName=user.fullyQualifiedName.root
            )
        ]
    )

    service = CreateDatabaseServiceRequest(
        name="test-service-table",
        serviceType=DatabaseServiceType.Mysql,
        connection=DatabaseConnection(
            config=MysqlConnection(
                username="username",
                authType=BasicAuth(
                    password="password",
                ),
                hostPort="http://localhost:1234",
            )
        ),
    )
    service_type = "databaseService"

    @classmethod
    def setUpClass(cls) -> None:
        """
        Prepare ingredients
        """
        cls.service_entity = cls.metadata.create_or_update(data=cls.service)

        create_db = CreateDatabaseRequest(
            name="test-db",
            service=cls.service_entity.fullyQualifiedName,
        )

        create_db_entity = cls.metadata.create_or_update(data=create_db)

        create_schema = CreateDatabaseSchemaRequest(
            name="test-schema",
            database=create_db_entity.fullyQualifiedName,
        )

        cls.create_schema_entity = cls.metadata.create_or_update(data=create_schema)

        cls.entity = Table(
            id=uuid.uuid4(),
            name="test",
            databaseSchema=EntityReference(
                id=cls.create_schema_entity.id, type="databaseSchema"
            ),
            fullyQualifiedName="test-service-table.test-db.test-schema.test",
            columns=[Column(name="id", dataType=DataType.BIGINT)],
        )

        cls.create = CreateTableRequest(
            name="test",
            databaseSchema=cls.create_schema_entity.fullyQualifiedName,
            columns=[Column(name="id", dataType=DataType.BIGINT)],
        )

    @classmethod
    def tearDownClass(cls) -> None:
        """
        Clean up
        """

        service_id = str(
            cls.metadata.get_by_name(
                entity=DatabaseService, fqn="test-service-table"
            ).id.root
        )
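
        # A recursive hard delete of the service also removes the database,
        # schema and tables created in setUpClass.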
        cls.metadata.delete(
            entity=DatabaseService,
            entity_id=service_id,
            recursive=True,
            hard_delete=True,
        )

    def test_create(self):
        """
        We can create a Table and we receive it back as Entity
        """

        res = self.metadata.create_or_update(data=self.create)

        self.assertEqual(res.name, self.entity.name)
        self.assertEqual(res.databaseSchema.id, self.entity.databaseSchema.id)
        self.assertEqual(res.owners, EntityReferenceList(root=[]))

    def test_update(self):
        """
        Updating it properly changes its properties
        """

        res_create = self.metadata.create_or_update(data=self.create)
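
        # Re-sending the same create request with owners set should patch the
        # existing table rather than create a new one.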
        updated = self.create.model_dump(exclude_unset=True)
        updated["owners"] = self.owners
        updated_entity = CreateTableRequest(**updated)

        res = self.metadata.create_or_update(data=updated_entity)

        # Same ID, updated owner
        self.assertEqual(
            res.databaseSchema.fullyQualifiedName,
            updated_entity.databaseSchema.root,
        )
        self.assertEqual(res_create.id, res.id)
        self.assertEqual(res.owners.root[0].id, self.user.id)

    def test_get_name(self):
        """
        We can fetch a Table by name and get it back as Entity
        """

        self.metadata.create_or_update(data=self.create)

        res = self.metadata.get_by_name(
            entity=Table, fqn=self.entity.fullyQualifiedName
        )
        self.assertEqual(res.name, self.entity.name)

        # Now check that we get a None if the table does not exist
        nullable_res = self.metadata.get_by_name(entity=Table, fqn="something.made.up")
        self.assertIsNone(nullable_res)

    def test_get_id(self):
        """
        We can fetch a Table by ID and get it back as Entity
        """

        self.metadata.create_or_update(data=self.create)

        # First pick up by name
        res_name = self.metadata.get_by_name(
            entity=Table, fqn=self.entity.fullyQualifiedName
        )
        # Then fetch by ID
        res = self.metadata.get_by_id(entity=Table, entity_id=str(res_name.id.root))

        self.assertEqual(res_name.id, res.id)

    def test_list(self):
        """
        We can list all our Tables
        """

        self.metadata.create_or_update(data=self.create)

        res = self.metadata.list_entities(
            entity=Table, params={"database": "test-service-table.test-db"}
        )

        # Fetch our test Table. We have already inserted it, so we should find it
        data = next(
            iter(ent for ent in res.entities if ent.name == self.entity.name), None
        )
        assert data

    def test_list_all_and_paginate(self):
        """
        Validate generator utility to fetch all tables
        """
        fake_create = deepcopy(self.create)
        for i in range(0, 10):
            fake_create.name = EntityName(self.create.name.root + str(i))
            self.metadata.create_or_update(data=fake_create)

        all_entities = self.metadata.list_all_entities(
            entity=Table, limit=2  # paginate in batches of pairs
        )
        assert (
            len(list(all_entities)) >= 10
        )  # In case the default testing entity is not present
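
        # The paged listing exposes `after` / `before` cursors: walking forward
        # and then back with the same limit should land on the same page.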
        entity_list = self.metadata.list_entities(entity=Table, limit=2)
        assert len(entity_list.entities) == 2
        after_entity_list = self.metadata.list_entities(
            entity=Table, limit=2, after=entity_list.after
        )
        assert len(after_entity_list.entities) == 2
        before_entity_list = self.metadata.list_entities(
            entity=Table, limit=2, before=after_entity_list.before
        )
        assert before_entity_list.entities == entity_list.entities

    def test_delete(self):
        """
        We can delete a Table by ID
        """

        self.metadata.create_or_update(data=self.create)

        # Find by name
        res_name = self.metadata.get_by_name(
            entity=Table, fqn=self.entity.fullyQualifiedName
        )
        # Then fetch by ID
        res_id = self.metadata.get_by_id(entity=Table, entity_id=res_name.id)

        # Delete
        self.metadata.delete(entity=Table, entity_id=str(res_id.id.root))

        # Then we should not find it
        res = self.metadata.list_entities(entity=Table)
        assert not next(
            iter(
                ent
                for ent in res.entities
                if ent.fullyQualifiedName == self.entity.fullyQualifiedName
            ),
            None,
        )

    def test_ingest_sample_data(self):
        """
        We can ingest sample TableData
        """

        self.metadata.create_or_update(data=self.create)

        # First pick up by name
        res = self.metadata.get_by_name(
            entity=Table, fqn=self.entity.fullyQualifiedName
        )

        sample_data = TableData(columns=["id"], rows=[[1], [2], [3]])

        res_sample = self.metadata.ingest_table_sample_data(res, sample_data)
        assert res_sample == sample_data

        # Let's also validate that we can properly retrieve sample data back
        res_sample = self.metadata.get_sample_data(table=res).sampleData
        assert res_sample == sample_data

    def test_ingest_table_profile_data(self):
        """
        We can ingest profile data TableProfile
        """

        self.metadata.create_or_update(data=self.create)

        # First pick up by name
        res = self.metadata.get_by_name(
            entity=Table, fqn=self.entity.fullyQualifiedName
        )
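
        # Table, column and system profiles travel together in a single
        # CreateTableProfileRequest; timestamps are epoch milliseconds.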
        table_profile = TableProfile(
            timestamp=Timestamp(int(datetime.now().timestamp() * 1000)),
            columnCount=1.0,
            rowCount=3.0,
        )
        column_profile = [
            ColumnProfile(
                name="id",
                uniqueCount=3.0,
                uniqueProportion=1.0,
                min=1,
                max=3,
                mean=1.5,
                sum=2,
                stddev=None,
                timestamp=Timestamp(root=int(datetime.now().timestamp() * 1000)),
            )
        ]

        system_profile = [
            SystemProfile(
                timestamp=Timestamp(root=int(datetime.now().timestamp() * 1000)),
                operation=DmlOperationType.INSERT,
                rowsAffected=11,
            ),
            SystemProfile(
                timestamp=Timestamp(root=int(datetime.now().timestamp() * 1000) + 1),
                operation=DmlOperationType.UPDATE,
                rowsAffected=110,
            ),
        ]

        profile = CreateTableProfileRequest(
            tableProfile=table_profile,
            columnProfile=column_profile,
            systemProfile=system_profile,
        )
        self.metadata.ingest_profile_data(res, profile)

        table = self.metadata.get_latest_table_profile(self.entity.fullyQualifiedName)

        assert table.profile == table_profile

        res_column_profile = next(
            (col.profile for col in table.columns if col.name.root == "id")
        )
        assert res_column_profile == column_profile[0]

    def test_publish_table_usage(self):
        """
        We can POST usage data for a Table
        """

        self.metadata.create_or_update(data=self.create)

        # First pick up by name
        res = self.metadata.get_by_name(
            entity=Table, fqn=self.entity.fullyQualifiedName
        )

        usage = UsageRequest(date="2021-10-20", count=10)

        self.metadata.publish_table_usage(res, usage)

    def test_publish_frequently_joined_with(self):
        """
        We can PUT freq Table JOINs
        """

        self.metadata.create_or_update(data=self.create)

        # First pick up by name
        res = self.metadata.get_by_name(
            entity=Table, fqn=self.entity.fullyQualifiedName
        )

        column_join_table_req = CreateTableRequest(
            name=EntityName("another-test"),
            databaseSchema=self.create_schema_entity.fullyQualifiedName,
            columns=[Column(name=ColumnName("another_id"), dataType=DataType.BIGINT)],
        )
        column_join_table_res = self.metadata.create_or_update(column_join_table_req)

        direct_join_table_req = CreateTableRequest(
            name=EntityName("direct-join-test"),
            databaseSchema=self.create_schema_entity.fullyQualifiedName,
            columns=[],
        )
        direct_join_table_res = self.metadata.create_or_update(direct_join_table_req)
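
        # The joins report covers a one-day window and carries both table-level
        # joins (directTableJoins) and column-level joins (columnJoins).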
        joins = TableJoins(
            startDate=Date(root=datetime.today().date()),
            dayCount=1,
            directTableJoins=[
                JoinedWith(
                    fullyQualifiedName=FullyQualifiedEntityName(
                        "test-service-table.test-db.test-schema.direct-join-test"
                    ),
                    joinCount=2,
                )
            ],
            columnJoins=[
                ColumnJoins(
                    columnName=ColumnName("id"),
                    joinedWith=[
                        JoinedWith(
                            fullyQualifiedName=FullyQualifiedEntityName(
                                "test-service-table.test-db.test-schema.another-test.another_id"
                            ),
                            joinCount=2,
                        )
                    ],
                )
            ],
        )

        self.metadata.publish_frequently_joined_with(res, joins)
        self.metadata.delete(entity=Table, entity_id=str(column_join_table_res.id.root))
        self.metadata.delete(entity=Table, entity_id=str(direct_join_table_res.id.root))

    def test_table_queries(self):
        """
        Test add and update table query data
        """

        self.metadata.create_or_update(data=self.create)

        res = self.metadata.get_by_name(
            entity=Table, fqn=self.entity.fullyQualifiedName
        )

        query_no_user = CreateQueryRequest(
            query=SqlQuery("select * from first_awesome"),
            service=FullyQualifiedEntityName(self.service.name.root),
        )

        self.metadata.ingest_entity_queries_data(entity=res, queries=[query_no_user])
        table_with_query: List[Query] = self.metadata.get_entity_queries(
            res.id, fields=["*"]
        )

        assert len(table_with_query) == 1
        assert table_with_query[0].query == query_no_user.query
        assert table_with_query[0].users == []

        # Validate that we can properly add user information
        query_with_user = CreateQueryRequest(
            query="select * from second_awesome",
            users=[self.owners.root[0].fullyQualifiedName],
            service=FullyQualifiedEntityName(self.service.name.root),
        )

        self.metadata.ingest_entity_queries_data(entity=res, queries=[query_with_user])
        table_with_query: List[Query] = self.metadata.get_entity_queries(
            res.id, fields=["*"]
        )

        assert len(table_with_query) == 2
        query_with_owner = next(
            (
                query
                for query in table_with_query
                if query.query == query_with_user.query
            ),
            None,
        )
        assert len(query_with_owner.users) == 1
        assert query_with_owner.users[0].id == self.owners.root[0].id

    def test_list_versions(self):
        """
        test list table entity versions
        """
        self.metadata.create_or_update(data=self.create)

        # Find by name
        res_name = self.metadata.get_by_name(
            entity=Table, fqn=self.entity.fullyQualifiedName
        )

        res = self.metadata.get_list_entity_versions(
            entity=Table, entity_id=res_name.id.root
        )
        assert res

    def test_get_entity_version(self):
        """
        test get table entity version
        """
        self.metadata.create_or_update(data=self.create)

        # Find by name
        res_name = self.metadata.get_by_name(
            entity=Table, fqn=self.entity.fullyQualifiedName
        )
        res = self.metadata.get_entity_version(
            entity=Table, entity_id=res_name.id.root, version=0.1
        )

        # check we get the correct version requested and the correct entity ID
        assert res.version.root == 0.1
        assert res.id == res_name.id

    def test_get_entity_ref(self):
        """
        test get EntityReference
        """
        res = self.metadata.create_or_update(data=self.create)
        entity_ref = self.metadata.get_entity_reference(
            entity=Table, fqn=res.fullyQualifiedName
        )

        assert res.id == entity_ref.id

    def test_update_profile_sample(self):
        """
        We can safely update the profile sample %
        """

        table = self.metadata.create_or_update(data=self.create)
        assert table.tableProfilerConfig is None
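
        # Set the profiler config, then read the table back requesting the
        # `tableProfilerConfig` field explicitly to verify it was stored.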
        self.metadata._create_or_update_table_profiler_config(
            table.id, table_profiler_config=TableProfilerConfig(profileSample=50.0)
        )

        stored = self.metadata.get_by_name(
            entity=Table, fqn=table.fullyQualifiedName, fields=["tableProfilerConfig"]
        )
        assert stored.tableProfilerConfig.profileSample == 50.0

    def test_list_w_skip_on_failure(self):
        """
        We can list all our Tables even when some of them are broken
        """

        # first validate that exception is raised when skip_on_failure is False
        with patch.object(REST, "get", return_value=BAD_RESPONSE):
            with pytest.raises(ValidationError):
                self.metadata.list_entities(entity=Table)

        with patch.object(REST, "get", return_value=BAD_RESPONSE):
            res = self.metadata.list_entities(entity=Table, skip_on_failure=True)

            # We should have 2 tables, the 3rd one is broken and should be skipped
            assert len(res.entities) == 2

    def test_list_all_w_skip_on_failure(self):
        """
        Validate generator utility to fetch all tables even when some of them are broken
        """
        # first validate that exception is raised when skip_on_failure is False
        with patch.object(REST, "get", return_value=BAD_RESPONSE):
            with pytest.raises(ValidationError):
                res = self.metadata.list_all_entities(
                    entity=Table,
                    limit=1,  # paginate one entity at a time
                )
                list(res)

        with patch.object(REST, "get", return_value=BAD_RESPONSE):
            res = self.metadata.list_all_entities(
                entity=Table,
                limit=1,
                skip_on_failure=True,  # paginate one entity at a time
            )

            # We should have 2 tables, the 3rd one is broken and should be skipped
            assert len(list(res)) == 2

    def test_table_with_slash_in_name(self):
        """E.g., `foo.bar/baz`"""
        name = EntityName("foo.bar/baz")
        new_table: Table = self.metadata.create_or_update(
            data=get_create_entity(
                entity=Table,
                name=name,
                reference=self.create_schema_entity.fullyQualifiedName,
            )
        )
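
        # The slash must survive the round trip: look the table up again by its
        # fully qualified name and compare the stored name.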
        res: Table = self.metadata.get_by_name(
            entity=Table, fqn=new_table.fullyQualifiedName
        )

        assert res.name == name

    def test_ingest_sample_data_with_binary_data(self):
        """
        Test ingesting sample data with binary data
        """
        table: Table = self.metadata.create_or_update(
            data=get_create_entity(
                entity=Table,
                name="random",
                reference=self.create_schema_entity.fullyQualifiedName,
            )
        )
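
        # Binary cell values (including non-UTF-8 bytes) should come back
        # unchanged from the sample data ingestion.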
        sample_data = TableData(
            columns=["id"], rows=[[b"data\x00\x01\x02\x8e\xba\xab\xf0"]]
        )
        res = self.metadata.ingest_table_sample_data(table, sample_data)
        assert res == sample_data

        sample_data = TableData(columns=["id"], rows=[[b"\x00\x01\x02"]])
        res = self.metadata.ingest_table_sample_data(table, sample_data)
        assert res == sample_data