mirror of
https://github.com/datahub-project/datahub.git
synced 2025-10-14 10:26:51 +00:00
fix(ingestion): ingest tables from dba_tables in oracle source (#5592)
* inspector object wrapper for oracle inspector * WIP * before golden file creation * fix(ingestion): ingest tables from dba_tables in oracle source * code review comments * address review comments Co-authored-by: MohdSiddique Bagwan <mohdsiddique.bagwan@gslab.com>
This commit is contained in:
parent
cfaf7a0c20
commit
7d532b8886
@ -1,11 +1,12 @@
|
|||||||
from typing import Iterable, Optional
|
import logging
|
||||||
|
from typing import Any, Iterable, List, Optional, Tuple, cast
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
# This import verifies that the dependencies are available.
|
# This import verifies that the dependencies are available.
|
||||||
import cx_Oracle
|
import cx_Oracle
|
||||||
import pydantic
|
import pydantic
|
||||||
from pydantic.fields import Field
|
from pydantic.fields import Field
|
||||||
from sqlalchemy import event
|
from sqlalchemy import event, sql
|
||||||
from sqlalchemy.dialects.oracle.base import OracleDialect
|
from sqlalchemy.dialects.oracle.base import OracleDialect
|
||||||
from sqlalchemy.engine.reflection import Inspector
|
from sqlalchemy.engine.reflection import Inspector
|
||||||
|
|
||||||
@ -23,6 +24,8 @@ from datahub.ingestion.source.sql.sql_common import (
|
|||||||
make_sqlalchemy_type,
|
make_sqlalchemy_type,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
extra_oracle_types = {
|
extra_oracle_types = {
|
||||||
make_sqlalchemy_type("SDO_GEOMETRY"),
|
make_sqlalchemy_type("SDO_GEOMETRY"),
|
||||||
make_sqlalchemy_type("SDO_POINT_TYPE"),
|
make_sqlalchemy_type("SDO_POINT_TYPE"),
|
||||||
@ -75,6 +78,60 @@ class OracleConfig(BasicSQLAlchemyConfig):
|
|||||||
return url
|
return url
|
||||||
|
|
||||||
|
|
||||||
|
class OracleInspectorObjectWrapper:
    """
    Inspector class wrapper, which queries DBA_TABLES instead of ALL_TABLES.

    Only ``get_schema_names`` and ``get_table_names`` are overridden here;
    any other attribute is forwarded to the wrapped inspector instance.
    """

    def __init__(self, inspector_instance: "Inspector"):
        self._inspector_instance = inspector_instance
        self.log = logging.getLogger(__name__)
        # tablespaces whose tables we don't want to ingest into DataHub
        self.exclude_tablespaces: Tuple[str, str] = ("SYSTEM", "SYSAUX")

    def get_schema_names(self) -> List[str]:
        """Return the normalized user names listed in DBA_USERS, ordered by name."""
        self.log.debug("OracleInspectorObjectWrapper is in used")
        s = "SELECT username FROM dba_users ORDER BY username"
        cursor = self._inspector_instance.bind.execute(s)

        normalize_name = self._inspector_instance.dialect.normalize_name
        return [normalize_name(row[0]) for row in cursor]

    def get_table_names(
        self, schema: Optional[str] = None, order_by: Optional[str] = None
    ) -> List[str]:
        """
        Return the normalized names of the tables owned by ``schema`` in DBA_TABLES.

        Tables located in ``exclude_tablespaces`` and rows with a non-null
        IOT_NAME are filtered out. When ``schema`` is empty the wrapped
        inspector's default schema is used.

        skip order_by, we are not using order_by
        """
        self.log.debug("OracleInspectorObjectWrapper is in used")
        dialect = self._inspector_instance.dialect
        schema = dialect.denormalize_name(
            schema or self._inspector_instance.default_schema_name
        )

        if schema is None:
            schema = dialect.default_schema_name

        sql_str = "SELECT table_name FROM dba_tables WHERE "
        if self.exclude_tablespaces:
            tablespace_str = ", ".join(f"'{ts}'" for ts in self.exclude_tablespaces)
            sql_str += (
                f"nvl(tablespace_name, 'no tablespace') NOT IN ({tablespace_str}) AND "
            )

        sql_str += "OWNER = :owner AND IOT_NAME IS NULL "
        self.log.debug("SQL = %s", sql_str)
        # The owner is passed as a bind parameter rather than formatted into the SQL.
        cursor = self._inspector_instance.bind.execute(sql.text(sql_str), owner=schema)

        return [dialect.normalize_name(row[0]) for row in cursor]

    def __getattr__(self, item: str) -> Any:
        """Forward attributes that the wrapper does not define to the wrapped inspector."""
        # __getattr__ is only invoked after normal lookup has failed, so the
        # attribute is known not to live on the wrapper itself. Guard the one
        # name we dereference below: if it is missing (e.g. an instance created
        # without __init__), delegating would recurse without bound.
        if item == "_inspector_instance":
            raise AttributeError(item)
        return getattr(self._inspector_instance, item)
|
||||||
|
|
||||||
|
|
||||||
@platform_name("Oracle")
|
@platform_name("Oracle")
|
||||||
@config_class(OracleConfig)
|
@config_class(OracleConfig)
|
||||||
@support_status(SupportStatus.CERTIFIED)
|
@support_status(SupportStatus.CERTIFIED)
|
||||||
@ -104,7 +161,8 @@ class OracleSource(SQLAlchemySource):
|
|||||||
event.listen(
|
event.listen(
|
||||||
inspector.engine, "before_cursor_execute", before_cursor_execute
|
inspector.engine, "before_cursor_execute", before_cursor_execute
|
||||||
)
|
)
|
||||||
yield inspector
|
# To silence the mypy lint error
|
||||||
|
yield cast(Inspector, OracleInspectorObjectWrapper(inspector))
|
||||||
|
|
||||||
def get_workunits(self):
|
def get_workunits(self):
|
||||||
with patch.dict(
|
with patch.dict(
|
||||||
|
125
metadata-ingestion/tests/integration/oracle/common.py
Normal file
125
metadata-ingestion/tests/integration/oracle/common.py
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
import pathlib
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from sqlalchemy.sql.elements import TextClause
|
||||||
|
|
||||||
|
from datahub.ingestion.run.pipeline import Pipeline
|
||||||
|
from datahub.ingestion.source.sql.oracle import OracleConfig
|
||||||
|
from tests.test_helpers import mce_helpers
|
||||||
|
|
||||||
|
|
||||||
|
class OracleSourceMockDataBase:
    """
    Extend this class if needed to mock data in a different way.

    Subclasses can override ``MOCK_DATA``; ``get_data`` looks the key up on
    the instance's own class rather than on this base class.
    """

    # Maps either the literal schema query or a schema (owner) name to the
    # rows the mocked ``execute`` call should return.
    MOCK_DATA = {
        "SELECT username FROM dba_users ORDER BY username": (["schema1"], ["schema2"]),
        "schema1": (["test1"], ["test2"]),
        "schema2": (["test3"], ["test4"]),
    }

    def get_data(self, *arg: Any, **kwargs: Any) -> Any:
        """
        Return the mocked rows for an ``execute``-style call.

        A plain string statement is used as the lookup key directly. For a
        ``TextClause`` statement the key is taken from the ``owner`` keyword
        argument. An ``AssertionError`` is raised when no arguments are given
        or the resulting key is not present in ``MOCK_DATA``.
        """
        assert arg or kwargs
        key: Optional[str] = None

        if arg:
            statement = arg[0]
            if isinstance(statement, str):
                key = statement
            elif kwargs and isinstance(statement, TextClause):
                key = kwargs.get("owner")

        # key should be present in MOCK_DATA
        assert key in self.MOCK_DATA

        return self.MOCK_DATA[key]
|
||||||
|
|
||||||
|
|
||||||
|
class OracleTestCaseBase:
    """
    Extend this class if needed to create a new test-case for oracle source
    """

    def __init__(
        self,
        pytestconfig: pytest.Config,
        tmp_path: pathlib.Path,
        golden_file_name: str = "golden_test_ingest.json",
        output_file_name: str = "oracle_mce_output.json",
    ):
        self.pytestconfig = pytestconfig
        self.tmp_path = tmp_path
        self.golden_file_name = golden_file_name
        self.mces_output_file_name = output_file_name
        self.default_mock_data = OracleSourceMockDataBase()

    def get_recipe_source(self) -> dict:
        """Build the ``source`` section of the recipe from the default config."""
        source_config = dict(self.get_default_recipe_config().dict())
        return {"source": {"type": "oracle", "config": source_config}}

    def get_username(self) -> str:
        """Username placed into the default recipe config."""
        return "foo"

    def get_password(self) -> str:
        """Password placed into the default recipe config."""
        return "bar"

    def get_oracle_host_port(self) -> str:
        """Host/port string placed into the default recipe config."""
        return "fake:port"

    def get_database_name(self) -> str:
        """Database name placed into the default recipe config."""
        return "OraDoc"

    def get_default_recipe_config(self) -> OracleConfig:
        """Assemble an ``OracleConfig`` from the individual getter methods."""
        return OracleConfig(
            host_port=self.get_oracle_host_port(),
            database=self.get_database_name(),
            username=self.get_username(),
            password=self.get_password(),
        )

    def get_test_resource_dir(
        self,
    ) -> pathlib.Path:
        """Directory, relative to the pytest root path, holding the golden file."""
        return self.pytestconfig.rootpath / "tests/integration/oracle"

    def get_recipe_sink(self, output_path: str) -> dict:
        """Build the ``sink`` section of the recipe: a file sink writing to ``output_path``."""
        sink_config = {"filename": output_path}
        return {"sink": {"type": "file", "config": sink_config}}

    def get_output_mce_path(self):
        """Path of the output file inside ``tmp_path``."""
        return f"{self.tmp_path}/{self.mces_output_file_name}"

    def get_mock_data_impl(self):
        """Object whose ``get_data`` serves the mocked query results."""
        return self.default_mock_data

    def get_mock_data(self, *arg: Any, **kwargs: Any) -> Any:
        """Forward the call to ``get_data`` of the mock-data implementation."""
        mock_data = self.get_mock_data_impl()
        return mock_data.get_data(*arg, **kwargs)

    def apply(self):
        """Run the pipeline built from the recipe and compare its output with the golden file."""
        output_path = self.get_output_mce_path()

        # Merge the source and sink sections into a single recipe.
        recipe = dict(self.get_recipe_source())
        recipe.update(self.get_recipe_sink(output_path))

        pipeline = Pipeline.create(recipe)
        pipeline.run()
        pipeline.raise_from_status()

        golden_path = f"{self.get_test_resource_dir()}/{self.golden_file_name}"
        mce_helpers.check_golden_file(
            self.pytestconfig,
            output_path=output_path,
            golden_path=golden_path,
        )
|
@ -0,0 +1,627 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "container",
|
||||||
|
"entityUrn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "containerProperties",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"customProperties\": {\"platform\": \"oracle\", \"instance\": \"PROD\", \"database\": \"oradoc\"}, \"name\": \"oradoc\"}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "container",
|
||||||
|
"entityUrn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataPlatformInstance",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"platform\": \"urn:li:dataPlatform:oracle\"}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "container",
|
||||||
|
"entityUrn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "subTypes",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"typeNames\": [\"Database\"]}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "container",
|
||||||
|
"entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "containerProperties",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"customProperties\": {\"platform\": \"oracle\", \"instance\": \"PROD\", \"database\": \"oradoc\", \"schema\": \"schema1\"}, \"name\": \"schema1\"}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "container",
|
||||||
|
"entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataPlatformInstance",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"platform\": \"urn:li:dataPlatform:oracle\"}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "container",
|
||||||
|
"entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "subTypes",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"typeNames\": [\"Schema\"]}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "container",
|
||||||
|
"entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "container",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"container\": \"urn:li:container:8c867b02fcc2615b19cd02b15b023287\"}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "container",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"container\": \"urn:li:container:c093e810646c7ebc493237bb24a3538f\"}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
||||||
|
"urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.common.Status": {
|
||||||
|
"removed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
|
||||||
|
"customProperties": {
|
||||||
|
"p1": "property1"
|
||||||
|
},
|
||||||
|
"externalUrl": null,
|
||||||
|
"name": "test1",
|
||||||
|
"qualifiedName": null,
|
||||||
|
"description": "fake_comments",
|
||||||
|
"uri": null,
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
|
||||||
|
"schemaName": "schema1.test1",
|
||||||
|
"platform": "urn:li:dataPlatform:oracle",
|
||||||
|
"version": 0,
|
||||||
|
"created": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
},
|
||||||
|
"lastModified": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
},
|
||||||
|
"deleted": null,
|
||||||
|
"dataset": null,
|
||||||
|
"cluster": null,
|
||||||
|
"hash": "",
|
||||||
|
"platformSchema": {
|
||||||
|
"com.linkedin.pegasus2avro.schema.MySqlDDL": {
|
||||||
|
"tableSchema": ""
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"fields": [],
|
||||||
|
"primaryKeys": null,
|
||||||
|
"foreignKeysSpecs": null,
|
||||||
|
"foreignKeys": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"proposedDelta": null,
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "subTypes",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"typeNames\": [\"table\"]}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "container",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"container\": \"urn:li:container:c093e810646c7ebc493237bb24a3538f\"}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
||||||
|
"urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.common.Status": {
|
||||||
|
"removed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
|
||||||
|
"customProperties": {
|
||||||
|
"p1": "property1"
|
||||||
|
},
|
||||||
|
"externalUrl": null,
|
||||||
|
"name": "test2",
|
||||||
|
"qualifiedName": null,
|
||||||
|
"description": "fake_comments",
|
||||||
|
"uri": null,
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
|
||||||
|
"schemaName": "schema1.test2",
|
||||||
|
"platform": "urn:li:dataPlatform:oracle",
|
||||||
|
"version": 0,
|
||||||
|
"created": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
},
|
||||||
|
"lastModified": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
},
|
||||||
|
"deleted": null,
|
||||||
|
"dataset": null,
|
||||||
|
"cluster": null,
|
||||||
|
"hash": "",
|
||||||
|
"platformSchema": {
|
||||||
|
"com.linkedin.pegasus2avro.schema.MySqlDDL": {
|
||||||
|
"tableSchema": ""
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"fields": [],
|
||||||
|
"primaryKeys": null,
|
||||||
|
"foreignKeysSpecs": null,
|
||||||
|
"foreignKeys": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"proposedDelta": null,
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "subTypes",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"typeNames\": [\"table\"]}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "container",
|
||||||
|
"entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "containerProperties",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"customProperties\": {\"platform\": \"oracle\", \"instance\": \"PROD\", \"database\": \"oradoc\", \"schema\": \"schema2\"}, \"name\": \"schema2\"}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "container",
|
||||||
|
"entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataPlatformInstance",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"platform\": \"urn:li:dataPlatform:oracle\"}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "container",
|
||||||
|
"entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "subTypes",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"typeNames\": [\"Schema\"]}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "container",
|
||||||
|
"entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "container",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"container\": \"urn:li:container:8c867b02fcc2615b19cd02b15b023287\"}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "container",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"container\": \"urn:li:container:ab1a240f35ae787df0eff0e6726a9825\"}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
||||||
|
"urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.common.Status": {
|
||||||
|
"removed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
|
||||||
|
"customProperties": {
|
||||||
|
"p1": "property1"
|
||||||
|
},
|
||||||
|
"externalUrl": null,
|
||||||
|
"name": "test3",
|
||||||
|
"qualifiedName": null,
|
||||||
|
"description": "fake_comments",
|
||||||
|
"uri": null,
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
|
||||||
|
"schemaName": "schema2.test3",
|
||||||
|
"platform": "urn:li:dataPlatform:oracle",
|
||||||
|
"version": 0,
|
||||||
|
"created": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
},
|
||||||
|
"lastModified": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
},
|
||||||
|
"deleted": null,
|
||||||
|
"dataset": null,
|
||||||
|
"cluster": null,
|
||||||
|
"hash": "",
|
||||||
|
"platformSchema": {
|
||||||
|
"com.linkedin.pegasus2avro.schema.MySqlDDL": {
|
||||||
|
"tableSchema": ""
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"fields": [],
|
||||||
|
"primaryKeys": null,
|
||||||
|
"foreignKeysSpecs": null,
|
||||||
|
"foreignKeys": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"proposedDelta": null,
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "subTypes",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"typeNames\": [\"table\"]}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "container",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"container\": \"urn:li:container:ab1a240f35ae787df0eff0e6726a9825\"}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
||||||
|
"urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.common.Status": {
|
||||||
|
"removed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
|
||||||
|
"customProperties": {
|
||||||
|
"p1": "property1"
|
||||||
|
},
|
||||||
|
"externalUrl": null,
|
||||||
|
"name": "test4",
|
||||||
|
"qualifiedName": null,
|
||||||
|
"description": "fake_comments",
|
||||||
|
"uri": null,
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
|
||||||
|
"schemaName": "schema2.test4",
|
||||||
|
"platform": "urn:li:dataPlatform:oracle",
|
||||||
|
"version": 0,
|
||||||
|
"created": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
},
|
||||||
|
"lastModified": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
},
|
||||||
|
"deleted": null,
|
||||||
|
"dataset": null,
|
||||||
|
"cluster": null,
|
||||||
|
"hash": "",
|
||||||
|
"platformSchema": {
|
||||||
|
"com.linkedin.pegasus2avro.schema.MySqlDDL": {
|
||||||
|
"tableSchema": ""
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"fields": [],
|
||||||
|
"primaryKeys": null,
|
||||||
|
"foreignKeysSpecs": null,
|
||||||
|
"foreignKeys": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"proposedDelta": null,
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "subTypes",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"typeNames\": [\"table\"]}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1643871600000,
|
||||||
|
"runId": "oracle-2022_02_03-07_00_00",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
47
metadata-ingestion/tests/integration/oracle/test_oracle.py
Normal file
47
metadata-ingestion/tests/integration/oracle/test_oracle.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
from unittest import mock
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from freezegun import freeze_time
|
||||||
|
|
||||||
|
from tests.integration.oracle.common import OracleTestCaseBase # type: ignore
|
||||||
|
|
||||||
|
FROZEN_TIME = "2022-02-03 07:00:00"
|
||||||
|
|
||||||
|
|
||||||
|
class OracleIntegrationTestCase(OracleTestCaseBase):
    """Oracle test case that patches the engine, inspector and event hooks with mocks."""

    def apply_mock_data(self, mock_create_engine, mock_inspect, mock_event):
        """
        Wire the given mocks so that the queries issued through the inspector's
        ``bind`` are answered by ``self.get_mock_data``.
        """
        mock_event.listen.return_value = None

        connection_magic_mock = MagicMock()
        connection_magic_mock.execute.side_effect = self.get_mock_data

        inspector_magic_mock = MagicMock()
        inspector_magic_mock.bind = connection_magic_mock
        inspector_magic_mock.engine.url.database = self.get_database_name()
        # Names pass through unchanged in both directions.
        inspector_magic_mock.dialect.normalize_name.side_effect = lambda x: x
        inspector_magic_mock.dialect.denormalize_name.side_effect = lambda x: x

        inspector_magic_mock.get_table_comment.return_value = {
            "text": "fake_comments",
            "properties": {"p1": "property1"},
        }

        mock_inspect.return_value = inspector_magic_mock
        mock_create_engine.connect.return_value = connection_magic_mock

    # Stacked mock.patch decorators are applied bottom-up, so the mocks are
    # injected in the reverse of the order in which the decorators are written:
    # the ``event`` mock comes first and the ``create_engine`` mock comes last.
    @mock.patch("datahub.ingestion.source.sql.sql_common.create_engine")
    @mock.patch("datahub.ingestion.source.sql.sql_common.inspect")
    @mock.patch("datahub.ingestion.source.sql.oracle.event")
    def apply(self, mock_event, mock_inspect, mock_create_engine):
        """Install the mocks, then run the base-class pipeline and golden-file check."""
        self.apply_mock_data(mock_create_engine, mock_inspect, mock_event)
        super().apply()
|
||||||
|
|
||||||
|
|
||||||
|
@freeze_time(FROZEN_TIME)
@pytest.mark.integration
def test_oracle_source_integration(pytestconfig, tmp_path, mock_time):
    """Build the Oracle integration test case and run it."""
    test_case = OracleIntegrationTestCase(
        tmp_path=tmp_path,
        pytestconfig=pytestconfig,
    )
    test_case.apply()
|
Loading…
x
Reference in New Issue
Block a user