mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-07-24 17:59:52 +00:00

* feat(nosql-profiler): row count 1. Implemented the NoSQLProfilerInterface as an entrypoint for the nosql profiler. 2. Added the NoSQLMetric as an abstract class. 3. Implemented the interface for the MongoDB database source. 4. Implemented an e2e test using testcontainers. * added profiler support for mongodb connection * doc * use int_admin_ometa in test setup * - fixed linting issue in gx - removed unused inheritance * moved the nosql function into the metric class * feat(profiler): add dynamodb row count * feat(profiler): add dynamodb row count * formatting * validate_compose: raise exception for bad status code. * fixed import * format * feat(nosql-profiler): added sample data 1. Implemented the NoSQL sampler. 2. Some naming changes to the NoSQL adaptor to avoid fixing names with the profiler interface. 3. Tests. * added default sample limit * formatting * fixed import * feat(profiler): dynamodb sample data * tests for dynamo db sample data * format * format * use service connection for nosql adaptor factory * fixed tests * format * fixed after merge
94 lines
2.8 KiB
Python
94 lines
2.8 KiB
Python
import pytest
|
|
|
|
from metadata.generated.schema.entity.data.table import Table
|
|
from metadata.generated.schema.entity.services.databaseService import DatabaseService
|
|
from metadata.generated.schema.metadataIngestion.databaseServiceProfilerPipeline import (
|
|
ProfilerConfigType,
|
|
)
|
|
from metadata.generated.schema.metadataIngestion.workflow import (
|
|
LogLevels,
|
|
OpenMetadataWorkflowConfig,
|
|
Sink,
|
|
Source,
|
|
SourceConfig,
|
|
WorkflowConfig,
|
|
)
|
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
|
from metadata.workflow.metadata import MetadataWorkflow
|
|
from metadata.workflow.profiler import ProfilerWorkflow
|
|
|
|
|
|
@pytest.fixture(autouse=True, scope="module")
def ingest_metadata(
    db_service: DatabaseService, metadata: OpenMetadata, ingest_sample_data
):
    """Run the metadata ingestion workflow for ``db_service`` once per module.

    The fixture is ``autouse`` with module scope, so it is applied to the
    tests of this module without being requested explicitly.

    ``ingest_sample_data`` is requested only so that pytest resolves that
    fixture before this one runs; its value is not used here.
    """
    ingestion_source = Source(
        type=db_service.serviceType.name.lower(),
        serviceName=db_service.fullyQualifiedName.__root__,
        sourceConfig=SourceConfig(config={}),
        serviceConnection=db_service.connection,
    )
    rest_sink = Sink(type="metadata-rest", config={})
    server_settings = WorkflowConfig(openMetadataServerConfig=metadata.config)

    workflow = MetadataWorkflow.create(
        OpenMetadataWorkflowConfig(
            source=ingestion_source,
            sink=rest_sink,
            workflowConfig=server_settings,
        )
    )
    workflow.execute()
    # NOTE(review): presumably raises when the run recorded failures, which
    # would fail the module's setup early -- confirm against the workflow API.
    workflow.raise_from_status()
|
|
|
|
|
|
@pytest.fixture(scope="module")
def db_fqn(db_service: DatabaseService):
    """Return ``<service FQN>.default.default`` for the service under test.

    NOTE(review): the two ``"default"`` segments look like the database and
    schema names; the test in this module passes the result as a
    ``databaseSchema`` filter -- confirm against the service fixtures.
    """
    service_fqn = db_service.fullyQualifiedName.__root__
    name_parts = (service_fqn, "default", "default")
    return ".".join(name_parts)
|
|
|
|
|
|
def test_sample_data(db_service, db_fqn, metadata):
    """Run the profiler workflow and verify the sample data it stores.

    Steps:
      1. Build a profiler workflow configuration for ``db_service`` and run it.
      2. List the tables filtered by ``databaseSchema=db_fqn`` and take the
         first one.
      3. Fetch that table's sample data and check its shape (2 columns,
         2 rows) and that the first cell of the first row is ``"Alice"``.

    Args:
        db_service: database service entity the profiler is run against.
        db_fqn: dotted name used as the ``databaseSchema`` listing filter.
        metadata: OpenMetadata client used to run the workflow and query
            the results.
    """
    workflow_config = {
        "source": {
            "type": db_service.serviceType.name.lower(),
            "serviceName": db_service.fullyQualifiedName.__root__,
            "sourceConfig": {
                "config": {
                    "type": ProfilerConfigType.Profiler.value,
                },
            },
        },
        "processor": {
            "type": "orm-profiler",
            "config": {},
        },
        "sink": {
            "type": "metadata-rest",
            "config": {},
        },
        "workflowConfig": {
            "loggerLevel": LogLevels.DEBUG,
            "openMetadataServerConfig": metadata.config.dict(),
        },
    }
    profiler_workflow = ProfilerWorkflow.create(workflow_config)
    profiler_workflow.execute()
    profiler_workflow.raise_from_status()

    tables = metadata.list_entities(
        Table,
        fields=["fullyQualifiedName"],
        params={
            "databaseSchema": db_fqn,
        },
    ).entities
    # Fail with a readable message instead of an IndexError when the schema
    # holds no tables (e.g. the metadata ingestion did not produce any).
    assert tables, f"no tables found for databaseSchema {db_fqn!r}"
    table = tables[0]

    sample_data = metadata.get_sample_data(table).sampleData
    assert sample_data is not None
    assert len(sample_data.columns) == 2
    assert len(sample_data.rows) == 2
    assert sample_data.rows[0][0] == "Alice"
|