Imri Paran 93ec391f5c
MINOR: Dynamodb sample data (#15264)
* feat(nosql-profiler): row count

1. Implemented the NoSQLProfilerInterface as an entrypoint for the nosql profiler.
2. Added the NoSQLMetric as an abstract class.
3. Implemented the interface for the MongoDB database source.
4. Implemented an e2e test using testcontainers.
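A minimal sketch of the structure described above; the class and method names here (NoSQLAdaptor, NoSQLMetric, RowCount, item_count) are illustrative placeholders and may not match the PR exactly:

from abc import ABC, abstractmethod
from typing import Any, Dict


class NoSQLAdaptor(ABC):
    """Driver-specific access layer, e.g. a thin wrapper around pymongo or boto3."""

    @abstractmethod
    def item_count(self, table: Any) -> int:
        ...


class NoSQLMetric(ABC):
    """Abstract metric computed through a NoSQLAdaptor instead of SQLAlchemy."""

    @abstractmethod
    def compute(self, adaptor: NoSQLAdaptor, table: Any) -> Dict[str, Any]:
        ...


class RowCount(NoSQLMetric):
    """Concrete example: the row count metric delegates to the adaptor."""

    def compute(self, adaptor: NoSQLAdaptor, table: Any) -> Dict[str, Any]:
        return {"rowCount": adaptor.item_count(table)}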

* added profiler support for mongodb connection

* doc

* use int_admin_ometa in test setup

* - fixed linting issue in gx
- removed unused inheritance

* moved the nosql function into the metric class

* feat(profiler): add dynamodb row count

* feat(profiler): add dynamodb row count
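For illustration only, one way to obtain a DynamoDB row count with boto3; the adaptor in this PR may use a different call, and ItemCount is an approximation that DynamoDB refreshes roughly every six hours:

import boto3


def dynamodb_row_count(table_name: str, exact: bool = False) -> int:
    client = boto3.client("dynamodb")
    if not exact:
        # Cheap: approximate ItemCount read from the table metadata
        return client.describe_table(TableName=table_name)["Table"]["ItemCount"]
    # Exact but expensive: paginate a COUNT scan over the whole table
    total = 0
    kwargs = {"TableName": table_name, "Select": "COUNT"}
    while True:
        response = client.scan(**kwargs)
        total += response["Count"]
        if "LastEvaluatedKey" not in response:
            return total
        kwargs["ExclusiveStartKey"] = response["LastEvaluatedKey"]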

* formatting

* validate_compose: raise exception for bad status code.

* fixed import

* format

* feat(nosql-profiler): added sample data

1. Implemented the NoSQL sampler.
2. Some naming changes to the NoSQL adaptor to avoid name clashes with the profiler interface.
3. Tests.

* added default sample limit
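Rough sketch of the sampling idea with an assumed default limit; the names used here (NoSQLSampler, SAMPLE_DATA_DEFAULT_COUNT, the adaptor's scan method) are placeholders, not necessarily the ones in the code:

from typing import Any, Dict, List, Tuple

SAMPLE_DATA_DEFAULT_COUNT = 100  # assumed default row cap, not confirmed by the PR


class NoSQLSampler:
    def __init__(self, adaptor, table, limit: int = SAMPLE_DATA_DEFAULT_COUNT):
        self.adaptor = adaptor
        self.table = table
        self.limit = limit

    def fetch_sample_data(self) -> Tuple[List[str], List[List[Any]]]:
        # The adaptor is assumed to return at most `limit` documents as plain dicts
        documents: List[Dict[str, Any]] = self.adaptor.scan(self.table, self.limit)
        # Flatten documents into the columns/rows shape asserted on in the test below
        columns = sorted({key for doc in documents for key in doc})
        rows = [[doc.get(column) for column in columns] for doc in documents]
        return columns, rows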

* formatting

* fixed import

* feat(profiler): dynamodb sample data

* tests for dynamo db sample data

* format

* format

* use service connection for nosql adaptor factory
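Hypothetical illustration of a factory keyed on the service connection type; the real registry, class names, and constructor signatures may differ:

class MongoDBAdaptor:
    def __init__(self, connection):
        self.connection = connection  # would wrap a pymongo client


class DynamoDBAdaptor:
    def __init__(self, connection):
        self.connection = connection  # would wrap a boto3 resource


def nosql_adaptor_factory(service_connection):
    registry = {
        "MongoDBConnection": MongoDBAdaptor,
        "DynamoDBConnection": DynamoDBAdaptor,
    }
    connection_type = type(service_connection).__name__
    try:
        return registry[connection_type](service_connection)
    except KeyError:
        raise ValueError(f"No NoSQL adaptor registered for {connection_type}")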

* fixed tests

* format

* fixed after merge
2024-04-22 17:46:40 +02:00


import pytest
from metadata.generated.schema.entity.data.table import Table
from metadata.generated.schema.entity.services.databaseService import DatabaseService
from metadata.generated.schema.metadataIngestion.databaseServiceProfilerPipeline import (
ProfilerConfigType,
)
from metadata.generated.schema.metadataIngestion.workflow import (
LogLevels,
OpenMetadataWorkflowConfig,
Sink,
Source,
SourceConfig,
WorkflowConfig,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.workflow.metadata import MetadataWorkflow
from metadata.workflow.profiler import ProfilerWorkflow


@pytest.fixture(autouse=True, scope="module")
def ingest_metadata(
db_service: DatabaseService, metadata: OpenMetadata, ingest_sample_data
):
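    """Ingest the service metadata once per module so the profiler has tables to sample."""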
workflow_config = OpenMetadataWorkflowConfig(
source=Source(
type=db_service.serviceType.name.lower(),
serviceName=db_service.fullyQualifiedName.__root__,
sourceConfig=SourceConfig(config={}),
serviceConnection=db_service.connection,
),
sink=Sink(
type="metadata-rest",
config={},
),
workflowConfig=WorkflowConfig(openMetadataServerConfig=metadata.config),
)
metadata_ingestion = MetadataWorkflow.create(workflow_config)
metadata_ingestion.execute()
metadata_ingestion.raise_from_status()
    return


@pytest.fixture(scope="module")
def db_fqn(db_service: DatabaseService):
return ".".join(
[
db_service.fullyQualifiedName.__root__,
"default",
"default",
]
    )


def test_sample_data(db_service, db_fqn, metadata):
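    """Run the profiler workflow and validate the sample data registered for the table."""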
workflow_config = {
"source": {
"type": db_service.serviceType.name.lower(),
"serviceName": db_service.fullyQualifiedName.__root__,
"sourceConfig": {
"config": {
"type": ProfilerConfigType.Profiler.value,
},
},
},
"processor": {
"type": "orm-profiler",
"config": {},
},
"sink": {
"type": "metadata-rest",
"config": {},
},
"workflowConfig": {
"loggerLevel": LogLevels.DEBUG,
"openMetadataServerConfig": metadata.config.dict(),
},
}
profiler_workflow = ProfilerWorkflow.create(workflow_config)
profiler_workflow.execute()
profiler_workflow.raise_from_status()
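    # Fetch the first table ingested under the test database schema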
table = metadata.list_entities(
Table,
fields=["fullyQualifiedName"],
params={
"databaseSchema": db_fqn,
},
).entities[0]
sample_data = metadata.get_sample_data(table).sampleData
assert sample_data is not None
assert len(sample_data.columns) == 2
assert len(sample_data.rows) == 2
assert sample_data.rows[0][0] == "Alice"