mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-11 16:31:57 +00:00
sample data for object store service and containers (#10336)
* sample data for object store service and containers * formatting * linting
This commit is contained in:
parent
2d65cf02b0
commit
bbf54afaf5
198
ingestion/examples/sample_data/objectcontainers/containers.json
Normal file
198
ingestion/examples/sample_data/objectcontainers/containers.json
Normal file
@ -0,0 +1,198 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"name": "transactions",
|
||||||
|
"displayName": "Company Transactions",
|
||||||
|
"description": "Bucket containing all the company's transactions",
|
||||||
|
"parent": null,
|
||||||
|
"prefix": "/transactions/",
|
||||||
|
"dataModel": {
|
||||||
|
"isPartitioned": true,
|
||||||
|
"columns": [
|
||||||
|
{
|
||||||
|
"name": "transaction_id",
|
||||||
|
"dataType": "NUMERIC",
|
||||||
|
"dataTypeDisplay": "numeric",
|
||||||
|
"description": "The ID of the executed transaction. This column is the primary key for this table.",
|
||||||
|
"tags": [],
|
||||||
|
"constraint": "PRIMARY_KEY",
|
||||||
|
"ordinalPosition": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "merchant",
|
||||||
|
"dataType": "VARCHAR",
|
||||||
|
"dataLength": 100,
|
||||||
|
"dataTypeDisplay": "varchar",
|
||||||
|
"description": "The merchant for this transaction.",
|
||||||
|
"tags": [],
|
||||||
|
"ordinalPosition": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "transaction_time",
|
||||||
|
"dataType": "TIMESTAMP",
|
||||||
|
"dataTypeDisplay": "timestamp",
|
||||||
|
"description": "The time the transaction took place.",
|
||||||
|
"tags": [],
|
||||||
|
"ordinalPosition": 3
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"numberOfObjects": "50",
|
||||||
|
"size": "102400",
|
||||||
|
"fileFormats": [
|
||||||
|
"parquet"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "departments",
|
||||||
|
"displayName": "Company departments",
|
||||||
|
"description": "Bucket containing company department information",
|
||||||
|
"parent": null,
|
||||||
|
"prefix": "/departments/",
|
||||||
|
"dataModel": null,
|
||||||
|
"numberOfObjects": "2",
|
||||||
|
"size": "2048",
|
||||||
|
"fileFormats": [
|
||||||
|
"csv"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "engineering",
|
||||||
|
"displayName": "Engineering department",
|
||||||
|
"description": "Bucket containing engineering department information",
|
||||||
|
"parent": "s3_object_store_sample.departments",
|
||||||
|
"prefix": "/departments/engineering/",
|
||||||
|
"dataModel": null,
|
||||||
|
"numberOfObjects": "5",
|
||||||
|
"size": "14336",
|
||||||
|
"fileFormats": [
|
||||||
|
"zip"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "finance",
|
||||||
|
"displayName": "Finance department",
|
||||||
|
"description": "Bucket containing finance department information",
|
||||||
|
"parent": "s3_object_store_sample.departments",
|
||||||
|
"prefix": "/departments/finance/",
|
||||||
|
"dataModel": {
|
||||||
|
"isPartitioned": false,
|
||||||
|
"columns": [
|
||||||
|
{
|
||||||
|
"name": "department_id",
|
||||||
|
"dataType": "NUMERIC",
|
||||||
|
"dataTypeDisplay": "numeric",
|
||||||
|
"description": "The ID of the department. This column is the primary key for this table.",
|
||||||
|
"tags": [],
|
||||||
|
"constraint": "PRIMARY_KEY",
|
||||||
|
"ordinalPosition": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "budget_total_value",
|
||||||
|
"dataType": "NUMERIC",
|
||||||
|
"dataTypeDisplay": "numeric",
|
||||||
|
"description": "The department's budget for the current year.",
|
||||||
|
"tags": [],
|
||||||
|
"ordinalPosition": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "notes",
|
||||||
|
"dataType": "VARCHAR",
|
||||||
|
"dataLength": 100,
|
||||||
|
"dataTypeDisplay": "varchar",
|
||||||
|
"description": "Notes concerning sustainability for the budget.",
|
||||||
|
"tags": [],
|
||||||
|
"ordinalPosition": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "budget_executor",
|
||||||
|
"dataType": "VARCHAR",
|
||||||
|
"dataTypeDisplay": "varchar",
|
||||||
|
"description": "The responsible finance lead for the budget execution",
|
||||||
|
"tags": [],
|
||||||
|
"ordinalPosition": 4
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"numberOfObjects": "75",
|
||||||
|
"size": "286720",
|
||||||
|
"fileFormats": [
|
||||||
|
"zip",
|
||||||
|
"csv"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "media",
|
||||||
|
"displayName": "Media department",
|
||||||
|
"description": "Bucket containing media department information",
|
||||||
|
"parent": "s3_object_store_sample.departments",
|
||||||
|
"prefix": "/departments/media/",
|
||||||
|
"dataModel": null,
|
||||||
|
"numberOfObjects": "123",
|
||||||
|
"size": "243712",
|
||||||
|
"fileFormats": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "movies",
|
||||||
|
"displayName": "Company official footage",
|
||||||
|
"description": "Bucket containing movies about the company",
|
||||||
|
"parent": "s3_object_store_sample.departments.media",
|
||||||
|
"prefix": "/departments/media/media2020/",
|
||||||
|
"dataModel": null,
|
||||||
|
"numberOfObjects": "500",
|
||||||
|
"size": "15360000",
|
||||||
|
"fileFormats": [
|
||||||
|
"gz"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "expenditures",
|
||||||
|
"displayName": "Expenditures for the current year",
|
||||||
|
"description": "Bucket containing finance expenditures information",
|
||||||
|
"parent": "s3_object_store_sample.departments.finance",
|
||||||
|
"prefix": "/departments/finance/expenditures-2023",
|
||||||
|
"dataModel": {
|
||||||
|
"isPartitioned": false,
|
||||||
|
"columns": [
|
||||||
|
{
|
||||||
|
"name": "department_id",
|
||||||
|
"dataType": "NUMERIC",
|
||||||
|
"dataTypeDisplay": "numeric",
|
||||||
|
"description": "The ID of the department. This column is the primary key for this table.",
|
||||||
|
"tags": [],
|
||||||
|
"constraint": "PRIMARY_KEY",
|
||||||
|
"ordinalPosition": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "approved",
|
||||||
|
"dataType": "BOOLEAN",
|
||||||
|
"dataTypeDisplay": "boolean",
|
||||||
|
"description": "Whether this was already approved by upper management",
|
||||||
|
"tags": [],
|
||||||
|
"ordinalPosition": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "fraudulent_claims",
|
||||||
|
"dataType": "BOOLEAN",
|
||||||
|
"dataTypeDisplay": "boolean",
|
||||||
|
"description": "Whether any fraudulent claims were made for the expenditure at any point",
|
||||||
|
"tags": [],
|
||||||
|
"ordinalPosition": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "total_value_for_current_month",
|
||||||
|
"dataType": "NUMERIC",
|
||||||
|
"dataTypeDisplay": "numeric",
|
||||||
|
"description": "The current total value spent for the expenditure as of the beginning of the current month",
|
||||||
|
"tags": [],
|
||||||
|
"ordinalPosition": 4
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"numberOfObjects": "10",
|
||||||
|
"size": "65536",
|
||||||
|
"fileFormats": [
|
||||||
|
"zstd",
|
||||||
|
"tsv"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
17
ingestion/examples/sample_data/objectcontainers/service.json
Normal file
17
ingestion/examples/sample_data/objectcontainers/service.json
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
{
|
||||||
|
"type": "s3",
|
||||||
|
"serviceName": "s3_object_store_sample",
|
||||||
|
"serviceConnection": {
|
||||||
|
"config": {
|
||||||
|
"type": "S3",
|
||||||
|
"awsConfig": {
|
||||||
|
"awsAccessKeyId": "aws_access_key_id",
|
||||||
|
"awsSecretAccessKey": "aws_secret_access_key",
|
||||||
|
"awsRegion": "awsRegion",
|
||||||
|
"endPointURL": "https://endpoint.com/"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"sourceConfig": {
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -36,6 +36,7 @@ from metadata.generated.schema.entity.classification.classification import (
|
|||||||
)
|
)
|
||||||
from metadata.generated.schema.entity.classification.tag import Tag
|
from metadata.generated.schema.entity.classification.tag import Tag
|
||||||
from metadata.generated.schema.entity.data.chart import Chart
|
from metadata.generated.schema.entity.data.chart import Chart
|
||||||
|
from metadata.generated.schema.entity.data.container import Container
|
||||||
from metadata.generated.schema.entity.data.dashboard import Dashboard
|
from metadata.generated.schema.entity.data.dashboard import Dashboard
|
||||||
from metadata.generated.schema.entity.data.database import Database
|
from metadata.generated.schema.entity.data.database import Database
|
||||||
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
|
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
|
||||||
@ -60,6 +61,9 @@ from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipel
|
|||||||
from metadata.generated.schema.entity.services.messagingService import MessagingService
|
from metadata.generated.schema.entity.services.messagingService import MessagingService
|
||||||
from metadata.generated.schema.entity.services.metadataService import MetadataService
|
from metadata.generated.schema.entity.services.metadataService import MetadataService
|
||||||
from metadata.generated.schema.entity.services.mlmodelService import MlModelService
|
from metadata.generated.schema.entity.services.mlmodelService import MlModelService
|
||||||
|
from metadata.generated.schema.entity.services.objectstoreService import (
|
||||||
|
ObjectStoreService,
|
||||||
|
)
|
||||||
from metadata.generated.schema.entity.services.pipelineService import PipelineService
|
from metadata.generated.schema.entity.services.pipelineService import PipelineService
|
||||||
from metadata.generated.schema.entity.services.storageService import StorageService
|
from metadata.generated.schema.entity.services.storageService import StorageService
|
||||||
from metadata.generated.schema.entity.teams.role import Role
|
from metadata.generated.schema.entity.teams.role import Role
|
||||||
@ -321,6 +325,11 @@ class OpenMetadata(
|
|||||||
if issubclass(entity, get_args(Union[User, self.get_create_entity_type(User)])):
|
if issubclass(entity, get_args(Union[User, self.get_create_entity_type(User)])):
|
||||||
return "/users"
|
return "/users"
|
||||||
|
|
||||||
|
if issubclass(
|
||||||
|
entity, get_args(Union[Container, self.get_create_entity_type(Container)])
|
||||||
|
):
|
||||||
|
return "/containers"
|
||||||
|
|
||||||
# Services Schemas
|
# Services Schemas
|
||||||
if issubclass(
|
if issubclass(
|
||||||
entity,
|
entity,
|
||||||
@ -378,6 +387,16 @@ class OpenMetadata(
|
|||||||
):
|
):
|
||||||
return "/services/metadataServices"
|
return "/services/metadataServices"
|
||||||
|
|
||||||
|
if issubclass(
|
||||||
|
entity,
|
||||||
|
get_args(
|
||||||
|
Union[
|
||||||
|
ObjectStoreService, self.get_create_entity_type(ObjectStoreService)
|
||||||
|
]
|
||||||
|
),
|
||||||
|
):
|
||||||
|
return "/services/objectstoreServices"
|
||||||
|
|
||||||
if issubclass(
|
if issubclass(
|
||||||
entity,
|
entity,
|
||||||
IngestionPipeline,
|
IngestionPipeline,
|
||||||
|
|||||||
@ -21,6 +21,7 @@ from typing import Any, Dict, Iterable, List, Union
|
|||||||
from pydantic import ValidationError
|
from pydantic import ValidationError
|
||||||
|
|
||||||
from metadata.generated.schema.api.data.createChart import CreateChartRequest
|
from metadata.generated.schema.api.data.createChart import CreateChartRequest
|
||||||
|
from metadata.generated.schema.api.data.createContainer import CreateContainerRequest
|
||||||
from metadata.generated.schema.api.data.createDashboard import CreateDashboardRequest
|
from metadata.generated.schema.api.data.createDashboard import CreateDashboardRequest
|
||||||
from metadata.generated.schema.api.data.createDatabase import CreateDatabaseRequest
|
from metadata.generated.schema.api.data.createDatabase import CreateDatabaseRequest
|
||||||
from metadata.generated.schema.api.data.createDatabaseSchema import (
|
from metadata.generated.schema.api.data.createDatabaseSchema import (
|
||||||
@ -43,6 +44,7 @@ from metadata.generated.schema.api.teams.createTeam import CreateTeamRequest
|
|||||||
from metadata.generated.schema.api.teams.createUser import CreateUserRequest
|
from metadata.generated.schema.api.teams.createUser import CreateUserRequest
|
||||||
from metadata.generated.schema.api.tests.createTestCase import CreateTestCaseRequest
|
from metadata.generated.schema.api.tests.createTestCase import CreateTestCaseRequest
|
||||||
from metadata.generated.schema.api.tests.createTestSuite import CreateTestSuiteRequest
|
from metadata.generated.schema.api.tests.createTestSuite import CreateTestSuiteRequest
|
||||||
|
from metadata.generated.schema.entity.data.container import Container
|
||||||
from metadata.generated.schema.entity.data.dashboard import Dashboard
|
from metadata.generated.schema.entity.data.dashboard import Dashboard
|
||||||
from metadata.generated.schema.entity.data.database import Database
|
from metadata.generated.schema.entity.data.database import Database
|
||||||
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
|
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
|
||||||
@ -71,6 +73,9 @@ from metadata.generated.schema.entity.services.dashboardService import Dashboard
|
|||||||
from metadata.generated.schema.entity.services.databaseService import DatabaseService
|
from metadata.generated.schema.entity.services.databaseService import DatabaseService
|
||||||
from metadata.generated.schema.entity.services.messagingService import MessagingService
|
from metadata.generated.schema.entity.services.messagingService import MessagingService
|
||||||
from metadata.generated.schema.entity.services.mlmodelService import MlModelService
|
from metadata.generated.schema.entity.services.mlmodelService import MlModelService
|
||||||
|
from metadata.generated.schema.entity.services.objectstoreService import (
|
||||||
|
ObjectStoreService,
|
||||||
|
)
|
||||||
from metadata.generated.schema.entity.services.pipelineService import PipelineService
|
from metadata.generated.schema.entity.services.pipelineService import PipelineService
|
||||||
from metadata.generated.schema.entity.services.storageService import StorageService
|
from metadata.generated.schema.entity.services.storageService import StorageService
|
||||||
from metadata.generated.schema.entity.teams.team import Team
|
from metadata.generated.schema.entity.teams.team import Team
|
||||||
@ -183,13 +188,14 @@ class SampleDataSourceStatus(SourceStatus):
|
|||||||
|
|
||||||
class SampleDataSource(
|
class SampleDataSource(
|
||||||
Source[Entity]
|
Source[Entity]
|
||||||
): # pylint: disable=too-many-instance-attributes,too-many-public-methods
|
): # pylint: disable=too-many-instance-attributes,too-many-public-methods,disable=too-many-lines,
|
||||||
"""
|
"""
|
||||||
Loads JSON data and prepares the required
|
Loads JSON data and prepares the required
|
||||||
python objects to be sent to the Sink.
|
python objects to be sent to the Sink.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, config: WorkflowSource, metadata_config: OpenMetadataConnection):
|
def __init__(self, config: WorkflowSource, metadata_config: OpenMetadataConnection):
|
||||||
|
# pylint: disable=too-many-statements
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.status = SampleDataSourceStatus()
|
self.status = SampleDataSourceStatus()
|
||||||
self.config = config
|
self.config = config
|
||||||
@ -400,6 +406,20 @@ class SampleDataSource(
|
|||||||
entity=MlModelService,
|
entity=MlModelService,
|
||||||
config=WorkflowSource(**self.model_service_json),
|
config=WorkflowSource(**self.model_service_json),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.object_service_json = json.load(
|
||||||
|
open( # pylint: disable=consider-using-with
|
||||||
|
sample_data_folder + "/objectcontainers/service.json",
|
||||||
|
"r",
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
self.object_store_service = self.metadata.get_service_or_create(
|
||||||
|
entity=ObjectStoreService,
|
||||||
|
config=WorkflowSource(**self.object_service_json),
|
||||||
|
)
|
||||||
|
|
||||||
self.models = json.load(
|
self.models = json.load(
|
||||||
open( # pylint: disable=consider-using-with
|
open( # pylint: disable=consider-using-with
|
||||||
sample_data_folder + "/models/models.json",
|
sample_data_folder + "/models/models.json",
|
||||||
@ -407,6 +427,15 @@ class SampleDataSource(
|
|||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.containers = json.load(
|
||||||
|
open( # pylint: disable=consider-using-with
|
||||||
|
sample_data_folder + "/objectcontainers/containers.json",
|
||||||
|
"r",
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
self.user_entity = {}
|
self.user_entity = {}
|
||||||
self.table_tests = json.load(
|
self.table_tests = json.load(
|
||||||
open( # pylint: disable=consider-using-with
|
open( # pylint: disable=consider-using-with
|
||||||
@ -471,6 +500,7 @@ class SampleDataSource(
|
|||||||
yield from self.ingest_lineage()
|
yield from self.ingest_lineage()
|
||||||
yield from self.ingest_pipeline_status()
|
yield from self.ingest_pipeline_status()
|
||||||
yield from self.ingest_mlmodels()
|
yield from self.ingest_mlmodels()
|
||||||
|
yield from self.ingest_containers()
|
||||||
yield from self.ingest_profiles()
|
yield from self.ingest_profiles()
|
||||||
yield from self.ingest_test_suite()
|
yield from self.ingest_test_suite()
|
||||||
yield from self.ingest_test_case()
|
yield from self.ingest_test_case()
|
||||||
@ -843,6 +873,45 @@ class SampleDataSource(
|
|||||||
logger.debug(traceback.format_exc())
|
logger.debug(traceback.format_exc())
|
||||||
logger.warning(f"Error ingesting MlModel [{model}]: {exc}")
|
logger.warning(f"Error ingesting MlModel [{model}]: {exc}")
|
||||||
|
|
||||||
|
def ingest_containers(self) -> Iterable[CreateContainerRequest]:
    """
    Yield one CreateContainerRequest for each entry of the
    sample containers data held in `self.containers`.

    When an entry names a `parent` FQN, that container is fetched with
    `self.metadata.get_by_name` and referenced from the request. Any
    exception raised while handling an entry is logged and the loop
    moves on to the next entry.
    """

    for container in self.containers:
        try:
            # Resolve the optional parent container from its FQN
            parent_container_fqn = container.get("parent")
            parent_reference = None
            if parent_container_fqn:
                parent_entity = self.metadata.get_by_name(
                    entity=Container, fqn=parent_container_fqn
                )
                if not parent_entity:
                    raise InvalidSampleDataException(
                        f"Cannot find {parent_container_fqn} in Sample Containers"
                    )
                parent_reference = EntityReference(
                    id=parent_entity.id, type="container"
                )

            # Required keys use item access so a missing one is reported
            # by the handler below; optional keys default to None.
            yield CreateContainerRequest(
                name=container["name"],
                displayName=container["displayName"],
                description=container["description"],
                parent=parent_reference,
                prefix=container["prefix"],
                dataModel=container.get("dataModel"),
                numberOfObjects=container.get("numberOfObjects"),
                size=container.get("size"),
                fileFormats=container.get("fileFormats"),
                service=self.object_store_service.fullyQualifiedName,
            )
        except Exception as exc:
            logger.debug(traceback.format_exc())
            logger.warning(f"Error ingesting Container [{container}]: {exc}")
|
||||||
|
|
||||||
def ingest_users(self) -> Iterable[OMetaUserProfile]:
|
def ingest_users(self) -> Iterable[OMetaUserProfile]:
|
||||||
"""
|
"""
|
||||||
Ingest Sample User data
|
Ingest Sample User data
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user