Minor: Add extended glossary (#14918)

* Add Nested glossary to extended sample data

* Fix lint

* Add Stress test config

* Fix Lint
This commit is contained in:
Ayush Shah 2024-01-31 20:43:04 +05:30 committed by GitHub
parent a715bdef3e
commit 0d66c0e514
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 207 additions and 137 deletions

View File

@ -9,6 +9,8 @@ source:
connectionOptions: connectionOptions:
sampleDataFolder: "./examples/sample_data" sampleDataFolder: "./examples/sample_data"
extendedSampleDataFolder: "./examples/extended_sample_data" extendedSampleDataFolder: "./examples/extended_sample_data"
includeGlossary: True
includeLineageStressTesting: True
sourceConfig: {} sourceConfig: {}
sink: sink:
type: metadata-rest type: metadata-rest

View File

@ -24,6 +24,10 @@ from metadata.generated.schema.api.data.createDatabase import CreateDatabaseRequ
from metadata.generated.schema.api.data.createDatabaseSchema import ( from metadata.generated.schema.api.data.createDatabaseSchema import (
CreateDatabaseSchemaRequest, CreateDatabaseSchemaRequest,
) )
from metadata.generated.schema.api.data.createGlossary import CreateGlossaryRequest
from metadata.generated.schema.api.data.createGlossaryTerm import (
CreateGlossaryTermRequest,
)
from metadata.generated.schema.api.data.createTable import CreateTableRequest from metadata.generated.schema.api.data.createTable import CreateTableRequest
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
from metadata.generated.schema.entity.data.dashboard import Dashboard from metadata.generated.schema.entity.data.dashboard import Dashboard
@ -91,10 +95,20 @@ class ExtendedSampleDataSource(Source): # pylint: disable=too-many-instance-att
self.store_table_fqn = set() self.store_table_fqn = set()
self.store_data_model_fqn = [] self.store_data_model_fqn = []
self.store_dashboard_fqn = [] self.store_dashboard_fqn = []
self.main_glossary = None
self.glossary_term_list = []
sample_data_folder = self.service_connection.connectionOptions.__root__.get( sample_data_folder = self.service_connection.connectionOptions.__root__.get(
"sampleDataFolder" "sampleDataFolder"
) )
self.include_glossary = self.service_connection.connectionOptions.__root__.get(
"includeGlossary"
)
self.include_lineage_stress_testing = (
self.service_connection.connectionOptions.__root__.get(
"includeLineageStressTesting"
)
)
extneded_sample_data_folder = ( extneded_sample_data_folder = (
self.service_connection.connectionOptions.__root__.get( self.service_connection.connectionOptions.__root__.get(
"extendedSampleDataFolder" "extendedSampleDataFolder"
@ -225,13 +239,20 @@ class ExtendedSampleDataSource(Source): # pylint: disable=too-many-instance-att
) )
) )
def generate_sample_data(self): # pylint: disable=too-many-locals def generate_sample_data(
self,
): # pylint: disable=too-many-locals,too-many-statements
""" """
Generate sample data for dashboard and database service, Generate sample data for dashboard and database service,
with lineage between them, having long names, special characters and description with lineage between them, having long names, special characters and description
""" """
if self.include_glossary:
db = self.create_database_request("extended_sample_data", self.generate_text()) yield from self.create_glossary()
yield from self.create_glossary_term()
if self.include_lineage_stress_testing:
db = self.create_database_request(
"extended_sample_data", self.generate_text()
)
yield Either(right=db) yield Either(right=db)
schema = self.create_database_schema_request( schema = self.create_database_schema_request(
@ -313,9 +334,13 @@ class ExtendedSampleDataSource(Source): # pylint: disable=too-many-instance-att
schema_name=schema.name.__root__, schema_name=schema.name.__root__,
table_name=table_request.name.__root__, table_name=table_request.name.__root__,
) )
from_table = self.metadata.get_by_name(entity=Table, fqn=table_entity_fqn) from_table = self.metadata.get_by_name(
entity=Table, fqn=table_entity_fqn
)
yield from self.create_depth_nodes(from_table=from_table, to_table=to_table) yield from self.create_depth_nodes(
from_table=from_table, to_table=to_table
)
self.dashboard_service_json["name"] = name self.dashboard_service_json["name"] = name
self.dashboard_service_json["description"] = text self.dashboard_service_json["description"] = text
@ -404,6 +429,49 @@ class ExtendedSampleDataSource(Source): # pylint: disable=too-many-instance-att
) )
) )
def create_glossary(self):
    """
    Create the parent Glossary

    Builds the "NestedGlossaryTest" glossary request, keeps a reference to it
    on `self.main_glossary` and yields it as the right side of an Either.
    """
    glossary_request = CreateGlossaryRequest(
        displayName="NestedGlossaryTest",
        name="NestedGlossaryTest",
        description="Description of test glossary",
    )
    # Keep the request on the instance, as the original assignment did.
    self.main_glossary = glossary_request
    yield Either(right=glossary_request)
def create_glossary_term(self):
    """
    Create Glossary Terms

    First yields 20 term requests: while `self.glossary_term_list` tracks
    three or fewer entries the term has no parent; afterwards each term is
    parented to the most recently tracked entry. Every emitted term appends
    its dotted path to `self.glossary_term_list`.

    Then yields 500 more term requests without a parent.

    Term names are taken from `self.fake.first_name()`.
    """
    glossary_name = "NestedGlossaryTest"

    for _ in range(20):
        term_name = self.fake.first_name()

        # Nest only once more than three terms are already tracked.
        if len(self.glossary_term_list) > 3:
            parent_path = self.glossary_term_list[-1]
        else:
            parent_path = None

        nested_request = CreateGlossaryTermRequest(
            glossary=glossary_name,
            name=term_name,
            displayName=term_name,
            description="Test glossary term ",
            parent=parent_path,
        )
        yield Either(right=nested_request)

        # Track the dotted path of the term just emitted: under its parent
        # when it has a (truthy) one, directly under the glossary otherwise.
        path_prefix = parent_path if parent_path else glossary_name
        self.glossary_term_list.append(f"{path_prefix}.{term_name}")

    for _ in range(500):
        term_name = self.fake.first_name()
        flat_request = CreateGlossaryTermRequest(
            glossary=glossary_name,
            name=term_name,
            displayName=term_name,
            description="Test glossary term 1",
        )
        yield Either(right=flat_request)
def generate_name(self): def generate_name(self):
return f"Sample-@!3_(%t3st@)%_^{self.fake.name()}" return f"Sample-@!3_(%t3st@)%_^{self.fake.name()}"