From 4f82ab0557d9e22eb90d6b45ee4b41b144ffe9c1 Mon Sep 17 00:00:00 2001 From: Ayush Shah Date: Tue, 12 Aug 2025 10:10:01 +0530 Subject: [PATCH] MINOR: Enhance Sample Data with Owner and Descriptions (#22872) * Enhance Sample Data Generation: Update table and column limits, add description and owner fields to table creation requests in sample_data.py * Refactor SampleDataSource: Improve readability by adjusting conditional formatting for owner checks in sample_data.py * Reduced number of tables per schema to 10 * Update sample_data.py: Reduce the maximum number of columns per table from 2000 to 200 for improved data generation efficiency --- .../ingestion/source/database/sample_data.py | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/ingestion/src/metadata/ingestion/source/database/sample_data.py b/ingestion/src/metadata/ingestion/source/database/sample_data.py index a5667bffa1b..034bfdfe5d4 100644 --- a/ingestion/src/metadata/ingestion/source/database/sample_data.py +++ b/ingestion/src/metadata/ingestion/source/database/sample_data.py @@ -151,6 +151,7 @@ from metadata.generated.schema.type.entityLineage import ( LineageDetails, ) from metadata.generated.schema.type.entityReference import EntityReference +from metadata.generated.schema.type.entityReferenceList import EntityReferenceList from metadata.generated.schema.type.lifeCycle import AccessDetails, LifeCycle from metadata.generated.schema.type.schema import Topic as TopicSchema from metadata.ingestion.api.common import Entity @@ -192,7 +193,7 @@ NUM_SERVICES = 1 DATABASES_PER_SERVICE = 5 SCHEMAS_PER_DATABASE = 5 TABLES_PER_SCHEMA = 10 -COLUMNS_PER_TABLE = 50 +COLUMNS_PER_TABLE = 200 NUM_THREADS = 10 BATCH_SIZE = 10 COLUMNS = [ @@ -2490,9 +2491,25 @@ class SampleDataSource( # Create with minimal required fields try: + owner = self.metadata.get_by_name(User, "admin") table_request = Either( right=CreateTableRequest( - name=table_name, databaseSchema=schema_fqn, columns=COLUMNS + name=table_name, + databaseSchema=schema_fqn, + columns=COLUMNS, + description=random.choice( + [f"This is {table_name} description.", None] + ), + owners=random.choice( + [ + EntityReferenceList( + [EntityReference(id=owner.id, type="user")] + ), + None, + ] + ) + if owner + else None, ) ) yield table_request