MINOR: Enhance Sample Data with Owner and Descriptions (#22872)

* Enhance Sample Data Generation: Update table and column limits, add description and owner fields to table creation requests in sample_data.py

* Refactor SampleDataSource: Improve readability by adjusting conditional formatting for owner checks in sample_data.py

* Reduce the number of tables per schema to 10

* Update sample_data.py: Reduce the maximum number of columns per table from 2000 to 200 for improved data generation efficiency
This commit is contained in:
Ayush Shah 2025-08-12 10:10:01 +05:30 committed by GitHub
parent 76bd3d2ba4
commit 4f82ab0557
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -151,6 +151,7 @@ from metadata.generated.schema.type.entityLineage import (
LineageDetails,
)
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.generated.schema.type.entityReferenceList import EntityReferenceList
from metadata.generated.schema.type.lifeCycle import AccessDetails, LifeCycle
from metadata.generated.schema.type.schema import Topic as TopicSchema
from metadata.ingestion.api.common import Entity
@ -192,7 +193,7 @@ NUM_SERVICES = 1
DATABASES_PER_SERVICE = 5
SCHEMAS_PER_DATABASE = 5
TABLES_PER_SCHEMA = 10
COLUMNS_PER_TABLE = 50
COLUMNS_PER_TABLE = 200
NUM_THREADS = 10
BATCH_SIZE = 10
COLUMNS = [
@ -2490,9 +2491,25 @@ class SampleDataSource(
# Create with minimal required fields
try:
owner = self.metadata.get_by_name(User, "admin")
table_request = Either(
right=CreateTableRequest(
name=table_name, databaseSchema=schema_fqn, columns=COLUMNS
name=table_name,
databaseSchema=schema_fqn,
columns=COLUMNS,
description=random.choice(
[f"This is {table_name} description.", None]
),
owners=random.choice(
[
EntityReferenceList(
[EntityReference(id=owner.id, type="user")]
),
None,
]
)
if owner
else None,
)
)
yield table_request