mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-23 08:28:10 +00:00
fix: pass rnd table bound columns to sample query (#13561)
This commit is contained in:
parent
c705586a63
commit
31d2595e4f
@ -134,7 +134,14 @@ class SQASampler(SamplerInterface):
|
||||
if not columns:
|
||||
sqa_columns = [col for col in inspect(rnd).c if col.name != RANDOM_LABEL]
|
||||
else:
|
||||
sqa_columns = list(columns) # copy columns
|
||||
# we can't directly use columns as it is bound to self.table and not the rnd table.
|
||||
# If we use it, it will result in a cross join between self.table and rnd table
|
||||
names = [col.name for col in columns]
|
||||
sqa_columns = [
|
||||
col
|
||||
for col in inspect(rnd).c
|
||||
if col.name != RANDOM_LABEL and col.name in names
|
||||
]
|
||||
|
||||
sqa_sample = (
|
||||
self.client.query(*sqa_columns)
|
||||
|
@ -690,3 +690,78 @@ class ProfilerWorkflowTest(TestCase):
|
||||
assert sorted([c.__root__ for c in sample_data.sampleData.columns]) == sorted(
|
||||
["id", "age"]
|
||||
)
|
||||
|
||||
def test_sample_data_ingestion(self):
    """Run the profiler on the users table and compare the stored sample rows.

    The workflow is restricted to the ``users`` table, executed, and must
    finish with status 0. The sample data is then read back through
    ``self.metadata`` and compared, order-insensitively, with the rows
    listed below. The last value of every stored row is dropped before
    comparing because it is a timestamp that changes between runs.
    """
    users_fqn = "test_sqlite.main.main.users"

    # Rows we expect back, without the trailing dynamic timestamp value.
    expected_rows = [
        [1, "John", "John Doe", "johnny b goode", 30],
        [2, "Jane", "Jone Doe", None, 31],
        [3, "Joh", "Joh Doe", None, 37],
        [4, "Jae", "Jae Doe", None, 38],
    ]

    config = deepcopy(ingestion_config)
    source_settings = config["source"]["sourceConfig"]["config"]
    source_settings.update(
        {
            "type": "Profiler",
            "tableFilterPattern": {"includes": ["users"]},
        }
    )

    profiler_settings = {
        "name": "my_profiler",
        "timeout_seconds": 60,
        "metrics": ["row_count", "min", "max", "COUNT", "null_count"],
    }
    config["processor"] = {
        "type": "orm-profiler",
        "config": {
            "profiler": profiler_settings,
            "tableConfig": [{"fullyQualifiedName": users_fqn}],
        },
    }

    workflow = ProfilerWorkflow.create(config)
    workflow.execute()
    exit_status = workflow.result_status()
    workflow.stop()

    assert exit_status == 0

    users_table = self.metadata.get_by_name(entity=Table, fqn=users_fqn)

    stored_rows = self.metadata.get_sample_data(users_table).sampleData.rows
    # remove timestamp as dynamic
    trimmed_rows = [row[:-1] for row in stored_rows]

    self.assertListEqual(sorted(trimmed_rows), sorted(expected_rows))
|
||||
|
Loading…
x
Reference in New Issue
Block a user