mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-23 16:38:17 +00:00
fix: pass rnd table bound columns to sample query (#13561)
This commit is contained in:
parent
c705586a63
commit
31d2595e4f
@ -134,7 +134,14 @@ class SQASampler(SamplerInterface):
|
|||||||
if not columns:
|
if not columns:
|
||||||
sqa_columns = [col for col in inspect(rnd).c if col.name != RANDOM_LABEL]
|
sqa_columns = [col for col in inspect(rnd).c if col.name != RANDOM_LABEL]
|
||||||
else:
|
else:
|
||||||
sqa_columns = list(columns) # copy columns
|
# we can't directly use columns as it is bound to self.table and not the rnd table.
|
||||||
|
# If we use it, it will result in a cross join between self.table and rnd table
|
||||||
|
names = [col.name for col in columns]
|
||||||
|
sqa_columns = [
|
||||||
|
col
|
||||||
|
for col in inspect(rnd).c
|
||||||
|
if col.name != RANDOM_LABEL and col.name in names
|
||||||
|
]
|
||||||
|
|
||||||
sqa_sample = (
|
sqa_sample = (
|
||||||
self.client.query(*sqa_columns)
|
self.client.query(*sqa_columns)
|
||||||
|
@ -690,3 +690,78 @@ class ProfilerWorkflowTest(TestCase):
|
|||||||
assert sorted([c.__root__ for c in sample_data.sampleData.columns]) == sorted(
|
assert sorted([c.__root__ for c in sample_data.sampleData.columns]) == sorted(
|
||||||
["id", "age"]
|
["id", "age"]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_sample_data_ingestion(self):
|
||||||
|
"""test the rows of the sample data are what we expect"""
|
||||||
|
workflow_config = deepcopy(ingestion_config)
|
||||||
|
workflow_config["source"]["sourceConfig"]["config"].update(
|
||||||
|
{
|
||||||
|
"type": "Profiler",
|
||||||
|
"tableFilterPattern": {"includes": ["users"]},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
workflow_config["processor"] = {
|
||||||
|
"type": "orm-profiler",
|
||||||
|
"config": {
|
||||||
|
"profiler": {
|
||||||
|
"name": "my_profiler",
|
||||||
|
"timeout_seconds": 60,
|
||||||
|
"metrics": ["row_count", "min", "max", "COUNT", "null_count"],
|
||||||
|
},
|
||||||
|
"tableConfig": [
|
||||||
|
{
|
||||||
|
"fullyQualifiedName": "test_sqlite.main.main.users",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
profiler_workflow = ProfilerWorkflow.create(workflow_config)
|
||||||
|
profiler_workflow.execute()
|
||||||
|
status = profiler_workflow.result_status()
|
||||||
|
profiler_workflow.stop()
|
||||||
|
|
||||||
|
assert status == 0
|
||||||
|
|
||||||
|
table = self.metadata.get_by_name(
|
||||||
|
entity=Table,
|
||||||
|
fqn="test_sqlite.main.main.users",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Test we are getting the expected sample data
|
||||||
|
expected_sample_data = [
|
||||||
|
[
|
||||||
|
1,
|
||||||
|
"John",
|
||||||
|
"John Doe",
|
||||||
|
"johnny b goode",
|
||||||
|
30,
|
||||||
|
],
|
||||||
|
[
|
||||||
|
2,
|
||||||
|
"Jane",
|
||||||
|
"Jone Doe",
|
||||||
|
None,
|
||||||
|
31,
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3,
|
||||||
|
"Joh",
|
||||||
|
"Joh Doe",
|
||||||
|
None,
|
||||||
|
37,
|
||||||
|
],
|
||||||
|
[
|
||||||
|
4,
|
||||||
|
"Jae",
|
||||||
|
"Jae Doe",
|
||||||
|
None,
|
||||||
|
38,
|
||||||
|
],
|
||||||
|
]
|
||||||
|
sample_data = self.metadata.get_sample_data(table).sampleData.rows
|
||||||
|
sample_data = [data[:-1] for data in sample_data] # remove timestamp as dynamic
|
||||||
|
self.assertListEqual(
|
||||||
|
sorted(sample_data),
|
||||||
|
sorted(expected_sample_data),
|
||||||
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user