mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-22 16:08:13 +00:00
MINOR: Remove default 100 when profileSample is None (#19672)
* fix: remove default 100 percent
* fix: use get_dataset
* fix: orm_profiler tests
This commit is contained in:
parent
76935f5c2e
commit
28bd01c471
@ -76,9 +76,6 @@ class SamplerInterface(ABC):
|
|||||||
self._columns: Optional[List[SQALikeColumn]] = None
|
self._columns: Optional[List[SQALikeColumn]] = None
|
||||||
self.sample_config = sample_config
|
self.sample_config = sample_config
|
||||||
|
|
||||||
if not self.sample_config.profileSample:
|
|
||||||
self.sample_config.profileSample = 100
|
|
||||||
|
|
||||||
self.entity = entity
|
self.entity = entity
|
||||||
self.include_columns = include_columns
|
self.include_columns = include_columns
|
||||||
self.exclude_columns = exclude_columns
|
self.exclude_columns = exclude_columns
|
||||||
|
@ -162,23 +162,23 @@ class SQASampler(SamplerInterface, SQAInterfaceMixin):
|
|||||||
return self._fetch_sample_data_from_user_query()
|
return self._fetch_sample_data_from_user_query()
|
||||||
|
|
||||||
# Add new RandomNumFn column
|
# Add new RandomNumFn column
|
||||||
rnd = self.get_sample_query()
|
ds = self.get_dataset()
|
||||||
if not columns:
|
if not columns:
|
||||||
sqa_columns = [col for col in inspect(rnd).c if col.name != RANDOM_LABEL]
|
sqa_columns = [col for col in inspect(ds).c if col.name != RANDOM_LABEL]
|
||||||
else:
|
else:
|
||||||
# we can't directly use columns as it is bound to self.raw_dataset and not the rnd table.
|
# we can't directly use columns as it is bound to self.raw_dataset and not the rnd table.
|
||||||
# If we use it, it will result in a cross join between self.raw_dataset and rnd table
|
# If we use it, it will result in a cross join between self.raw_dataset and rnd table
|
||||||
names = [col.name for col in columns]
|
names = [col.name for col in columns]
|
||||||
sqa_columns = [
|
sqa_columns = [
|
||||||
col
|
col
|
||||||
for col in inspect(rnd).c
|
for col in inspect(ds).c
|
||||||
if col.name != RANDOM_LABEL and col.name in names
|
if col.name != RANDOM_LABEL and col.name in names
|
||||||
]
|
]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
sqa_sample = (
|
sqa_sample = (
|
||||||
self.client.query(*sqa_columns)
|
self.client.query(*sqa_columns)
|
||||||
.select_from(rnd)
|
.select_from(ds)
|
||||||
.limit(self.sample_limit)
|
.limit(self.sample_limit)
|
||||||
.all()
|
.all()
|
||||||
)
|
)
|
||||||
|
@ -549,8 +549,7 @@ def test_workflow_values_partition(ingest, metadata, service_name):
|
|||||||
profile = metadata.get_latest_table_profile(table.fullyQualifiedName).profile
|
profile = metadata.get_latest_table_profile(table.fullyQualifiedName).profile
|
||||||
|
|
||||||
assert profile.rowCount == 4.0
|
assert profile.rowCount == 4.0
|
||||||
# If we don't have any sample, default to 100
|
assert profile.profileSample == None
|
||||||
assert profile.profileSample == 100.0
|
|
||||||
|
|
||||||
workflow_config["processor"] = {
|
workflow_config["processor"] = {
|
||||||
"type": "orm-profiler",
|
"type": "orm-profiler",
|
||||||
|
@ -65,7 +65,7 @@ class ProfilerTestParameters:
|
|||||||
ColumnProfile(
|
ColumnProfile(
|
||||||
name="three",
|
name="three",
|
||||||
timestamp=Timestamp(0),
|
timestamp=Timestamp(0),
|
||||||
valuesCount=1,
|
valuesCount=2,
|
||||||
nullCount=1,
|
nullCount=1,
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
@ -101,7 +101,7 @@ class ProfilerTestParameters:
|
|||||||
ColumnProfile(
|
ColumnProfile(
|
||||||
name="gender",
|
name="gender",
|
||||||
timestamp=Timestamp(0),
|
timestamp=Timestamp(0),
|
||||||
valuesCount=932,
|
valuesCount=1000,
|
||||||
nullCount=0,
|
nullCount=0,
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
|
Loading…
x
Reference in New Issue
Block a user