mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-22 07:58:06 +00:00
MINOR: Remove default 100 when profileSample is None (#19672)
* fix: remove default 100 percent * fix: use get_dataset * fix: orm_profiler tests
This commit is contained in:
parent
76935f5c2e
commit
28bd01c471
@ -76,9 +76,6 @@ class SamplerInterface(ABC):
|
||||
self._columns: Optional[List[SQALikeColumn]] = None
|
||||
self.sample_config = sample_config
|
||||
|
||||
if not self.sample_config.profileSample:
|
||||
self.sample_config.profileSample = 100
|
||||
|
||||
self.entity = entity
|
||||
self.include_columns = include_columns
|
||||
self.exclude_columns = exclude_columns
|
||||
|
@ -162,23 +162,23 @@ class SQASampler(SamplerInterface, SQAInterfaceMixin):
|
||||
return self._fetch_sample_data_from_user_query()
|
||||
|
||||
# Add new RandomNumFn column
|
||||
rnd = self.get_sample_query()
|
||||
ds = self.get_dataset()
|
||||
if not columns:
|
||||
sqa_columns = [col for col in inspect(rnd).c if col.name != RANDOM_LABEL]
|
||||
sqa_columns = [col for col in inspect(ds).c if col.name != RANDOM_LABEL]
|
||||
else:
|
||||
# We can't use `columns` directly because they are bound to self.raw_dataset and not to the rnd table.
|
||||
# If we use it, it will result in a cross join between self.raw_dataset and rnd table
|
||||
names = [col.name for col in columns]
|
||||
sqa_columns = [
|
||||
col
|
||||
for col in inspect(rnd).c
|
||||
for col in inspect(ds).c
|
||||
if col.name != RANDOM_LABEL and col.name in names
|
||||
]
|
||||
|
||||
try:
|
||||
sqa_sample = (
|
||||
self.client.query(*sqa_columns)
|
||||
.select_from(rnd)
|
||||
.select_from(ds)
|
||||
.limit(self.sample_limit)
|
||||
.all()
|
||||
)
|
||||
|
@ -549,8 +549,7 @@ def test_workflow_values_partition(ingest, metadata, service_name):
|
||||
profile = metadata.get_latest_table_profile(table.fullyQualifiedName).profile
|
||||
|
||||
assert profile.rowCount == 4.0
|
||||
# If we don't have any sample, default to 100
|
||||
assert profile.profileSample == 100.0
|
||||
assert profile.profileSample == None
|
||||
|
||||
workflow_config["processor"] = {
|
||||
"type": "orm-profiler",
|
||||
|
@ -65,7 +65,7 @@ class ProfilerTestParameters:
|
||||
ColumnProfile(
|
||||
name="three",
|
||||
timestamp=Timestamp(0),
|
||||
valuesCount=1,
|
||||
valuesCount=2,
|
||||
nullCount=1,
|
||||
)
|
||||
],
|
||||
@ -101,7 +101,7 @@ class ProfilerTestParameters:
|
||||
ColumnProfile(
|
||||
name="gender",
|
||||
timestamp=Timestamp(0),
|
||||
valuesCount=932,
|
||||
valuesCount=1000,
|
||||
nullCount=0,
|
||||
)
|
||||
],
|
||||
|
Loading…
x
Reference in New Issue
Block a user