mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-10-15 10:48:31 +00:00
MINOR - Fix column to match set test (#15186)
* fix: column value test for SQA types * style: ran python linting
This commit is contained in:
parent
bdf27458e5
commit
ba8208222e
@ -16,6 +16,7 @@ Validator for table column to match set test case
|
||||
import collections
|
||||
import traceback
|
||||
from abc import abstractmethod
|
||||
from typing import List
|
||||
|
||||
from metadata.data_quality.validations.base_test_handler import BaseTestValidator
|
||||
from metadata.generated.schema.tests.basic import (
|
||||
@ -93,5 +94,5 @@ class BaseTableColumnToMatchSetValidator(BaseTestValidator):
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
def _run_results(self):
|
||||
def _run_results(self) -> List[str]:
|
||||
raise NotImplementedError
|
||||
|
@ -13,6 +13,7 @@
|
||||
Validator for table column name to match set test case
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
|
||||
from metadata.data_quality.validations.mixins.pandas_validator_mixin import (
|
||||
PandasValidatorMixin,
|
||||
@ -30,7 +31,7 @@ class TableColumnToMatchSetValidator(
|
||||
):
|
||||
"""Validator table column name to match set test case"""
|
||||
|
||||
def _run_results(self):
|
||||
def _run_results(self) -> List[str]:
|
||||
"""compute result of the test case"""
|
||||
names = list(self.runner[0].columns)
|
||||
if not names:
|
||||
|
@ -14,9 +14,10 @@ Validator for table column name to match set test case
|
||||
"""
|
||||
|
||||
|
||||
from typing import Optional
|
||||
from typing import List, cast
|
||||
|
||||
from sqlalchemy import inspect
|
||||
from sqlalchemy.sql.base import ColumnCollection
|
||||
|
||||
from metadata.data_quality.validations.mixins.sqa_validator_mixin import (
|
||||
SQAValidatorMixin,
|
||||
@ -34,11 +35,15 @@ class TableColumnToMatchSetValidator(
|
||||
):
|
||||
"""Validator for table column name to match set test case"""
|
||||
|
||||
def _run_results(self) -> Optional[int]:
|
||||
def _run_results(self) -> List[str]:
|
||||
"""compute result of the test case"""
|
||||
names = inspect(self.runner.table).c
|
||||
if not names:
|
||||
raise ValueError(
|
||||
f"Column names for test case {self.test_case.name} returned None"
|
||||
)
|
||||
names = cast(
|
||||
ColumnCollection, names
|
||||
) # satisfy type checker for names.keys() access
|
||||
names = list(names.keys())
|
||||
return names
|
||||
|
@ -158,10 +158,10 @@ class BigtableSource(CommonNoSQLSource, MultiDBSource):
|
||||
records = [{"row_key": b"row_key"}]
|
||||
# In order to get a "good" sample of data, we try to distribute the sampling
|
||||
# across multiple column families.
|
||||
for cf in list(column_families.keys())[:MAX_COLUMN_FAMILIES]:
|
||||
for column_family in list(column_families.keys())[:MAX_COLUMN_FAMILIES]:
|
||||
records.extend(
|
||||
self._get_records_for_column_family(
|
||||
table, cf, SAMPLES_PER_COLUMN_FAMILY
|
||||
table, column_family, SAMPLES_PER_COLUMN_FAMILY
|
||||
)
|
||||
)
|
||||
if len(records) >= GLOBAL_SAMPLE_SIZE:
|
||||
|
@ -39,22 +39,22 @@ class Row(BaseModel):
|
||||
@classmethod
|
||||
def from_partial_row(cls, row: PartialRowData):
|
||||
cells = {}
|
||||
for cf, cf_cells in row.cells.items():
|
||||
cells.setdefault(cf, {})
|
||||
for column_family, cf_cells in row.cells.items():
|
||||
cells.setdefault(column_family, {})
|
||||
for column, cell in cf_cells.items():
|
||||
cells[cf][column] = Cell(
|
||||
cells[column_family][column] = Cell(
|
||||
values=[Value(timestamp=c.timestamp, value=c.value) for c in cell]
|
||||
)
|
||||
return cls(cells=cells, row_key=row.row_key)
|
||||
|
||||
def to_record(self) -> Dict[str, bytes]:
|
||||
record = {}
|
||||
for cf, cells in self.cells.items():
|
||||
for column_family, cells in self.cells.items():
|
||||
for column, cell in cells.items():
|
||||
# Since each cell can have multiple values and the API returns them in descending order
|
||||
# from latest to oldest, we only take the latest value. This probably does not matter since
|
||||
# all we care about is data types and all data stored in BigTable is of type `bytes`.
|
||||
record[f"{cf}.{column.decode()}"] = cell.values[0].value
|
||||
record[f"{column_family}.{column.decode()}"] = cell.values[0].value
|
||||
record["row_key"] = self.row_key
|
||||
|
||||
return record
|
||||
|
Loading…
x
Reference in New Issue
Block a user