mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-20 23:18:01 +00:00
* fix: add cli support for computePassedFailedRowCount * fix: div zero error and improve empty table message * doc: updated test case page * style: ran python linting
This commit is contained in:
parent
647287951d
commit
ceaf205f59
@ -36,6 +36,7 @@ class TestCaseDefinition(ConfigModel):
|
|||||||
testDefinitionName: str
|
testDefinitionName: str
|
||||||
columnName: Optional[str] = None
|
columnName: Optional[str] = None
|
||||||
parameterValues: Optional[List[TestCaseParameterValue]]
|
parameterValues: Optional[List[TestCaseParameterValue]]
|
||||||
|
computePassedFailedRowCount: Optional[bool] = False
|
||||||
|
|
||||||
|
|
||||||
class TestSuiteProcessorConfig(ConfigModel):
|
class TestSuiteProcessorConfig(ConfigModel):
|
||||||
|
@ -251,13 +251,14 @@ class TestCaseRunner(Processor):
|
|||||||
if test_case_to_update.name == test_case.name.__root__
|
if test_case_to_update.name == test_case.name.__root__
|
||||||
)
|
)
|
||||||
updated_test_case = self.metadata.patch_test_case_definition(
|
updated_test_case = self.metadata.patch_test_case_definition(
|
||||||
source=test_case,
|
test_case=test_case,
|
||||||
entity_link=entity_link.get_entity_link(
|
entity_link=entity_link.get_entity_link(
|
||||||
Table,
|
Table,
|
||||||
fqn=table_fqn,
|
fqn=table_fqn,
|
||||||
column_name=test_case_definition.columnName,
|
column_name=test_case_definition.columnName,
|
||||||
),
|
),
|
||||||
test_case_parameter_values=test_case_definition.parameterValues,
|
test_case_parameter_values=test_case_definition.parameterValues,
|
||||||
|
compute_passed_failed_row_count=test_case_definition.computePassedFailedRowCount,
|
||||||
)
|
)
|
||||||
if updated_test_case:
|
if updated_test_case:
|
||||||
test_cases.pop(indx)
|
test_cases.pop(indx)
|
||||||
|
@ -113,7 +113,7 @@ class BaseTestValidator(ABC):
|
|||||||
sampleData=None,
|
sampleData=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
if (row_count is not None) and (
|
if (row_count is not None and row_count != 0) and (
|
||||||
# we'll need at least one of these to be not None to compute the other
|
# we'll need at least one of these to be not None to compute the other
|
||||||
(failed_rows is not None)
|
(failed_rows is not None)
|
||||||
or (passed_rows is not None)
|
or (passed_rows is not None)
|
||||||
|
@ -79,7 +79,10 @@ class SQAValidatorMixin:
|
|||||||
|
|
||||||
if res is None:
|
if res is None:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Query on table/column {column.name if column is not None else ''} returned None"
|
f"\nQuery on table/column {column.name if column is not None else ''} returned None. Your table might be empty. "
|
||||||
|
"If you confirmed your table is not empty and are still seeing this message you can:\n"
|
||||||
|
"\t1. check the documentation: https://docs.open-metadata.org/v1.3.x/connectors/ingestion/workflows/data-quality/tests\n"
|
||||||
|
"\t2. reach out to the Collate team for support"
|
||||||
)
|
)
|
||||||
|
|
||||||
return res
|
return res
|
||||||
|
@ -234,9 +234,10 @@ class OMetaPatchMixin(OMetaPatchMixinBase):
|
|||||||
|
|
||||||
def patch_test_case_definition(
|
def patch_test_case_definition(
|
||||||
self,
|
self,
|
||||||
source: TestCase,
|
test_case: TestCase,
|
||||||
entity_link: str,
|
entity_link: str,
|
||||||
test_case_parameter_values: Optional[List[TestCaseParameterValue]] = None,
|
test_case_parameter_values: Optional[List[TestCaseParameterValue]] = None,
|
||||||
|
compute_passed_failed_row_count: Optional[bool] = False,
|
||||||
) -> Optional[TestCase]:
|
) -> Optional[TestCase]:
|
||||||
"""Given a test case and a test case definition JSON PATCH the test case
|
"""Given a test case and a test case definition JSON PATCH the test case
|
||||||
|
|
||||||
@ -245,7 +246,7 @@ class OMetaPatchMixin(OMetaPatchMixinBase):
|
|||||||
test_case_definition: test case definition to add
|
test_case_definition: test case definition to add
|
||||||
"""
|
"""
|
||||||
source: TestCase = self._fetch_entity_if_exists(
|
source: TestCase = self._fetch_entity_if_exists(
|
||||||
entity=TestCase, entity_id=source.id, fields=["testDefinition", "testSuite"]
|
entity=TestCase, entity_id=test_case.id, fields=["testDefinition", "testSuite"] # type: ignore
|
||||||
) # type: ignore
|
) # type: ignore
|
||||||
|
|
||||||
if not source:
|
if not source:
|
||||||
@ -256,6 +257,8 @@ class OMetaPatchMixin(OMetaPatchMixinBase):
|
|||||||
destination.entityLink = EntityLink(__root__=entity_link)
|
destination.entityLink = EntityLink(__root__=entity_link)
|
||||||
if test_case_parameter_values:
|
if test_case_parameter_values:
|
||||||
destination.parameterValues = test_case_parameter_values
|
destination.parameterValues = test_case_parameter_values
|
||||||
|
if compute_passed_failed_row_count != source.computePassedFailedRowCount:
|
||||||
|
destination.computePassedFailedRowCount = compute_passed_failed_row_count
|
||||||
|
|
||||||
return self.patch(entity=TestCase, source=source, destination=destination)
|
return self.patch(entity=TestCase, source=source, destination=destination)
|
||||||
|
|
||||||
|
@ -127,7 +127,10 @@ def ometa_to_sqa_orm(
|
|||||||
{
|
{
|
||||||
"__tablename__": str(table.name.__root__),
|
"__tablename__": str(table.name.__root__),
|
||||||
"__table_args__": {
|
"__table_args__": {
|
||||||
"schema": orm_schema_name,
|
# SQLite does not support schemas
|
||||||
|
"schema": orm_schema_name
|
||||||
|
if table.serviceType != databaseService.DatabaseServiceType.SQLite
|
||||||
|
else None,
|
||||||
"extend_existing": True, # Recreates the table ORM object if it already exists. Useful for testing
|
"extend_existing": True, # Recreates the table ORM object if it already exists. Useful for testing
|
||||||
"quote": check_snowflake_case_sensitive(
|
"quote": check_snowflake_case_sensitive(
|
||||||
table.serviceType, table.name.__root__
|
table.serviceType, table.name.__root__
|
||||||
|
@ -71,9 +71,10 @@ test_suite_config = {
|
|||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "table_column_name_to_exists",
|
"name": "table_column_to_be_not_null",
|
||||||
"testDefinitionName": "tableColumnNameToExist",
|
"testDefinitionName": "columnValuesToBeNotNull",
|
||||||
"parameterValues": [{"name": "columnName", "value": "id"}],
|
"columnName": "id",
|
||||||
|
"computePassedFailedRowCount": True,
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
@ -94,7 +95,16 @@ Base = declarative_base()
|
|||||||
|
|
||||||
|
|
||||||
class User(Base):
|
class User(Base):
|
||||||
__tablename__ = ("users",)
|
__tablename__ = "users"
|
||||||
|
id = sqa.Column(sqa.Integer, primary_key=True)
|
||||||
|
name = sqa.Column(sqa.String(256))
|
||||||
|
fullname = sqa.Column(sqa.String(256))
|
||||||
|
nickname = sqa.Column(sqa.String(256))
|
||||||
|
age = sqa.Column(sqa.Integer)
|
||||||
|
|
||||||
|
|
||||||
|
class EmptyUser(Base):
|
||||||
|
__tablename__ = "empty_users"
|
||||||
id = sqa.Column(sqa.Integer, primary_key=True)
|
id = sqa.Column(sqa.Integer, primary_key=True)
|
||||||
name = sqa.Column(sqa.String(256))
|
name = sqa.Column(sqa.String(256))
|
||||||
fullname = sqa.Column(sqa.String(256))
|
fullname = sqa.Column(sqa.String(256))
|
||||||
@ -159,11 +169,25 @@ class TestE2EWorkflow(unittest.TestCase):
|
|||||||
databaseSchema=database_schema.fullyQualifiedName,
|
databaseSchema=database_schema.fullyQualifiedName,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
cls.metadata.create_or_update(
|
||||||
|
CreateTableRequest(
|
||||||
|
name="empty_users",
|
||||||
|
columns=[
|
||||||
|
Column(name="id", dataType=DataType.INT),
|
||||||
|
Column(name="name", dataType=DataType.STRING),
|
||||||
|
Column(name="fullname", dataType=DataType.STRING),
|
||||||
|
Column(name="nickname", dataType=DataType.STRING),
|
||||||
|
Column(name="age", dataType=DataType.INT),
|
||||||
|
],
|
||||||
|
databaseSchema=database_schema.fullyQualifiedName,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
engine = sqa.create_engine(f"sqlite:///{cls.sqlite_conn.config.databaseMode}")
|
engine = sqa.create_engine(f"sqlite:///{cls.sqlite_conn.config.databaseMode}")
|
||||||
session = Session(bind=engine)
|
session = Session(bind=engine)
|
||||||
|
|
||||||
User.__table__.create(bind=engine)
|
User.__table__.create(bind=engine)
|
||||||
|
EmptyUser.__table__.create(bind=engine)
|
||||||
|
|
||||||
for _ in range(10):
|
for _ in range(10):
|
||||||
data = [
|
data = [
|
||||||
@ -212,18 +236,33 @@ class TestE2EWorkflow(unittest.TestCase):
|
|||||||
|
|
||||||
def test_e2e_cli_workflow(self):
|
def test_e2e_cli_workflow(self):
|
||||||
"""test cli workflow e2e"""
|
"""test cli workflow e2e"""
|
||||||
|
parameters = [
|
||||||
|
{"table_name": "users", "status": "Success"},
|
||||||
|
{"table_name": "empty_users", "status": "Aborted"},
|
||||||
|
]
|
||||||
|
|
||||||
|
for param in parameters:
|
||||||
|
with self.subTest(param=param):
|
||||||
|
table_name = param["table_name"]
|
||||||
|
status = param["status"]
|
||||||
|
test_suite_config["source"]["sourceConfig"]["config"].update(
|
||||||
|
{
|
||||||
|
"entityFullyQualifiedName": f"test_suite_service_test.test_suite_database.test_suite_database_schema.{table_name}"
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
workflow = TestSuiteWorkflow.create(test_suite_config)
|
workflow = TestSuiteWorkflow.create(test_suite_config)
|
||||||
workflow.execute()
|
workflow.execute()
|
||||||
workflow.raise_from_status()
|
workflow.raise_from_status()
|
||||||
|
|
||||||
test_case_1 = self.metadata.get_by_name(
|
test_case_1 = self.metadata.get_by_name(
|
||||||
entity=TestCase,
|
entity=TestCase,
|
||||||
fqn="test_suite_service_test.test_suite_database.test_suite_database_schema.users.my_test_case",
|
fqn=f"test_suite_service_test.test_suite_database.test_suite_database_schema.{table_name}.my_test_case",
|
||||||
fields=["testDefinition", "testSuite"],
|
fields=["testDefinition", "testSuite"],
|
||||||
)
|
)
|
||||||
test_case_2 = self.metadata.get_by_name(
|
test_case_2 = self.metadata.get_by_name(
|
||||||
entity=TestCase,
|
entity=TestCase,
|
||||||
fqn="test_suite_service_test.test_suite_database.test_suite_database_schema.users.table_column_name_to_exists",
|
fqn=f"test_suite_service_test.test_suite_database.test_suite_database_schema.{table_name}.id.table_column_to_be_not_null",
|
||||||
fields=["testDefinition", "testSuite"],
|
fields=["testDefinition", "testSuite"],
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -231,19 +270,30 @@ class TestE2EWorkflow(unittest.TestCase):
|
|||||||
assert test_case_2
|
assert test_case_2
|
||||||
|
|
||||||
test_case_result_1 = self.metadata.client.get(
|
test_case_result_1 = self.metadata.client.get(
|
||||||
"/dataQuality/testCases/test_suite_service_test.test_suite_database.test_suite_database_schema.users.my_test_case/testCaseResult",
|
f"/dataQuality/testCases/test_suite_service_test.test_suite_database.test_suite_database_schema.{table_name}"
|
||||||
|
".my_test_case/testCaseResult",
|
||||||
data={
|
data={
|
||||||
"startTs": int((datetime.now() - timedelta(days=3)).timestamp()),
|
"startTs": int((datetime.now() - timedelta(days=3)).timestamp())
|
||||||
"endTs": int((datetime.now() + timedelta(days=3)).timestamp()),
|
* 1000,
|
||||||
|
"endTs": int((datetime.now() + timedelta(days=3)).timestamp())
|
||||||
|
* 1000,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
test_case_result_2 = self.metadata.client.get(
|
test_case_result_2 = self.metadata.client.get(
|
||||||
"/dataQuality/testCases/test_suite_service_test.test_suite_database.test_suite_database_schema.users.table_column_name_to_exists/testCaseResult",
|
f"/dataQuality/testCases/test_suite_service_test.test_suite_database.test_suite_database_schema.{table_name}"
|
||||||
|
".id.table_column_to_be_not_null/testCaseResult",
|
||||||
data={
|
data={
|
||||||
"startTs": int((datetime.now() - timedelta(days=3)).timestamp()),
|
"startTs": int((datetime.now() - timedelta(days=3)).timestamp())
|
||||||
"endTs": int((datetime.now() + timedelta(days=3)).timestamp()),
|
* 1000,
|
||||||
|
"endTs": int((datetime.now() + timedelta(days=3)).timestamp())
|
||||||
|
* 1000,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
assert test_case_result_1
|
data_test_case_result_1: dict = test_case_result_1.get("data") # type: ignore
|
||||||
assert test_case_result_2
|
data_test_case_result_2: dict = test_case_result_2.get("data") # type: ignore
|
||||||
|
|
||||||
|
assert data_test_case_result_1
|
||||||
|
assert data_test_case_result_1[0]["testCaseStatus"] == "Success"
|
||||||
|
assert data_test_case_result_2
|
||||||
|
assert data_test_case_result_2[0]["testCaseStatus"] == status
|
||||||
|
@ -293,11 +293,11 @@ Validate a list of table column name matches an expected set of columns
|
|||||||
|
|
||||||
### Table Custom SQL Test
|
### Table Custom SQL Test
|
||||||
Write your own SQL test. When writing your query you can use 2 strategies:
|
Write your own SQL test. When writing your query you can use 2 strategies:
|
||||||
- `ROWS` (default): expects the query to be written as `SELECT <field>, <field> FROM <foo> WHERE <condition>`. **Note** if your query returns a large amount of rows it might cause an "Out Of Memeory" error. In this case we recoomend you to use the `COUNT` startegy.
|
- `ROWS` (default): expects the query to be written as `SELECT <field>, <field> FROM <foo> WHERE <condition>`. **Note** if your query returns a large number of rows it might cause an "Out Of Memory" error. In this case we recommend using the `COUNT` strategy.
|
||||||
- `COUNT`: expects the query to be written as `SELECT COUNT(<field>) FROM <foo> WHERE <condition>`.
|
- `COUNT`: expects the query to be written as `SELECT COUNT(<field>) FROM <foo> WHERE <condition>`.
|
||||||
|
|
||||||
**How to use the Threshold Parameter?**
|
**How to use the Threshold Parameter?**
|
||||||
The threshold allows you to define a limit for which you test should pass or fail - by defaut this number is 0. For example if my custom SQL query test returns 10 rows and my threshold is 5 the test will fail. If I update my threshold to 11 on my next run my test will pass.
|
The threshold allows you to define a limit for which your test should pass or fail - by default this number is 0. For example, if my custom SQL query test returns 10 rows (or a COUNT value of 10) and my threshold is 5, the test will fail. If I update my threshold to 11, my test will pass on the next run.
|
||||||
|
|
||||||
**Properties**
|
**Properties**
|
||||||
|
|
||||||
@ -473,6 +473,7 @@ Makes sure that there are no duplicate values in a given column.
|
|||||||
description: test description
|
description: test description
|
||||||
columnName: columnName
|
columnName: columnName
|
||||||
testDefinitionName: columnValuesToBeUnique
|
testDefinitionName: columnValuesToBeUnique
|
||||||
|
computePassedFailedRowCount: <true or false>
|
||||||
parameterValues:
|
parameterValues:
|
||||||
- name: columnNames
|
- name: columnNames
|
||||||
value: true
|
value: true
|
||||||
@ -516,6 +517,7 @@ Validates that there are no null values in the column.
|
|||||||
description: test description
|
description: test description
|
||||||
columnName: columnName
|
columnName: columnName
|
||||||
testDefinitionName: columnValuesToBeNotNull
|
testDefinitionName: columnValuesToBeNotNull
|
||||||
|
computePassedFailedRowCount: <true or false>
|
||||||
parameterValues:
|
parameterValues:
|
||||||
- name: columnValuesToBeNotNull
|
- name: columnValuesToBeNotNull
|
||||||
value: true
|
value: true
|
||||||
@ -569,6 +571,7 @@ The other databases will fall back to the `LIKE` expression
|
|||||||
description: test description
|
description: test description
|
||||||
columnName: columnName
|
columnName: columnName
|
||||||
testDefinitionName: columnValuesToMatchRegex
|
testDefinitionName: columnValuesToMatchRegex
|
||||||
|
computePassedFailedRowCount: <true or false>
|
||||||
parameterValues:
|
parameterValues:
|
||||||
- name: regex
|
- name: regex
|
||||||
value: "%something%"
|
value: "%something%"
|
||||||
@ -622,6 +625,7 @@ The other databases will fall back to the `LIKE` expression
|
|||||||
description: test description
|
description: test description
|
||||||
columnName: columnName
|
columnName: columnName
|
||||||
testDefinitionName: columnValuesToMatchRegex
|
testDefinitionName: columnValuesToMatchRegex
|
||||||
|
computePassedFailedRowCount: <true or false>
|
||||||
parameterValues:
|
parameterValues:
|
||||||
- name: forbiddenRegex
|
- name: forbiddenRegex
|
||||||
value: "%something%"
|
value: "%something%"
|
||||||
@ -661,8 +665,11 @@ Validate values form a set are present in a column.
|
|||||||
**YAML Config**
|
**YAML Config**
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
testDefinitionName: columnValuesToBeInSet
|
- name: myTestName
|
||||||
parameterValues:
|
testDefinitionName: columnValuesToBeInSet
|
||||||
|
columnName: columnName
|
||||||
|
computePassedFailedRowCount: <true or false>
|
||||||
|
parameterValues:
|
||||||
- name: allowedValues
|
- name: allowedValues
|
||||||
value: ["forbidden1", "forbidden2"]
|
value: ["forbidden1", "forbidden2"]
|
||||||
```
|
```
|
||||||
@ -708,6 +715,7 @@ Validate that there are no values in a column in a set of forbidden values.
|
|||||||
description: test description
|
description: test description
|
||||||
columnName: columnName
|
columnName: columnName
|
||||||
testDefinitionName: columnValuesToBeNotInSet
|
testDefinitionName: columnValuesToBeNotInSet
|
||||||
|
computePassedFailedRowCount: <true or false>
|
||||||
parameterValues:
|
parameterValues:
|
||||||
- name: forbiddenValues
|
- name: forbiddenValues
|
||||||
value: ["forbidden1", "forbidden2"]
|
value: ["forbidden1", "forbidden2"]
|
||||||
@ -762,6 +770,7 @@ Any of those two need to be informed.
|
|||||||
description: test description
|
description: test description
|
||||||
columnName: columnName
|
columnName: columnName
|
||||||
testDefinitionName: columnValuesToBeBetween
|
testDefinitionName: columnValuesToBeBetween
|
||||||
|
computePassedFailedRowCount: <true or false>
|
||||||
parameterValues:
|
parameterValues:
|
||||||
- name: minValue
|
- name: minValue
|
||||||
value: ["forbidden1", "forbidden2"]
|
value: ["forbidden1", "forbidden2"]
|
||||||
@ -893,6 +902,7 @@ Any of those two need to be informed.
|
|||||||
description: test description
|
description: test description
|
||||||
columnName: columnName
|
columnName: columnName
|
||||||
testDefinitionName: columnValueLengthsToBeBetween
|
testDefinitionName: columnValueLengthsToBeBetween
|
||||||
|
computePassedFailedRowCount: <true or false>
|
||||||
parameterValues:
|
parameterValues:
|
||||||
- name: minLength
|
- name: minLength
|
||||||
value: 50
|
value: 50
|
||||||
|
Loading…
x
Reference in New Issue
Block a user