mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-01 11:09:14 +00:00
* Added table tests to great expectations submodule * Added new tests to GE submodule
This commit is contained in:
parent
04c8e49edd
commit
0cb036de08
@ -11,13 +11,13 @@ pip install openmetadata-ingestion[great-expectations]
|
||||
action:
|
||||
module_name: metadata.great_expectations.action
|
||||
class_name: OpenMetadataValidationAction
|
||||
config_file_path: path/to/ometa/config/file/confg.yml
|
||||
config_file_path: path/to/ometa/config/file/
|
||||
ometa_service_name: my_service_name
|
||||
```
|
||||
`ometa_service_name` is optional. If you don't specify it, the action will search for the service in which the table entity name exists. If the same table entity name exists in more than one service, it will raise an error.
|
||||
|
||||
|
||||
The `config.yml` file holds connection details to your Open Metadata instance, e.g.
|
||||
The `config.yaml` file holds connection details to your Open Metadata instance, e.g.
|
||||
|
||||
```yml
|
||||
hostPort: http://localhost:8585/api
|
||||
|
||||
@ -155,7 +155,7 @@ class OpenMetadataValidationAction(ValidationAction):
|
||||
if self.ometa_service_name:
|
||||
return self.ometa_conn.get_by_name(
|
||||
entity=Table,
|
||||
fqdn=f"{self.ometa_service_name}.{database}.{schema_name}.{table_name}",
|
||||
fqn=f"{self.ometa_service_name}.{database}.{schema_name}.{table_name}",
|
||||
)
|
||||
|
||||
table_entity = [
|
||||
|
||||
@ -75,10 +75,25 @@ class BaseColumnTestBuilder(ABC):
|
||||
testCaseStatus=TestCaseStatus.Success
|
||||
if self.result["success"]
|
||||
else TestCaseStatus.Failed,
|
||||
result="Failing rows percentage: "
|
||||
f"{self.result['result']['unexpected_percent']}",
|
||||
result=self._get_expectation_result(),
|
||||
)
|
||||
|
||||
def _get_expectation_result(self):
|
||||
"""Get the expectation result"""
|
||||
if self.result["result"]:
|
||||
if self.result["result"].get("unexpected_percent"):
|
||||
return (
|
||||
"Failing rows percentage: "
|
||||
f"{str(self.result['result'].get('unexpected_percent'))}"
|
||||
)
|
||||
if self.result["result"].get("observed_value"):
|
||||
return (
|
||||
"Observed values: "
|
||||
f"{str(self.result['result'].get('observed_value'))}"
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
def build_test_request(self, *, config, test_type) -> CreateColumnTestRequest:
|
||||
"""Build a test case request to add the test to the tabe
|
||||
|
||||
|
||||
@ -0,0 +1,38 @@
|
||||
# Copyright 2022 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
TestCase builder
|
||||
"""
|
||||
|
||||
from metadata.generated.schema.api.tests.createColumnTest import CreateColumnTestRequest
|
||||
from metadata.generated.schema.tests.column import columnValueMaxToBeBetween
|
||||
from metadata.generated.schema.tests.columnTest import ColumnTestType
|
||||
from metadata.great_expectations.builders.column.base_column_test_builder import (
|
||||
BaseColumnTestBuilder,
|
||||
)
|
||||
|
||||
|
||||
class ColumnValueMaxToBeBetweenBuilder(BaseColumnTestBuilder):
    """Builder for the `expect_column_max_to_be_between` GE expectation"""

    def _build_test(self) -> CreateColumnTestRequest:
        """Create the column test request for this expectation.

        The expectation's ``min_value`` / ``max_value`` kwargs are copied
        into the test configuration; a kwarg that is absent is passed on
        as ``None``.
        """
        expectation_kwargs = self.result["expectation_config"]["kwargs"]
        bounds = columnValueMaxToBeBetween.ColumnValueMaxToBeBetween(
            minValueForMaxInCol=expectation_kwargs.get("min_value"),
            maxValueForMaxInCol=expectation_kwargs.get("max_value"),
        )
        return self.build_test_request(
            config=bounds,
            test_type=ColumnTestType.columnValueMaxToBeBetween,
        )
|
||||
@ -0,0 +1,38 @@
|
||||
# Copyright 2022 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
TestCase builder
|
||||
"""
|
||||
|
||||
from metadata.generated.schema.api.tests.createColumnTest import CreateColumnTestRequest
|
||||
from metadata.generated.schema.tests.column import columnValueMinToBeBetween
|
||||
from metadata.generated.schema.tests.columnTest import ColumnTestType
|
||||
from metadata.great_expectations.builders.column.base_column_test_builder import (
|
||||
BaseColumnTestBuilder,
|
||||
)
|
||||
|
||||
|
||||
class ColumnValueMinToBeBetweenBuilder(BaseColumnTestBuilder):
    """Builder for the `expect_column_min_to_be_between` GE expectation"""

    def _build_test(self) -> CreateColumnTestRequest:
        """Create the column test request for this expectation.

        The expectation's ``min_value`` / ``max_value`` kwargs are copied
        into the test configuration; a kwarg that is absent is passed on
        as ``None``.
        """
        expectation_kwargs = self.result["expectation_config"]["kwargs"]
        bounds = columnValueMinToBeBetween.ColumnValueMinToBeBetween(
            minValueForMinInCol=expectation_kwargs.get("min_value"),
            maxValueForMinInCol=expectation_kwargs.get("max_value"),
        )
        return self.build_test_request(
            config=bounds,
            test_type=ColumnTestType.columnValueMinToBeBetween,
        )
|
||||
@ -0,0 +1,38 @@
|
||||
# Copyright 2022 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
TestCase builder
|
||||
"""
|
||||
|
||||
from metadata.generated.schema.api.tests.createColumnTest import CreateColumnTestRequest
|
||||
from metadata.generated.schema.tests.column import columnValuesSumToBeBetween
|
||||
from metadata.generated.schema.tests.columnTest import ColumnTestType
|
||||
from metadata.great_expectations.builders.column.base_column_test_builder import (
|
||||
BaseColumnTestBuilder,
|
||||
)
|
||||
|
||||
|
||||
class ColumnValueSumToBeBetweenBuilder(BaseColumnTestBuilder):
    """Builder for the `expect_column_sum_to_be_between` GE expectation"""

    def _build_test(self) -> CreateColumnTestRequest:
        """Create the column test request for this expectation.

        The expectation's ``min_value`` / ``max_value`` kwargs are copied
        into the test configuration; a kwarg that is absent is passed on
        as ``None``.
        """
        expectation_kwargs = self.result["expectation_config"]["kwargs"]
        bounds = columnValuesSumToBeBetween.ColumnValuesSumToBeBetween(
            minValueForColSum=expectation_kwargs.get("min_value"),
            maxValueForColSum=expectation_kwargs.get("max_value"),
        )
        return self.build_test_request(
            config=bounds,
            test_type=ColumnTestType.columnValuesSumToBeBetween,
        )
|
||||
@ -0,0 +1,33 @@
|
||||
# Copyright 2022 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
TestCase builder
|
||||
"""
|
||||
|
||||
from metadata.generated.schema.api.tests.createColumnTest import CreateColumnTestRequest
|
||||
from metadata.generated.schema.tests.column import columnValuesToBeInSet
|
||||
from metadata.generated.schema.tests.columnTest import ColumnTestType
|
||||
from metadata.great_expectations.builders.column.base_column_test_builder import (
|
||||
BaseColumnTestBuilder,
|
||||
)
|
||||
|
||||
|
||||
class ColumnValuesToBeInSetBuilder(BaseColumnTestBuilder):
    """Builder for the `expect_column_values_to_be_in_set` GE expectation"""

    def _build_test(self) -> CreateColumnTestRequest:
        """Create the column test request for this expectation.

        The expectation's ``value_set`` kwarg is required (a missing key
        raises ``KeyError``) and becomes the test's ``allowedValues``.
        """
        expectation_kwargs = self.result["expectation_config"]["kwargs"]
        allowed = columnValuesToBeInSet.ColumnValuesToBeInSet(
            allowedValues=expectation_kwargs["value_set"],
        )
        return self.build_test_request(
            config=allowed,
            test_type=ColumnTestType.columnValuesToBeInSet,
        )
|
||||
@ -12,12 +12,24 @@
|
||||
|
||||
from enum import Enum
|
||||
|
||||
from metadata.great_expectations.builders.column.value_max_to_be_between import (
|
||||
ColumnValueMaxToBeBetweenBuilder,
|
||||
)
|
||||
from metadata.great_expectations.builders.column.value_min_to_be_between import (
|
||||
ColumnValueMinToBeBetweenBuilder,
|
||||
)
|
||||
from metadata.great_expectations.builders.column.values_lengths_to_be_between import (
|
||||
ColumnValuesLengthsToBeBetweenBuilder,
|
||||
)
|
||||
from metadata.great_expectations.builders.column.values_sum_to_be_between import (
|
||||
ColumnValueSumToBeBetweenBuilder,
|
||||
)
|
||||
from metadata.great_expectations.builders.column.values_to_be_between import (
|
||||
ColumnValuesToBeBetweenBuilder,
|
||||
)
|
||||
from metadata.great_expectations.builders.column.values_to_be_in_set import (
|
||||
ColumnValuesToBeInSetBuilder,
|
||||
)
|
||||
from metadata.great_expectations.builders.column.values_to_be_not_in_set import (
|
||||
ColumnValuesToBeNotInSetBuilder,
|
||||
)
|
||||
@ -30,9 +42,18 @@ from metadata.great_expectations.builders.column.values_to_be_unique import (
|
||||
from metadata.great_expectations.builders.column.values_to_match_regex import (
|
||||
ColumnValuesToMatchRegexBuilder,
|
||||
)
|
||||
from metadata.great_expectations.builders.table.column_count_to_be_between import (
|
||||
TableColumnCountToBeBetweenBuilder,
|
||||
)
|
||||
from metadata.great_expectations.builders.table.column_count_to_equal import (
|
||||
TableColumCountToEqualBuilder,
|
||||
)
|
||||
from metadata.great_expectations.builders.table.column_name_to_exist import (
|
||||
TableColumnNameToExistBuilder,
|
||||
)
|
||||
from metadata.great_expectations.builders.table.column_name_to_match_set import (
|
||||
TableColumnNameToMatchSetBuilder,
|
||||
)
|
||||
from metadata.great_expectations.builders.table.row_count_to_be_between import (
|
||||
TableRowCountToBeBetweenBuilder,
|
||||
)
|
||||
@ -54,3 +75,10 @@ class SupportedGETests(Enum):
|
||||
expect_column_values_to_not_be_null = ColumnValuesToBeNotNullBuilder()
|
||||
expect_column_values_to_be_unique = ColumnValuesToBeUniqueBuilder()
|
||||
expect_column_values_to_match_regex = ColumnValuesToMatchRegexBuilder()
|
||||
expect_table_column_count_to_be_between = TableColumnCountToBeBetweenBuilder()
|
||||
expect_column_to_exist = TableColumnNameToExistBuilder()
|
||||
expect_table_columns_to_match_set = TableColumnNameToMatchSetBuilder()
|
||||
expect_column_values_to_be_in_set = ColumnValuesToBeInSetBuilder()
|
||||
expect_column_max_to_be_between = ColumnValueMaxToBeBetweenBuilder()
|
||||
expect_column_min_to_be_between = ColumnValueMinToBeBetweenBuilder()
|
||||
expect_column_sum_to_be_between = ColumnValueSumToBeBetweenBuilder()
|
||||
|
||||
@ -77,9 +77,18 @@ class BaseTableTestBuilder(ABC):
|
||||
testCaseStatus=TestCaseStatus.Success
|
||||
if self.result["success"]
|
||||
else TestCaseStatus.Failed,
|
||||
result=self.result["result"]["observed_value"],
|
||||
result=self._get_expectation_result(),
|
||||
)
|
||||
|
||||
def _get_expectation_result(self):
|
||||
"""Get the expectation result"""
|
||||
if self.result["result"]:
|
||||
if isinstance(self.result["result"]["observed_value"], list):
|
||||
return ", ".join(self.result["result"].get("observed_value"))
|
||||
return self.result["result"]["observed_value"]
|
||||
|
||||
return None
|
||||
|
||||
def build_test_request(self, *, config, test_type) -> CreateTableTestRequest:
|
||||
"""Build a test case request to add the test to the tabe
|
||||
|
||||
|
||||
@ -0,0 +1,34 @@
|
||||
# Copyright 2022 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
TestCase builder for table column count
|
||||
"""
|
||||
|
||||
from metadata.generated.schema.api.tests.createTableTest import CreateTableTestRequest
|
||||
from metadata.generated.schema.tests.table import tableColumnCountToBeBetween
|
||||
from metadata.generated.schema.tests.tableTest import TableTestType
|
||||
from metadata.great_expectations.builders.table.base_table_test_builders import (
|
||||
BaseTableTestBuilder,
|
||||
)
|
||||
|
||||
|
||||
class TableColumnCountToBeBetweenBuilder(BaseTableTestBuilder):
    """Builder for the `expect_table_column_count_to_be_between` GE expectation"""

    def _build_test(self) -> CreateTableTestRequest:
        """Create the table test request for this expectation.

        Both the ``min_value`` and ``max_value`` kwargs are required here;
        a missing key raises ``KeyError``.
        """
        expectation_kwargs = self.result["expectation_config"]["kwargs"]
        bounds = tableColumnCountToBeBetween.TableColumnCountToBeBetween(
            minColValue=expectation_kwargs["min_value"],
            maxColValue=expectation_kwargs["max_value"],
        )
        return self.build_test_request(
            config=bounds,
            test_type=TableTestType.tableColumnCountToBeBetween,
        )
|
||||
@ -0,0 +1,33 @@
|
||||
# Copyright 2022 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
TestCase builder for table column count
|
||||
"""
|
||||
|
||||
from metadata.generated.schema.api.tests.createTableTest import CreateTableTestRequest
|
||||
from metadata.generated.schema.tests.table import tableColumnNameToExist
|
||||
from metadata.generated.schema.tests.tableTest import TableTestType
|
||||
from metadata.great_expectations.builders.table.base_table_test_builders import (
|
||||
BaseTableTestBuilder,
|
||||
)
|
||||
|
||||
|
||||
class TableColumnNameToExistBuilder(BaseTableTestBuilder):
    """Builder for the `expect_column_to_exist` GE expectation"""

    def _build_test(self) -> CreateTableTestRequest:
        """Create the table test request for this expectation.

        The expectation's ``column`` kwarg is required (a missing key
        raises ``KeyError``) and becomes the test's ``columnName``.
        """
        expectation_kwargs = self.result["expectation_config"]["kwargs"]
        expected_column = tableColumnNameToExist.TableColumnNameToExist(
            columnName=expectation_kwargs["column"],
        )
        return self.build_test_request(
            config=expected_column,
            test_type=TableTestType.tableColumnNameToExist,
        )
|
||||
@ -0,0 +1,37 @@
|
||||
# Copyright 2022 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
TestCase builder for table column count
|
||||
"""
|
||||
|
||||
from metadata.generated.schema.api.tests.createTableTest import CreateTableTestRequest
|
||||
from metadata.generated.schema.tests.table import tableColumnToMatchSet
|
||||
from metadata.generated.schema.tests.tableTest import TableTestType
|
||||
from metadata.great_expectations.builders.table.base_table_test_builders import (
|
||||
BaseTableTestBuilder,
|
||||
)
|
||||
|
||||
|
||||
class TableColumnNameToMatchSetBuilder(BaseTableTestBuilder):
    """Builder for the `expect_table_columns_to_match_set` GE expectation"""

    def _build_test(self) -> CreateTableTestRequest:
        """Create the table test request for this expectation.

        The required ``column_set`` kwarg is joined with commas into
        ``columnNames``. ``ordered`` takes the ``exact_match`` kwarg, with
        any falsy or missing value turned into ``False``.
        """
        expectation_kwargs = self.result["expectation_config"]["kwargs"]
        joined_names = ",".join(expectation_kwargs["column_set"])
        is_ordered = expectation_kwargs.get("exact_match") or False
        return self.build_test_request(
            config=tableColumnToMatchSet.TableColumnToMatchSet(
                columnNames=joined_names,
                ordered=is_ordered,
            ),
            test_type=TableTestType.tableColumnToMatchSet,
        )
|
||||
@ -69,9 +69,13 @@ def render_template(environment: Environment, template_file: str = "config.yml")
|
||||
tmplt = environment.get_template(template_file)
|
||||
return tmplt.render()
|
||||
except TemplateNotFound as err:
|
||||
raise TemplateNotFound(
|
||||
f"Config file at {environment.loader.searchpath} not found"
|
||||
) from err
|
||||
try:
|
||||
tmplt = environment.get_template("config.yaml")
|
||||
return tmplt.render()
|
||||
except TemplateNotFound as err:
|
||||
raise TemplateNotFound(
|
||||
f"Config file at {environment.loader.searchpath} not found"
|
||||
) from err
|
||||
|
||||
|
||||
def create_ometa_connection_obj(config: str) -> OpenMetadataConnection:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user