Fixes #5787 -- Add new tests to GE integration submodule (#5788)

* Added table tests to great expectations submodule

* Added new tests to GE submodule
This commit is contained in:
Teddy 2022-06-30 11:59:55 +02:00 committed by GitHub
parent 04c8e49edd
commit 0cb036de08
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 316 additions and 9 deletions

View File

@ -11,13 +11,13 @@ pip install openmetadata-ingestion[great-expectations]
action:
module_name: metadata.great_expectations.action
class_name: OpenMetadataValidationAction
config_file_path: path/to/ometa/config/file/confg.yml
config_file_path: path/to/ometa/config/file/
ometa_service_name: my_service_name
```
`ometa_service_name` is optional. If you don't specify it, the table entity lookup will search for the service in which the table entity name exists. If the same table entity name exists in more than one service, it will raise an error.
The `config.yml` file holds connection details to your Open Metadata instance, e.g.
The `config.yaml` file holds connection details to your Open Metadata instance, e.g.
```yml
hostPort: http://localhost:8585/api

View File

@ -155,7 +155,7 @@ class OpenMetadataValidationAction(ValidationAction):
if self.ometa_service_name:
return self.ometa_conn.get_by_name(
entity=Table,
fqdn=f"{self.ometa_service_name}.{database}.{schema_name}.{table_name}",
fqn=f"{self.ometa_service_name}.{database}.{schema_name}.{table_name}",
)
table_entity = [

View File

@ -75,10 +75,25 @@ class BaseColumnTestBuilder(ABC):
testCaseStatus=TestCaseStatus.Success
if self.result["success"]
else TestCaseStatus.Failed,
result="Failing rows percentage: "
f"{self.result['result']['unexpected_percent']}",
result=self._get_expectation_result(),
)
def _get_expectation_result(self):
    """Summarise the expectation result as a short human-readable string.

    Reads ``self.result["result"]`` and returns, in order of precedence:

    * ``"Failing rows percentage: <value>"`` when ``unexpected_percent``
      is present,
    * ``"Observed values: <value>"`` when ``observed_value`` is present,
    * ``None`` when the result payload is empty or has neither key.

    Values are compared against ``None`` instead of being tested for
    truthiness, so legitimate zero values (``0.0`` percent failing rows,
    an observed value of ``0``) are still reported.
    """
    details = self.result["result"]
    if not details:
        return None

    unexpected_percent = details.get("unexpected_percent")
    if unexpected_percent is not None:
        return f"Failing rows percentage: {unexpected_percent}"

    observed_value = details.get("observed_value")
    if observed_value is not None:
        return f"Observed values: {observed_value}"

    return None
def build_test_request(self, *, config, test_type) -> CreateColumnTestRequest:
"""Build a test case request to add the test to the tabe

View File

@ -0,0 +1,38 @@
# Copyright 2022 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
TestCase builder
"""
from metadata.generated.schema.api.tests.createColumnTest import CreateColumnTestRequest
from metadata.generated.schema.tests.column import columnValueMaxToBeBetween
from metadata.generated.schema.tests.columnTest import ColumnTestType
from metadata.great_expectations.builders.column.base_column_test_builder import (
BaseColumnTestBuilder,
)
class ColumnValueMaxToBeBetweenBuilder(BaseColumnTestBuilder):
    """Builder for `expect_column_max_to_be_between` GE expectation"""

    def _build_test(self) -> CreateColumnTestRequest:
        """Build the column test request from the expectation kwargs.

        ``min_value`` and ``max_value`` are read with ``.get`` so that an
        omitted bound is forwarded as ``None`` instead of raising.
        """
        kwargs = self.result["expectation_config"]["kwargs"]
        return self.build_test_request(
            config=columnValueMaxToBeBetween.ColumnValueMaxToBeBetween(
                minValueForMaxInCol=kwargs.get("min_value"),
                maxValueForMaxInCol=kwargs.get("max_value"),
            ),
            test_type=ColumnTestType.columnValueMaxToBeBetween,
        )

View File

@ -0,0 +1,38 @@
# Copyright 2022 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
TestCase builder
"""
from metadata.generated.schema.api.tests.createColumnTest import CreateColumnTestRequest
from metadata.generated.schema.tests.column import columnValueMinToBeBetween
from metadata.generated.schema.tests.columnTest import ColumnTestType
from metadata.great_expectations.builders.column.base_column_test_builder import (
BaseColumnTestBuilder,
)
class ColumnValueMinToBeBetweenBuilder(BaseColumnTestBuilder):
    """Builder for `expect_column_min_to_be_between` GE expectation"""

    def _build_test(self) -> CreateColumnTestRequest:
        """Build the column test request from the expectation kwargs.

        ``min_value`` and ``max_value`` are read with ``.get`` so that an
        omitted bound is forwarded as ``None`` instead of raising.
        """
        kwargs = self.result["expectation_config"]["kwargs"]
        return self.build_test_request(
            config=columnValueMinToBeBetween.ColumnValueMinToBeBetween(
                minValueForMinInCol=kwargs.get("min_value"),
                maxValueForMinInCol=kwargs.get("max_value"),
            ),
            test_type=ColumnTestType.columnValueMinToBeBetween,
        )

View File

@ -0,0 +1,38 @@
# Copyright 2022 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
TestCase builder
"""
from metadata.generated.schema.api.tests.createColumnTest import CreateColumnTestRequest
from metadata.generated.schema.tests.column import columnValuesSumToBeBetween
from metadata.generated.schema.tests.columnTest import ColumnTestType
from metadata.great_expectations.builders.column.base_column_test_builder import (
BaseColumnTestBuilder,
)
class ColumnValueSumToBeBetweenBuilder(BaseColumnTestBuilder):
    """Builder for `expect_column_sum_to_be_between` GE expectation"""

    def _build_test(self) -> CreateColumnTestRequest:
        """Build the column test request from the expectation kwargs.

        ``min_value`` and ``max_value`` are read with ``.get`` so that an
        omitted bound is forwarded as ``None`` instead of raising.
        """
        kwargs = self.result["expectation_config"]["kwargs"]
        return self.build_test_request(
            config=columnValuesSumToBeBetween.ColumnValuesSumToBeBetween(
                minValueForColSum=kwargs.get("min_value"),
                maxValueForColSum=kwargs.get("max_value"),
            ),
            test_type=ColumnTestType.columnValuesSumToBeBetween,
        )

View File

@ -0,0 +1,33 @@
# Copyright 2022 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
TestCase builder
"""
from metadata.generated.schema.api.tests.createColumnTest import CreateColumnTestRequest
from metadata.generated.schema.tests.column import columnValuesToBeInSet
from metadata.generated.schema.tests.columnTest import ColumnTestType
from metadata.great_expectations.builders.column.base_column_test_builder import (
BaseColumnTestBuilder,
)
class ColumnValuesToBeInSetBuilder(BaseColumnTestBuilder):
    """Builder for `expect_column_values_to_be_in_set` GE expectation"""

    def _build_test(self) -> CreateColumnTestRequest:
        """Build the column test request from the expectation kwargs.

        ``value_set`` is indexed directly, so a ``KeyError`` is raised when
        the expectation kwargs do not carry it.
        """
        kwargs = self.result["expectation_config"]["kwargs"]
        return self.build_test_request(
            config=columnValuesToBeInSet.ColumnValuesToBeInSet(
                allowedValues=kwargs["value_set"],
            ),
            test_type=ColumnTestType.columnValuesToBeInSet,
        )

View File

@ -12,12 +12,24 @@
from enum import Enum
from metadata.great_expectations.builders.column.value_max_to_be_between import (
ColumnValueMaxToBeBetweenBuilder,
)
from metadata.great_expectations.builders.column.value_min_to_be_between import (
ColumnValueMinToBeBetweenBuilder,
)
from metadata.great_expectations.builders.column.values_lengths_to_be_between import (
ColumnValuesLengthsToBeBetweenBuilder,
)
from metadata.great_expectations.builders.column.values_sum_to_be_between import (
ColumnValueSumToBeBetweenBuilder,
)
from metadata.great_expectations.builders.column.values_to_be_between import (
ColumnValuesToBeBetweenBuilder,
)
from metadata.great_expectations.builders.column.values_to_be_in_set import (
ColumnValuesToBeInSetBuilder,
)
from metadata.great_expectations.builders.column.values_to_be_not_in_set import (
ColumnValuesToBeNotInSetBuilder,
)
@ -30,9 +42,18 @@ from metadata.great_expectations.builders.column.values_to_be_unique import (
from metadata.great_expectations.builders.column.values_to_match_regex import (
ColumnValuesToMatchRegexBuilder,
)
from metadata.great_expectations.builders.table.column_count_to_be_between import (
TableColumnCountToBeBetweenBuilder,
)
from metadata.great_expectations.builders.table.column_count_to_equal import (
TableColumCountToEqualBuilder,
)
from metadata.great_expectations.builders.table.column_name_to_exist import (
TableColumnNameToExistBuilder,
)
from metadata.great_expectations.builders.table.column_name_to_match_set import (
TableColumnNameToMatchSetBuilder,
)
from metadata.great_expectations.builders.table.row_count_to_be_between import (
TableRowCountToBeBetweenBuilder,
)
@ -54,3 +75,10 @@ class SupportedGETests(Enum):
expect_column_values_to_not_be_null = ColumnValuesToBeNotNullBuilder()
expect_column_values_to_be_unique = ColumnValuesToBeUniqueBuilder()
expect_column_values_to_match_regex = ColumnValuesToMatchRegexBuilder()
expect_table_column_count_to_be_between = TableColumnCountToBeBetweenBuilder()
expect_column_to_exist = TableColumnNameToExistBuilder()
expect_table_columns_to_match_set = TableColumnNameToMatchSetBuilder()
expect_column_values_to_be_in_set = ColumnValuesToBeInSetBuilder()
expect_column_max_to_be_between = ColumnValueMaxToBeBetweenBuilder()
expect_column_min_to_be_between = ColumnValueMinToBeBetweenBuilder()
expect_column_sum_to_be_between = ColumnValueSumToBeBetweenBuilder()

View File

@ -77,9 +77,18 @@ class BaseTableTestBuilder(ABC):
testCaseStatus=TestCaseStatus.Success
if self.result["success"]
else TestCaseStatus.Failed,
result=self.result["result"]["observed_value"],
result=self._get_expectation_result(),
)
def _get_expectation_result(self):
    """Return the observed value of the expectation result, if any.

    Reads ``self.result["result"]``:

    * when the payload is empty, or has no ``observed_value``, returns
      ``None``;
    * when ``observed_value`` is a list, returns its items joined with
      ``", "`` (each item is converted with ``str`` so non-string items
      do not break the join);
    * otherwise returns ``observed_value`` unchanged.
    """
    details = self.result["result"]
    if not details:
        return None

    # `.get` keeps a non-empty payload without `observed_value` from raising.
    observed_value = details.get("observed_value")
    if isinstance(observed_value, list):
        return ", ".join(str(item) for item in observed_value)
    return observed_value
def build_test_request(self, *, config, test_type) -> CreateTableTestRequest:
"""Build a test case request to add the test to the tabe

View File

@ -0,0 +1,34 @@
# Copyright 2022 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
TestCase builder for table column count
"""
from metadata.generated.schema.api.tests.createTableTest import CreateTableTestRequest
from metadata.generated.schema.tests.table import tableColumnCountToBeBetween
from metadata.generated.schema.tests.tableTest import TableTestType
from metadata.great_expectations.builders.table.base_table_test_builders import (
BaseTableTestBuilder,
)
class TableColumnCountToBeBetweenBuilder(BaseTableTestBuilder):
    """Builder for `expect_table_column_count_to_be_between` GE expectation"""

    def _build_test(self) -> CreateTableTestRequest:
        """Build the table test request from the expectation kwargs.

        ``min_value`` and ``max_value`` are read with ``.get``, matching the
        column "to be between" builders, so an expectation configured with
        only one bound forwards ``None`` for the other instead of raising
        ``KeyError``.
        """
        kwargs = self.result["expectation_config"]["kwargs"]
        return self.build_test_request(
            config=tableColumnCountToBeBetween.TableColumnCountToBeBetween(
                minColValue=kwargs.get("min_value"),
                maxColValue=kwargs.get("max_value"),
            ),
            test_type=TableTestType.tableColumnCountToBeBetween,
        )

View File

@ -0,0 +1,33 @@
# Copyright 2022 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
TestCase builder for table column name to exist
"""
from metadata.generated.schema.api.tests.createTableTest import CreateTableTestRequest
from metadata.generated.schema.tests.table import tableColumnNameToExist
from metadata.generated.schema.tests.tableTest import TableTestType
from metadata.great_expectations.builders.table.base_table_test_builders import (
BaseTableTestBuilder,
)
class TableColumnNameToExistBuilder(BaseTableTestBuilder):
    """Builder for `expect_column_to_exist` GE expectation"""

    def _build_test(self) -> CreateTableTestRequest:
        """Build the table test request from the expectation kwargs.

        ``column`` is indexed directly, so a ``KeyError`` is raised when the
        expectation kwargs do not carry it.
        """
        kwargs = self.result["expectation_config"]["kwargs"]
        return self.build_test_request(
            config=tableColumnNameToExist.TableColumnNameToExist(
                columnName=kwargs["column"],
            ),
            test_type=TableTestType.tableColumnNameToExist,
        )

View File

@ -0,0 +1,37 @@
# Copyright 2022 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
TestCase builder for table column names to match set
"""
from metadata.generated.schema.api.tests.createTableTest import CreateTableTestRequest
from metadata.generated.schema.tests.table import tableColumnToMatchSet
from metadata.generated.schema.tests.tableTest import TableTestType
from metadata.great_expectations.builders.table.base_table_test_builders import (
BaseTableTestBuilder,
)
class TableColumnNameToMatchSetBuilder(BaseTableTestBuilder):
    """Builder for `expect_table_columns_to_match_set` GE expectation"""

    def _build_test(self) -> CreateTableTestRequest:
        """Build the table test request from the expectation kwargs.

        ``column_set`` is joined into a single comma-separated string, and
        a missing or falsy ``exact_match`` is forwarded as ``False``.
        """
        kwargs = self.result["expectation_config"]["kwargs"]
        return self.build_test_request(
            config=tableColumnToMatchSet.TableColumnToMatchSet(
                columnNames=",".join(kwargs["column_set"]),
                # NOTE(review): `exact_match` is forwarded as `ordered`;
                # confirm the two flags mean the same thing on both sides.
                ordered=kwargs.get("exact_match") or False,
            ),
            test_type=TableTestType.tableColumnToMatchSet,
        )

View File

@ -69,9 +69,13 @@ def render_template(environment: Environment, template_file: str = "config.yml")
tmplt = environment.get_template(template_file)
return tmplt.render()
except TemplateNotFound as err:
raise TemplateNotFound(
f"Config file at {environment.loader.searchpath} not found"
) from err
try:
tmplt = environment.get_template("config.yaml")
return tmplt.render()
except TemplateNotFound as err:
raise TemplateNotFound(
f"Config file at {environment.loader.searchpath} not found"
) from err
def create_ometa_connection_obj(config: str) -> OpenMetadataConnection: