parent 01309249c8
commit 811f640a18
@@ -90,7 +90,8 @@
 "GEOGRAPHY",
 "ENUM",
 "JSON",
-"UUID"
+"UUID",
+"VARIANT"
 ]
 },
 "constraint": {
@@ -24,7 +24,8 @@ echo "Prepare Docker volume for the operators"
 cd docker/local-metadata
 echo "Starting Local Docker Containers"

-docker compose down && docker compose up --build -d
+echo "Using ingestion dependency: ${INGESTION_DEPENDENCY:-all}"
+docker compose down && docker compose build --build-arg INGESTION_DEPENDENCY="${INGESTION_DEPENDENCY:-all}" && docker compose up -d

 until curl -s -f "http://localhost:9200/_cat/indices/team_search_index"; do
 printf 'Checking if Elastic Search instance is up...\n'
@@ -31,9 +31,6 @@ ENV CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints
 # Add docker provider for the DockerOperator
 RUN pip install "apache-airflow[docker]==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}"

-RUN pip install "openmetadata-ingestion[all]"
-RUN pip uninstall openmetadata-ingestion -y
-
 FROM airflow as apis
 WORKDIR /openmetadata-airflow-apis
 COPY openmetadata-airflow-apis /openmetadata-airflow-apis
@@ -44,7 +41,7 @@ FROM apis as ingestion
 WORKDIR /ingestion
 COPY ingestion /ingestion

-ARG INGESTION_DEPENDENCY=all
+ARG INGESTION_DEPENDENCY
 RUN pip install --upgrade ".[${INGESTION_DEPENDENCY}]"

 # Uninstalling psycopg2-binary and installing psycopg2 instead
@@ -70,6 +70,23 @@ _TYPE_MAP = {
 SQA_RESERVED_ATTRIBUTES = ["metadata"]


+def map_types(col: Column, table_service_type):
+    """returns an ORM type"""
+
+    if col.arrayDataType:
+        return _TYPE_MAP.get(col.dataType)(item_type=col.arrayDataType)
+
+    if (
+        table_service_type == databaseService.DatabaseServiceType.Snowflake
+        and col.dataType == DataType.JSON
+    ):
+        from snowflake.sqlalchemy import VARIANT
+
+        return VARIANT
+
+    return _TYPE_MAP.get(col.dataType)
+
+
 def check_snowflake_case_sensitive(table_service_type, table_or_col) -> Optional[bool]:
     """Check whether column or table name are not uppercase for snowflake table.
     If so, then force quoting, If not return None to let engine backend handle the logic.
@@ -97,11 +114,10 @@ def build_orm_col(idx: int, col: Column, table_service_type) -> sqlalchemy.Colum
     As this is only used for INSERT/UPDATE/DELETE,
     there is no impact for our read-only purposes.
     """
     return sqlalchemy.Column(
         name=str(col.name.__root__),
-        type_=_TYPE_MAP.get(col.dataType)
-        if not col.arrayDataType
-        else _TYPE_MAP.get(col.dataType)(item_type=col.arrayDataType),
+        type_=map_types(col, table_service_type),
         primary_key=not bool(idx),  # The first col seen is used as PK
         quote=check_snowflake_case_sensitive(table_service_type, col.name.__root__),
         key=str(
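The new `map_types` helper centralizes the column-to-ORM-type dispatch that `build_orm_col` previously inlined: array columns still resolve through `_TYPE_MAP` with their item type, while Snowflake columns declared as `JSON` map to snowflake-sqlalchemy's `VARIANT` (the reason `"VARIANT"` is added to the data-type enum in the first hunk). Below is a minimal, self-contained sketch of that dispatch; the `Column`, `DataType`, `DatabaseServiceType`, and `VARIANT` definitions are stand-ins for the real OpenMetadata generated models and the snowflake-sqlalchemy type, only the branching mirrors the diff.

```python
from dataclasses import dataclass
from enum import Enum
from typing import Optional

import sqlalchemy


class DataType(Enum):  # stand-in for the generated DataType enum
    STRING = "STRING"
    ARRAY = "ARRAY"
    JSON = "JSON"


class DatabaseServiceType(Enum):  # stand-in for databaseService.DatabaseServiceType
    Snowflake = "Snowflake"
    Postgres = "Postgres"


@dataclass
class Column:  # stand-in for the generated Column model
    name: str
    dataType: DataType
    arrayDataType: Optional[DataType] = None


class VARIANT(sqlalchemy.types.TypeDecorator):  # stand-in for snowflake.sqlalchemy.VARIANT
    impl = sqlalchemy.JSON
    cache_ok = True


_TYPE_MAP = {  # trimmed-down version of the module-level map
    DataType.STRING: sqlalchemy.String,
    DataType.ARRAY: sqlalchemy.ARRAY,
    DataType.JSON: sqlalchemy.JSON,
}


def map_types(col: Column, table_service_type) -> object:
    """Return the SQLAlchemy type for a column, special-casing Snowflake JSON."""
    if col.arrayDataType:
        # ARRAY columns carry their item type separately
        return _TYPE_MAP.get(col.dataType)(item_type=col.arrayDataType)
    if (
        table_service_type == DatabaseServiceType.Snowflake
        and col.dataType == DataType.JSON
    ):
        return VARIANT
    return _TYPE_MAP.get(col.dataType)


print(map_types(Column("payload", DataType.JSON), DatabaseServiceType.Snowflake))  # -> VARIANT stand-in
print(map_types(Column("payload", DataType.JSON), DatabaseServiceType.Postgres))   # -> sqlalchemy.JSON
```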
@@ -110,6 +110,17 @@ class TestSuiteWorkflow:
             )
             raise err

+    def _filter_test_cases_for_table_entity(
+        self, table_fqn: str, test_cases: List[TestCase]
+    ) -> list[TestCase]:
+        """Filter test cases for specific entity"""
+        return [
+            test_case
+            for test_case in test_cases
+            if test_case.entityLink.__root__.split("::")[2].replace(">", "")
+            == table_fqn
+        ]
+
     def _get_unique_table_entities(self, test_cases: List[TestCase]) -> Set:
         """from a list of test cases extract unique table entities"""
         table_fqns = [
@@ -240,6 +251,7 @@ class TestSuiteWorkflow:
                 entity=TestSuite,
                 fqn=self.config.source.serviceName,
             )
+
             if test_suite:
                 return [test_suite]
             return None
@@ -377,7 +389,9 @@ class TestSuiteWorkflow:
         for table_fqn in unique_table_fqns:
             try:
                 sqa_interface = self._create_sqa_tests_runner_interface(table_fqn)
-                for test_case in test_cases:
+                for test_case in self._filter_test_cases_for_table_entity(
+                    table_fqn, test_cases
+                ):
                     try:
                         data_test_runner = self._create_data_tests_runner(sqa_interface)
                         test_result = data_test_runner.run_and_handle(test_case)
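The new `_filter_test_cases_for_table_entity` method keys off the entity link stored on each test case: splitting on `::` and stripping the trailing `>` from the third segment yields the table FQN, which is compared against the table currently being processed. A small self-contained sketch of that parsing follows; the helper names and the entity-link strings are illustrative, not taken from the module.

```python
from typing import List


def table_fqn_from_entity_link(entity_link: str) -> str:
    """Extract the table FQN from an entity link such as
    <#E::table::service.db.schema.table::columns::col>."""
    return entity_link.split("::")[2].replace(">", "")


def filter_for_table(table_fqn: str, entity_links: List[str]) -> List[str]:
    """Keep only the links whose table FQN matches the table being processed."""
    return [link for link in entity_links if table_fqn_from_entity_link(link) == table_fqn]


links = [
    "<#E::table::svc.db.schema.orders::columns::amount>",
    "<#E::table::svc.db.schema.customers>",
]
print(filter_for_table("svc.db.schema.orders", links))
# ['<#E::table::svc.db.schema.orders::columns::amount>']
```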
@@ -85,6 +85,18 @@ def column_value_length_to_be_between(
         ],
     )

+    if not max_value_length_value_res or not min_value_length_value_res:
+        msg = f"Error computing {test_case.name} for {runner.table.__tablename__}: missing max value length or min value length"
+        return TestCaseResult(
+            timestamp=execution_date,
+            testCaseStatus=TestCaseStatus.Aborted,
+            result=msg,
+            testResultValue=[
+                TestResultValue(name="minValueLength", value=None),
+                TestResultValue(name="maxValueLength", value=None),
+            ],
+        )
+
     min_bound = next(
         (
             float(param.value)
@@ -102,8 +114,8 @@ def column_value_length_to_be_between(

     status = (
         TestCaseStatus.Success
-        if min_bound >= min_value_length_value_res
-        and max_bound <= max_value_length_value_res
+        if min_bound <= min_value_length_value_res
+        and max_bound >= max_value_length_value_res
         else TestCaseStatus.Failed
     )
     result = (
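The second change in this module fixes an inverted comparison: the test should succeed only when the observed minimum value length is at least the configured lower bound and the observed maximum value length is at most the configured upper bound. A minimal sketch of the corrected check; the function name and numeric values are illustrative, not the module's actual signature.

```python
def value_length_within_bounds(
    min_bound: float, max_bound: float, observed_min: float, observed_max: float
) -> bool:
    """True when every observed value length falls inside [min_bound, max_bound]."""
    return min_bound <= observed_min and max_bound >= observed_max


# Observed lengths span 3..15 while the test allows 5..10: the corrected check fails
# the test, whereas the old (inverted) comparison would have marked it successful.
assert not value_length_within_bounds(5, 10, 3, 15)
# Observed lengths span 6..9, inside the allowed 5..10: passes.
assert value_length_within_bounds(5, 10, 6, 9)
```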
@@ -56,7 +56,7 @@ regex expression to filter tables.
 Sampling percentage to apply for profiling tables.

 **Thread Count**
-Number of thread to use when computing metrics for the profiler
+Number of threads to use when computing metrics for the profiler. For Snowflake users we recommend setting it to 1. There is a known issue with one of the dependencies (`snowflake-connector-python`) affecting projects in certain environments.

 **Ingest Sample Data**
 Whether the profiler should ingest sample data