diff --git a/ingestion/src/metadata/__version__.py b/ingestion/src/metadata/__version__.py index c81446723b1..22e1d39a582 100644 --- a/ingestion/src/metadata/__version__.py +++ b/ingestion/src/metadata/__version__.py @@ -16,6 +16,8 @@ import os import re import sys +import pkg_resources + try: from importlib.metadata import version except ImportError: @@ -85,3 +87,14 @@ def get_major_minor_version() -> str: """ major, minor, *_ = sys.version_info return f"{major}.{minor}" + + +def match_versions(version1: str, version2: str) -> bool: + """Check if both versions match in minor and major""" + server_semver = pkg_resources.parse_version(version1) + client_semver = pkg_resources.parse_version(version2) + + return ( + server_semver.major == client_semver.major + and server_semver.minor == client_semver.minor + ) diff --git a/ingestion/src/metadata/ingestion/ometa/mixins/server_mixin.py b/ingestion/src/metadata/ingestion/ometa/mixins/server_mixin.py index 4ee98621570..7a2e3892e62 100755 --- a/ingestion/src/metadata/ingestion/ometa/mixins/server_mixin.py +++ b/ingestion/src/metadata/ingestion/ometa/mixins/server_mixin.py @@ -15,7 +15,11 @@ To be used by OpenMetadata class """ from typing import Optional -from metadata.__version__ import get_client_version, get_server_version_from_string +from metadata.__version__ import ( + get_client_version, + get_server_version_from_string, + match_versions, +) from metadata.generated.schema.settings.settings import Settings, SettingType from metadata.ingestion.models.encoders import show_secrets_encoder from metadata.ingestion.ometa.client import REST @@ -72,18 +76,17 @@ class OMetaServerMixin: f"OpenMetadata client running with Server version [{server_version}] and Client version [{client_version}]" ) - # Server version will be 0.13.2, vs 0.13.2.X from the client. 
- # If the server version is contained in the client version, then we're good to go - if server_version not in client_version: + if not match_versions(server_version, client_version): raise VersionMismatchException( - f"Server version is {server_version} vs. Client version {client_version}. Both should match." + f"Server version is {server_version} vs. Client version {client_version}." + f" Major and minor versions should match." ) def create_or_update_settings(self, settings: Settings) -> Settings: """Create of update setting Args: - setting (Settings): setting to update or create + settings (Settings): setting to update or create Returns: Settings @@ -95,9 +98,6 @@ class OMetaServerMixin: def get_settings_by_name(self, setting_type: SettingType) -> Optional[Settings]: """Get setting by name - Args: - setting (Settings): setting to update or create - Returns: Settings """ @@ -111,9 +111,6 @@ class OMetaServerMixin: def get_profiler_config_settings(self) -> Optional[Settings]: """Get profiler config setting - Args: - setting (Settings): setting to update or create - Returns: Settings """ diff --git a/ingestion/src/metadata/ingestion/source/database/dbt/metadata.py b/ingestion/src/metadata/ingestion/source/database/dbt/metadata.py index d790efe4e94..0005bd12599 100644 --- a/ingestion/src/metadata/ingestion/source/database/dbt/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/dbt/metadata.py @@ -28,7 +28,6 @@ from metadata.generated.schema.entity.data.table import ( ModelType, Table, ) -from metadata.generated.schema.entity.services.databaseService import DatabaseService from metadata.generated.schema.entity.services.ingestionPipelines.status import ( StackTraceError, ) diff --git a/ingestion/tests/unit/test_version.py b/ingestion/tests/unit/test_version.py index cea527e4da6..b81aefd1e63 100644 --- a/ingestion/tests/unit/test_version.py +++ b/ingestion/tests/unit/test_version.py @@ -11,44 +11,38 @@ """ Validate Server Mixin version methods """ - 
-from unittest import TestCase - from metadata.__version__ import ( get_client_version_from_string, get_server_version_from_string, + match_versions, ) -class OMetaVersionTest(TestCase): +def test_get_version_from_string(): """ - Check version methods + We should be able to parse regular version responses """ + assert "0.11.0" == get_server_version_from_string("0.11.0.dev0") + assert "0.11.0" == get_server_version_from_string("0.11.0") + assert "1111.11.111" == get_server_version_from_string("1111.11.111") + assert "1111.11.111" == get_server_version_from_string("1111.11.111-SNAPSHOT") + assert "0.11.1" == get_server_version_from_string("0.11.1.0.0.1.patch") - def test_get_version_from_string(self): - """ - We should be able to parse regular version responses - """ - self.assertEqual("0.11.0", get_server_version_from_string("0.11.0.dev0")) - self.assertEqual("0.11.0", get_server_version_from_string("0.11.0")) - self.assertEqual("1111.11.111", get_server_version_from_string("1111.11.111")) - self.assertEqual( - "1111.11.111", get_server_version_from_string("1111.11.111-SNAPSHOT") - ) - self.assertEqual("0.11.1", get_server_version_from_string("0.11.1.0.0.1.patch")) - def test_get_client_version_from_string(self): - """ - We should be able to parse regular version responses - """ - self.assertEqual("0.13.2.5", get_client_version_from_string("0.13.2.5.dev0")) - self.assertEqual("0.11.0.1", get_client_version_from_string("0.11.0.1")) - self.assertEqual( - "1111.11.111.1", get_client_version_from_string("1111.11.111.1") - ) - self.assertEqual( - "1111.11.111.2", get_client_version_from_string("1111.11.111.2-SNAPSHOT") - ) - self.assertEqual( - "0.11.1.0", get_client_version_from_string("0.11.1.0.0.1.patch") - ) +def test_get_client_version_from_string(): + """ + We should be able to parse regular version responses + """ + assert "0.13.2.5" == get_client_version_from_string("0.13.2.5.dev0") + assert "0.11.0.1" == get_client_version_from_string("0.11.0.1") + assert 
"1111.11.111.1" == get_client_version_from_string("1111.11.111.1") + assert "1111.11.111.2" == get_client_version_from_string("1111.11.111.2-SNAPSHOT") + assert "0.11.1.0" == get_client_version_from_string("0.11.1.0.0.1.patch") + + +def test_match_version(): + """We only match major and minor versions""" + assert match_versions("0.11.0", "0.11.0") + assert match_versions("0.11.0", "0.11.1") + assert not match_versions("1.3.0", "1.4.0") + assert not match_versions("1.3.0", "2.3.0") diff --git a/openmetadata-docs/content/v1.3.x/deployment/ingestion/openmetadata.md b/openmetadata-docs/content/v1.3.x/deployment/ingestion/openmetadata.md index 153a80d580b..5050fd8dfdd 100644 --- a/openmetadata-docs/content/v1.3.x/deployment/ingestion/openmetadata.md +++ b/openmetadata-docs/content/v1.3.x/deployment/ingestion/openmetadata.md @@ -27,7 +27,7 @@ as a starting point. 1. **If you do not have an Airflow service** up and running on your platform, we provide a custom [Docker](https://hub.docker.com/r/openmetadata/ingestion) image, which already contains the OpenMetadata ingestion packages and custom [Airflow APIs](https://github.com/open-metadata/openmetadata-airflow-apis) to - deploy Workflows from the UI as well. This is the simplest approach. + deploy Workflows from the UI as well. **This is the simplest approach**. 2. If you already have Airflow up and running and want to use it for the metadata ingestion, you will need to install the ingestion modules to the host. You can find more information on how to do this in the Custom Airflow Installation section. @@ -50,6 +50,17 @@ and Airflow's Webserver: You can find more information on Airflow's Access Control [here](https://airflow.apache.org/docs/apache-airflow/stable/security/access-control.html). +## Shared Volumes + +{% note noteType="Warning" %} + +The Airflow Webserver, Scheduler and Workers - if using a distributed setup - need to have access to the same shared volumes +with RWX permissions. 
+ +{% /note %} + +We have specific instructions on how to set up the shared volumes in Kubernetes depending on your cloud deployment [here](/deployment/kubernetes). + ## Using the OpenMetadata Ingestion Image If you are using our `openmetadata/ingestion` Docker image, there is just one thing to do: Configure the OpenMetadata server. @@ -359,6 +370,25 @@ a couple of points to validate: {"code":403,"message":"Principal: CatalogPrincipal{name='ingestion-bot'} operations [ViewAll] not allowed"} ``` +### AirflowException: Dag 'XYZ' could not be found + +If you're seeing an error similar to the following: + +``` +[...] +task_run + _dag = get_dag(args.subdir, args.dag_id) + File "/home/airflow/.local/lib/python3.9/site-packages/airflow/utils/cli.py", line 235, in get_dag + raise AirflowException( +airflow.exceptions.AirflowException: Dag '...' could not be found; either it does not exist or it failed to parse. +``` + +This usually means the shared volumes between the Webserver, Scheduler and Workers are not properly set up +in your distributed environment. + +We have specific instructions on how to set up the shared volumes in Kubernetes depending on your cloud deployment [here](/deployment/kubernetes). + + ### ClientInitializationError The main root cause here is a version mismatch between the server and the client. Make sure that the `openmetadata-ingestion` diff --git a/openmetadata-docs/content/v1.4.x-SNAPSHOT/deployment/ingestion/openmetadata.md b/openmetadata-docs/content/v1.4.x-SNAPSHOT/deployment/ingestion/openmetadata.md index f05549ab08e..4600ad0cdaa 100644 --- a/openmetadata-docs/content/v1.4.x-SNAPSHOT/deployment/ingestion/openmetadata.md +++ b/openmetadata-docs/content/v1.4.x-SNAPSHOT/deployment/ingestion/openmetadata.md @@ -27,7 +27,7 @@ as a starting point. 1. 
**If you do not have an Airflow service** up and running on your platform, we provide a custom [Docker](https://hub.docker.com/r/openmetadata/ingestion) image, which already contains the OpenMetadata ingestion packages and custom [Airflow APIs](https://github.com/open-metadata/openmetadata-airflow-apis) to - deploy Workflows from the UI as well. This is the simplest approach. + deploy Workflows from the UI as well. **This is the simplest approach**. 2. If you already have Airflow up and running and want to use it for the metadata ingestion, you will need to install the ingestion modules to the host. You can find more information on how to do this in the Custom Airflow Installation section. @@ -50,6 +50,18 @@ and Airflow's Webserver: You can find more information on Airflow's Access Control [here](https://airflow.apache.org/docs/apache-airflow/stable/security/access-control.html). +## Shared Volumes + +{% note noteType="Warning" %} + +The Airflow Webserver, Scheduler and Workers - if using a distributed setup - need to have access to the same shared volumes +with RWX permissions. + +{% /note %} + +We have specific instructions on how to set up the shared volumes in Kubernetes depending on your cloud deployment [here](/deployment/kubernetes). + + ## Using the OpenMetadata Ingestion Image If you are using our `openmetadata/ingestion` Docker image, there is just one thing to do: Configure the OpenMetadata server. @@ -359,6 +371,25 @@ a couple of points to validate: {"code":403,"message":"Principal: CatalogPrincipal{name='ingestion-bot'} operations [ViewAll] not allowed"} ``` +### AirflowException: Dag 'XYZ' could not be found + +If you're seeing an error similar to the following: + +``` +[...] +task_run + _dag = get_dag(args.subdir, args.dag_id) + File "/home/airflow/.local/lib/python3.9/site-packages/airflow/utils/cli.py", line 235, in get_dag + raise AirflowException( +airflow.exceptions.AirflowException: Dag '...' could not be found; either it does not exist or it failed to parse. 
+``` + +This usually means the shared volumes between the Webserver, Scheduler and Workers are not properly set up +in your distributed environment. + +We have specific instructions on how to set up the shared volumes in Kubernetes depending on your cloud deployment [here](/deployment/kubernetes). + + ### ClientInitializationError The main root cause here is a version mismatch between the server and the client. Make sure that the `openmetadata-ingestion`