MINOR - Version match logic update & Airflow docs (#16157)

* airflow docs

* update version validation

* MINOR - docs and version match
This commit is contained in:
Pere Miquel Brull 2024-05-08 07:37:14 +02:00 committed by GitHub
parent abdaea55e2
commit 39eed12f32
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 110 additions and 46 deletions

View File

@ -16,6 +16,8 @@ import os
import re
import sys
import pkg_resources
try:
from importlib.metadata import version
except ImportError:
@ -85,3 +87,14 @@ def get_major_minor_version() -> str:
"""
major, minor, *_ = sys.version_info
return f"{major}.{minor}"
def match_versions(version1: str, version2: str) -> bool:
    """
    Tell whether two version strings share the same major and minor numbers.

    Only the major and minor components are compared; any other
    component of the parsed versions is ignored.

    Args:
        version1 (str): first version string to compare
        version2 (str): second version string to compare

    Returns:
        bool: True when major and minor are equal in both versions
    """
    # Parse in argument order: version1 first, then version2
    first, second = (
        pkg_resources.parse_version(raw) for raw in (version1, version2)
    )
    return (first.major, first.minor) == (second.major, second.minor)

View File

@ -15,7 +15,11 @@ To be used by OpenMetadata class
"""
from typing import Optional
from metadata.__version__ import get_client_version, get_server_version_from_string
from metadata.__version__ import (
get_client_version,
get_server_version_from_string,
match_versions,
)
from metadata.generated.schema.settings.settings import Settings, SettingType
from metadata.ingestion.models.encoders import show_secrets_encoder
from metadata.ingestion.ometa.client import REST
@ -72,18 +76,17 @@ class OMetaServerMixin:
f"OpenMetadata client running with Server version [{server_version}] and Client version [{client_version}]"
)
# Server version will be 0.13.2, vs 0.13.2.X from the client.
# If the server version is contained in the client version, then we're good to go
if server_version not in client_version:
if not match_versions(server_version, client_version):
raise VersionMismatchException(
f"Server version is {server_version} vs. Client version {client_version}. Both should match."
f"Server version is {server_version} vs. Client version {client_version}."
f" Major and minor versions should match."
)
def create_or_update_settings(self, settings: Settings) -> Settings:
"""Create of update setting
Args:
setting (Settings): setting to update or create
settings (Settings): setting to update or create
Returns:
Settings
@ -95,9 +98,6 @@ class OMetaServerMixin:
def get_settings_by_name(self, setting_type: SettingType) -> Optional[Settings]:
"""Get setting by name
Args:
setting (Settings): setting to update or create
Returns:
Settings
"""
@ -111,9 +111,6 @@ class OMetaServerMixin:
def get_profiler_config_settings(self) -> Optional[Settings]:
"""Get profiler config setting
Args:
setting (Settings): setting to update or create
Returns:
Settings
"""

View File

@ -28,7 +28,6 @@ from metadata.generated.schema.entity.data.table import (
ModelType,
Table,
)
from metadata.generated.schema.entity.services.databaseService import DatabaseService
from metadata.generated.schema.entity.services.ingestionPipelines.status import (
StackTraceError,
)

View File

@ -11,44 +11,38 @@
"""
Validate Server Mixin version methods
"""
from unittest import TestCase
from metadata.__version__ import (
get_client_version_from_string,
get_server_version_from_string,
match_versions,
)
class OMetaVersionTest(TestCase):
def test_get_version_from_string():
    """
    Each raw server version string should be parsed into the expected version
    """
    # (raw input, expected parsed version), checked in this order
    expectations = (
        ("0.11.0.dev0", "0.11.0"),
        ("0.11.0", "0.11.0"),
        ("1111.11.111", "1111.11.111"),
        ("1111.11.111-SNAPSHOT", "1111.11.111"),
        ("0.11.1.0.0.1.patch", "0.11.1"),
    )
    for raw, expected in expectations:
        assert expected == get_server_version_from_string(raw)
def test_get_version_from_string(self):
    """
    Each raw server version string should be parsed into the expected version
    """
    # (raw input, expected parsed version), checked in this order
    expectations = (
        ("0.11.0.dev0", "0.11.0"),
        ("0.11.0", "0.11.0"),
        ("1111.11.111", "1111.11.111"),
        ("1111.11.111-SNAPSHOT", "1111.11.111"),
        ("0.11.1.0.0.1.patch", "0.11.1"),
    )
    for raw, expected in expectations:
        self.assertEqual(expected, get_server_version_from_string(raw))
def test_get_client_version_from_string(self):
    """
    Each raw client version string should be parsed into the expected version
    """
    # (raw input, expected parsed version), checked in this order
    expectations = (
        ("0.13.2.5.dev0", "0.13.2.5"),
        ("0.11.0.1", "0.11.0.1"),
        ("1111.11.111.1", "1111.11.111.1"),
        ("1111.11.111.2-SNAPSHOT", "1111.11.111.2"),
        ("0.11.1.0.0.1.patch", "0.11.1.0"),
    )
    for raw, expected in expectations:
        self.assertEqual(expected, get_client_version_from_string(raw))
def test_get_client_version_from_string():
    """
    Each raw client version string should be parsed into the expected version
    """
    # (raw input, expected parsed version), checked in this order
    expectations = (
        ("0.13.2.5.dev0", "0.13.2.5"),
        ("0.11.0.1", "0.11.0.1"),
        ("1111.11.111.1", "1111.11.111.1"),
        ("1111.11.111.2-SNAPSHOT", "1111.11.111.2"),
        ("0.11.1.0.0.1.patch", "0.11.1.0"),
    )
    for raw, expected in expectations:
        assert expected == get_client_version_from_string(raw)
def test_match_version():
    """Versions match only when both major and minor numbers are equal"""
    # Pairs that must be reported as matching
    matching_pairs = (
        ("0.11.0", "0.11.0"),
        ("0.11.0", "0.11.1"),
    )
    # Pairs that must be reported as NOT matching
    differing_pairs = (
        ("1.3.0", "1.4.0"),
        ("1.3.0", "2.3.0"),
    )
    for left, right in matching_pairs:
        assert match_versions(left, right)
    for left, right in differing_pairs:
        assert not match_versions(left, right)

View File

@ -27,7 +27,7 @@ as a starting point.
1. **If you do not have an Airflow service** up and running on your platform, we provide a custom
[Docker](https://hub.docker.com/r/openmetadata/ingestion) image, which already contains the OpenMetadata ingestion
packages and custom [Airflow APIs](https://github.com/open-metadata/openmetadata-airflow-apis) to
deploy Workflows from the UI as well. This is the simplest approach.
deploy Workflows from the UI as well. **This is the simplest approach**.
2. If you already have Airflow up and running and want to use it for the metadata ingestion, you will
need to install the ingestion modules to the host. You can find more information on how to do this
in the Custom Airflow Installation section.
@ -50,6 +50,17 @@ and Airflow's Webserver:
You can find more information on Airflow's Access Control [here](https://airflow.apache.org/docs/apache-airflow/stable/security/access-control.html).
## Shared Volumes
{% note noteType="Warning" %}
The Airflow Webserver, Scheduler and Workers - if using a distributed setup - need to have access to the same shared volumes
with RWX permissions.
{% /note %}
We have specific instructions on how to set up the shared volumes in Kubernetes depending on your cloud deployment [here](/deployment/kubernetes).
## Using the OpenMetadata Ingestion Image
If you are using our `openmetadata/ingestion` Docker image, there is just one thing to do: Configure the OpenMetadata server.
@ -359,6 +370,25 @@ a couple of points to validate:
{"code":403,"message":"Principal: CatalogPrincipal{name='ingestion-bot'} operations [ViewAll] not allowed"}
```
### AirflowException: Dag 'XYZ' could not be found
If you're seeing an error similar to:
```
[...]
task_run
_dag = get_dag(args.subdir, args.dag_id)
File "/home/airflow/.local/lib/python3.9/site-packages/airflow/utils/cli.py", line 235, in get_dag
raise AirflowException(
airflow.exceptions.AirflowException: Dag '...' could not be found; either it does not exist or it failed to parse.
```
This usually means that the shared volumes between the Webserver, Scheduler and Workers have not been properly
enabled in your distributed environment.
We have specific instructions on how to set up the shared volumes in Kubernetes depending on your cloud deployment [here](/deployment/kubernetes).
### ClientInitializationError
The main root cause here is a version mismatch between the server and the client. Make sure that the `openmetadata-ingestion`

View File

@ -27,7 +27,7 @@ as a starting point.
1. **If you do not have an Airflow service** up and running on your platform, we provide a custom
[Docker](https://hub.docker.com/r/openmetadata/ingestion) image, which already contains the OpenMetadata ingestion
packages and custom [Airflow APIs](https://github.com/open-metadata/openmetadata-airflow-apis) to
deploy Workflows from the UI as well. This is the simplest approach.
deploy Workflows from the UI as well. **This is the simplest approach**.
2. If you already have Airflow up and running and want to use it for the metadata ingestion, you will
need to install the ingestion modules to the host. You can find more information on how to do this
in the Custom Airflow Installation section.
@ -50,6 +50,18 @@ and Airflow's Webserver:
You can find more information on Airflow's Access Control [here](https://airflow.apache.org/docs/apache-airflow/stable/security/access-control.html).
## Shared Volumes
{% note noteType="Warning" %}
The Airflow Webserver, Scheduler and Workers - if using a distributed setup - need to have access to the same shared volumes
with RWX permissions.
{% /note %}
We have specific instructions on how to set up the shared volumes in Kubernetes depending on your cloud deployment [here](/deployment/kubernetes).
## Using the OpenMetadata Ingestion Image
If you are using our `openmetadata/ingestion` Docker image, there is just one thing to do: Configure the OpenMetadata server.
@ -359,6 +371,25 @@ a couple of points to validate:
{"code":403,"message":"Principal: CatalogPrincipal{name='ingestion-bot'} operations [ViewAll] not allowed"}
```
### AirflowException: Dag 'XYZ' could not be found
If you're seeing an error similar to:
```
[...]
task_run
_dag = get_dag(args.subdir, args.dag_id)
File "/home/airflow/.local/lib/python3.9/site-packages/airflow/utils/cli.py", line 235, in get_dag
raise AirflowException(
airflow.exceptions.AirflowException: Dag '...' could not be found; either it does not exist or it failed to parse.
```
This usually means that the shared volumes between the Webserver, Scheduler and Workers have not been properly
enabled in your distributed environment.
We have specific instructions on how to set up the shared volumes in Kubernetes depending on your cloud deployment [here](/deployment/kubernetes).
### ClientInitializationError
The main root cause here is a version mismatch between the server and the client. Make sure that the `openmetadata-ingestion`