From ca812852d697ec2f5394449cc9eb3675ff04ecc8 Mon Sep 17 00:00:00 2001
From: Pere Menal-Ferrer
Date: Tue, 27 May 2025 10:56:52 +0200
Subject: [PATCH] ci/nox-setup-testing (#21377)
* Make pytest to user code from src rather than from install package
* Fix test_amundsen: missing None
* Update pytest configuration to use importlib mode
* Fix custom_basemodel_validation to check model_fields on type(values) to prevent noisy warnings
* Refactor referencedByQueries validation to use field_validator as per deprecation warning
* Update ColumnJson to use model_rebuild rather as replacement for forward reference updates as per deprecation warning
* Move superset test to integration test as they are using testcontainers
* Update coverage source path
* Fix wrong import.
* Add install_dev_env target to Makefile for development dependencies
* Add test-unit as extra in setup.py
* Modify dependencies in dev environment.
* Ignore all airflow tests
* Remove coverage in unit_ingestion_dev_env. Revert coverage source to prevent broken CI.
* Add nox for running unit test
* FIx PowerBI integration test to use pathlib for resource paths and not os.getcwd to prevent failures when not executed from the right path
* Move test_helpers.py to unit test, as it is not an integration test.
* Remove utils empty folder in integration tests
* Refactor testcontainers configuration to avoid pitfalls with max_tries setting
* Add nox unit testing basic setup
* Add format check session
* Refactor nox-unit and add plugins tests
* Add GHA for py-nox-ci
* Add comment to GHA
* Restore conftest.py file
* Clarify comment
* Simplify function
* Fix matrix startegy and nox mismatch
* Improve python version strategy with nox and GHA
---------
Co-authored-by: Pere Menal
---
.github/workflows/py-nox-ci.yml | 43 ++++++
.gitignore | 3 +
ingestion/Makefile | 6 +
.../{tests/integration/utils => }/__init__.py | 0
ingestion/noxfile.py | 138 ++++++++++++++++++
ingestion/setup.py | 8 +-
ingestion/tests/integration/conftest.py | 10 ++
.../powerbi/test_powerbi_file_client.py | 8 +-
.../tests/unit/airflow/test_lineage_parser.py | 11 +-
.../test_ometa_validation_action.py | 33 +++--
.../azuresql/test_azuresql_sampling.py | 9 ++
.../unit/topology/pipeline/test_airflow.py | 7 +
.../utils/test_helpers.py | 0
13 files changed, 252 insertions(+), 24 deletions(-)
create mode 100644 .github/workflows/py-nox-ci.yml
rename ingestion/{tests/integration/utils => }/__init__.py (100%)
create mode 100644 ingestion/noxfile.py
rename ingestion/tests/{integration => unit}/utils/test_helpers.py (100%)
diff --git a/.github/workflows/py-nox-ci.yml b/.github/workflows/py-nox-ci.yml
new file mode 100644
index 00000000000..145f10d9ef7
--- /dev/null
+++ b/.github/workflows/py-nox-ci.yml
@@ -0,0 +1,43 @@
+name: Python Nox CI
+
+# This is a temporary workflow to run format and unit tests using Nox.
+# It is intended to be manually triggered and will not run on pull requests or pushes.
+# Once this is fully tested and stable, we might replace the existing Python CI workflow with this one
+# to speed it up and simplify the process.
+
+on:
+ workflow_dispatch: # Manual trigger only
+
+jobs:
+ format-and-unit:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ["3.10", "3.11"]
+
+ steps:
+ - uses: actions/checkout@v3
+
+ - uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install uv and nox
+ run: |
+ curl -LsSf https://astral.sh/uv/install.sh | sh
+ pip install nox
+
+ - name: Run Code Quality Checks
+ # We only want to check the format for a single Python version,
+ # no need to run it for all versions
+ if: ${{ matrix.python-version == '3.10' }}
+ run: |
+ nox -s lint
+
+ - name: Run Unit Tests
+ run: |
+ PYTHON_VERSIONS="${{ matrix.python-version }}" nox -s unit
+
+ - name: Run Unit Tests (specific plugins)
+ run: |
+ PYTHON_VERSIONS="${{ matrix.python-version }}" nox -s unit-plugins
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index d17f53f6555..50de8df3975 100644
--- a/.gitignore
+++ b/.gitignore
@@ -131,3 +131,6 @@ ingestion/tests/cli_e2e/**/*test.yaml
# Cursor rules
.cursorrules
.cursor/
+
+# Nox
+ingestion/.nox/
diff --git a/ingestion/Makefile b/ingestion/Makefile
index 4b8204c92d6..85a31df5cbf 100644
--- a/ingestion/Makefile
+++ b/ingestion/Makefile
@@ -15,6 +15,8 @@ install: ## Install the ingestion module to the current environment
.PHONY: install_dev_env
install_dev_env: ## Install all dependencies for development (in edit mode)
+ pip install --upgrade pip
+ pip install nox
python -m pip install -e "$(INGESTION_DIR)[all-dev-env, dev, test-unit]"
.PHONY: install_dev
@@ -117,3 +119,7 @@ coverage: ## Run all Python tests and generate the coverage XML report
$(MAKE) run_python_tests
coverage xml --rcfile $(INGESTION_DIR)/pyproject.toml -o $(INGESTION_DIR)/coverage.xml || true
sed -e "s/$(shell python -c "import site; import os; from pathlib import Path; print(os.path.relpath(site.getsitepackages()[0], str(Path.cwd())).replace('/','\/'))")/src/g" $(INGESTION_DIR)/coverage.xml >> $(INGESTION_DIR)/ci-coverage.xml
+
+.PHONY: clean-nox
+clean-nox:
+ rm -rf .nox
\ No newline at end of file
diff --git a/ingestion/tests/integration/utils/__init__.py b/ingestion/__init__.py
similarity index 100%
rename from ingestion/tests/integration/utils/__init__.py
rename to ingestion/__init__.py
diff --git a/ingestion/noxfile.py b/ingestion/noxfile.py
new file mode 100644
index 00000000000..b90f828cf26
--- /dev/null
+++ b/ingestion/noxfile.py
@@ -0,0 +1,138 @@
+# Copyright 2025 Collate
+# Licensed under the Collate Community License, Version 1.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Nox sessions for testing and formatting checks.
+"""
+import ast
+import os
+from pathlib import Path
+
+# NOTE: This is still a work in progress! We still need to:
+# - Fix ignored unit tests
+# - Add integration tests
+# - Address the TODOs in the code
+import nox
+
+# TODO: Add python 3.9. PYTHON 3.9 fails in Mac os due to problem with `psycopg2-binary` package
+
+SUPPORTED_PYTHON_VERSIONS = ["3.10", "3.11"]
+
+
+def get_python_versions():
+ # Check if we are in GitHub Actions (i.e., if the 'PYTHON_VERSIONS' environment variable is set)
+ if "PYTHON_VERSIONS" in os.environ:
+ # Return the list of Python versions passed from GitHub Actions matrix
+ python_versions = os.environ["PYTHON_VERSIONS"].split(",")
+ # if some versions are not supported, they will be ignored by nox
+ return python_versions
+ return SUPPORTED_PYTHON_VERSIONS
+
+
+@nox.session(
+ name="lint",
+ reuse_venv=False,
+ venv_backend="uv|venv",
+)
+def lint(session):
+ # Usually, we want just one Python version for linting and type check,
+ # so no need to specify them here
+ session.install(".[dev]")
+ # Configuration from pyproject.toml is taken into account out of the box
+ session.run("black", "--check", ".", "../openmetadata-airflow-apis/")
+ session.run("isort", "--check-only", ".", "../openmetadata-airflow-apis/")
+ session.run("pycln", "--diff", ".", "../openmetadata-airflow-apis/")
+ # TODO: It remains to adapt the command from the Makefile:
+ # PYTHONPATH="${PYTHONPATH}:$(INGESTION_DIR)/plugins" pylint --errors-only
+ # --rcfile=$(INGESTION_DIR)/pyproject.toml --fail-under=10 $(PY_SOURCE)/metadata
+ # || (echo "PyLint error code $$?"; exit 1)
+ # Some work is required to import plugins correctly
+
+
+@nox.session(
+ name="unit", reuse_venv=False, venv_backend="uv|venv", python=get_python_versions()
+)
+def unit(session):
+ session.install(".[all-dev-env, test-unit]")
+ # TODO: we need to install pip so that spaCy can install its dependencies
+ # we should find a way to avoid this
+ session.install("pip")
+
+ # TODO: We need to remove ignored test once they can be run properly within nox
+ # Run unit tests
+ ignored_tests = [
+ "test_ometa_endpoints.py",
+ "test_ometa_mlmodel.py",
+ "test_dbt.py",
+ "test_sample_usage.py",
+ "test_ssl_manager.py",
+ "test_usage_filter.py",
+ "test_import_checker.py",
+ "test_suite/",
+ "profiler/test_profiler_partitions.py",
+ "profiler/test_workflow.py",
+ "workflow",
+ "topology",
+ ]
+ ignore_args = [f"--ignore=tests/unit/{test}" for test in ignored_tests]
+
+ session.run("pytest", "tests/unit/", *ignore_args)
+
+
+# TEST PLUGINS
+PLUGINS_TESTS = {
+ "great-expectations": "tests/unit/great_expectations",
+}
+PLUGINS = list(PLUGINS_TESTS.keys())
+
+
+@nox.session(
+ name="unit-plugins",
+ reuse_venv=True,
+ venv_backend="uv|venv",
+ python=get_python_versions(),
+)
+@nox.parametrize("plugin", PLUGINS)
+def unit_plugins(session, plugin):
+ versions = extract_attribute_from_setup("VERSIONS", "setup.py")
+
+ if not versions:
+ session.error("Not able to extract VERSIONS from setup.py")
+ session.exit(1)
+
+ if plugin not in versions:
+ session.error(
+ f"Plugin {plugin} not found in VERSIONS. Available plugins: {list(versions)}"
+ )
+ session.exit(1)
+
+ session.install(".[test-unit]")
+ session.install(versions[plugin])
+ # Assuming the plugin has its own tests in a specific directory
+ session.run("pytest", PLUGINS_TESTS[plugin])
+
+
+def extract_attribute_from_setup(attr_name: str, setup_path: str = "setup.py"):
+ # TODO: We should consider using a more robust method to extract attributes
+ # such as moving out the attributes to a separate file.
+ setup_file = Path(setup_path)
+ if not setup_file.exists():
+ raise FileNotFoundError(f"{setup_path} not found")
+
+ with setup_file.open("r") as f:
+ tree = ast.parse(f.read(), filename=setup_path)
+
+ for node in ast.iter_child_nodes(tree):
+ if isinstance(node, ast.Assign):
+ for target in node.targets:
+ if isinstance(target, ast.Name) and target.id == attr_name:
+ return ast.literal_eval(node.value)
+
+ return None # Not found
diff --git a/ingestion/setup.py b/ingestion/setup.py
index 99fa19cf008..fe73aef63d2 100644
--- a/ingestion/setup.py
+++ b/ingestion/setup.py
@@ -382,6 +382,8 @@ test_unit = {
"pytest-order",
"dirty-equals",
"faker==37.1.0", # The version needs to be fixed to prevent flaky tests!
+ # TODO: Remove once no unit test requires testcontainers
+ "testcontainers",
}
test = {
@@ -449,7 +451,7 @@ test = {
"python-liquid",
VERSIONS["google-cloud-bigtable"],
*plugins["bigquery"],
- "Faker==37.1.0", # Fixed the version to prevent flaky tests!
+ "faker==37.1.0", # The version needs to be fixed to prevent flaky tests!
}
if sys.version_info >= (3, 9):
@@ -504,8 +506,8 @@ setup(
"data-insight": list(plugins["elasticsearch"]),
**{plugin: list(dependencies) for (plugin, dependencies) in plugins.items()},
# FIXME: all-dev-env is a temporary solution to install all dependencies except
- # those that might conflict with each other or cause issues in the dev environment
- # This covers all development cases where none of the plugins are used
+ # those that might conflict with each other or cause issues in the dev environment
+ # This covers all development cases where none of the plugins are used
"all-dev-env": filter_requirements(
{"airflow", "db2", "great-expectations", "pymssql"}
),
diff --git a/ingestion/tests/integration/conftest.py b/ingestion/tests/integration/conftest.py
index 4cfbd55c1a2..9826ba2bec0 100644
--- a/ingestion/tests/integration/conftest.py
+++ b/ingestion/tests/integration/conftest.py
@@ -43,6 +43,16 @@ def pytest_pycollect_makeitem(collector, name, obj):
pass
+# TODO: Will be addressed when cleaning up integration tests.
+# Setting the max tries for testcontainers here has pitfalls,
+# the main one being that it cannot be changed through the recommended
+# way of using environment variables. The main problem is that
+# waiting_utils.py uses testcontainers_config.timeout as a default
+# value for the timeout. Therefore, if we want to effectively change
+# this value, we must do so before the module is imported,
+# which is a potential source of issues.
+
+
@pytest.fixture(scope="session", autouse=sys.version_info >= (3, 9))
def config_testcontatiners():
from testcontainers.core.config import testcontainers_config
diff --git a/ingestion/tests/integration/powerbi/test_powerbi_file_client.py b/ingestion/tests/integration/powerbi/test_powerbi_file_client.py
index dd132d866b9..c18370f3e1b 100644
--- a/ingestion/tests/integration/powerbi/test_powerbi_file_client.py
+++ b/ingestion/tests/integration/powerbi/test_powerbi_file_client.py
@@ -13,7 +13,7 @@
PowerBI File Client tests
"""
-import os
+from pathlib import Path
from unittest import TestCase
from metadata.generated.schema.entity.services.connections.dashboard.powerBIConnection import (
@@ -24,7 +24,7 @@ from metadata.ingestion.source.dashboard.powerbi.file_client import (
_get_datamodel_schema_list,
)
-current_dir = os.getcwd()
+RESOURCES_DIR = Path(__file__).parent / "resources"
powerbi_connection_config = {
"type": "PowerBI",
@@ -36,8 +36,8 @@ powerbi_connection_config = {
"useAdminApis": False,
"pbitFilesSource": {
"pbitFileConfigType": "local",
- "path": f"{current_dir}/ingestion/tests/integration/powerbi/resources",
- "pbitFilesExtractDir": f"{current_dir}/ingestion/tests/integration/powerbi/resources/extracted",
+ "path": str(RESOURCES_DIR),
+ "pbitFilesExtractDir": str(RESOURCES_DIR / "extracted"),
},
}
diff --git a/ingestion/tests/unit/airflow/test_lineage_parser.py b/ingestion/tests/unit/airflow/test_lineage_parser.py
index 0e8a8756503..77145b5693a 100644
--- a/ingestion/tests/unit/airflow/test_lineage_parser.py
+++ b/ingestion/tests/unit/airflow/test_lineage_parser.py
@@ -14,9 +14,14 @@ Test lineage parser to get inlets and outlets information
from datetime import datetime
from typing import List, Set
-from airflow import DAG
-from airflow.operators.bash import BashOperator
-from airflow.serialization.serde import serialize
+import pytest
+
+try:
+ from airflow import DAG
+ from airflow.operators.bash import BashOperator
+ from airflow.serialization.serde import serialize
+except ImportError:
+ pytest.skip("Airflow dependencies not installed", allow_module_level=True)
from metadata.generated.schema.entity.data.container import Container
from metadata.generated.schema.entity.data.dashboard import Dashboard
diff --git a/ingestion/tests/unit/great_expectations/test_ometa_validation_action.py b/ingestion/tests/unit/great_expectations/test_ometa_validation_action.py
index ad80d9235a7..d056f1f8a11 100644
--- a/ingestion/tests/unit/great_expectations/test_ometa_validation_action.py
+++ b/ingestion/tests/unit/great_expectations/test_ometa_validation_action.py
@@ -13,29 +13,35 @@ Test suite for the action module implementation
"""
import os
-import subprocess
-import sys
from unittest import mock
-import great_expectations as gx
+import pytest
from jinja2 import Environment
from pytest import mark
from metadata.great_expectations.utils.ometa_config_handler import render_template
+_GX_0_18 = "0.18"
-def install_gx_018x():
- """Install GX 0.18.x at runtime as we support 0.18.x and 1.x.x and setup will install 1 default version"""
+try:
+ import great_expectations as gx
- if not gx.__version__.startswith("0.18."):
- subprocess.check_call(
- [sys.executable, "-m", "pip", "install", "great-expectations~=0.18.0"]
- )
-
-
-install_gx_018x()
+ from metadata.great_expectations.action import OpenMetadataValidationAction
+
+ _gx_version_ok = gx.__version__.startswith(_GX_0_18)
+except ImportError:
+ _gx_version_ok = False
+
+skip_gx = pytest.mark.skipif(
+ not _gx_version_ok,
+ reason=(
+ "Great Expectations not installed or version mismatch "
+ f"(required: {_GX_0_18})"
+ ),
+)
+@skip_gx
@mark.parametrize(
"input,expected",
[
@@ -45,7 +51,6 @@ install_gx_018x()
)
def test_get_table_entity(input, expected, mocked_ometa, mocked_ge_data_context):
"""Test get table entity"""
- from metadata.great_expectations.action import OpenMetadataValidationAction
ometa_validation = OpenMetadataValidationAction(
data_context=mocked_ge_data_context,
@@ -57,6 +62,7 @@ def test_get_table_entity(input, expected, mocked_ometa, mocked_ge_data_context)
assert res._type == expected
+@skip_gx
@mark.parametrize(
"input,expected",
[
@@ -68,7 +74,6 @@ def test_get_table_entity_database_service_name(
input, expected, mocked_ometa, mocked_ge_data_context
):
"""Test get table entity"""
- from metadata.great_expectations.action import OpenMetadataValidationAction
ometa_validation = OpenMetadataValidationAction(
data_context=mocked_ge_data_context,
diff --git a/ingestion/tests/unit/profiler/sqlalchemy/azuresql/test_azuresql_sampling.py b/ingestion/tests/unit/profiler/sqlalchemy/azuresql/test_azuresql_sampling.py
index a2848bda0fd..a37c91b8662 100644
--- a/ingestion/tests/unit/profiler/sqlalchemy/azuresql/test_azuresql_sampling.py
+++ b/ingestion/tests/unit/profiler/sqlalchemy/azuresql/test_azuresql_sampling.py
@@ -2,6 +2,15 @@ from unittest import TestCase
from unittest.mock import patch
from uuid import uuid4
+import pytest
+
+try:
+ import pyodbc # noqa: F401
+except ImportError:
+ # skip the test if pyodbc cannnot be imported: either because is not installed or
+ # because a broken dynamic library not found
+ pytest.skip("pyodbc not properly installed", allow_module_level=True)
+
from sqlalchemy import Column, Integer
from sqlalchemy.orm import declarative_base
from sqlalchemy.sql.selectable import CTE
diff --git a/ingestion/tests/unit/topology/pipeline/test_airflow.py b/ingestion/tests/unit/topology/pipeline/test_airflow.py
index 09e5828c4c4..0d721aa77b4 100644
--- a/ingestion/tests/unit/topology/pipeline/test_airflow.py
+++ b/ingestion/tests/unit/topology/pipeline/test_airflow.py
@@ -14,6 +14,13 @@ Test Airflow processing
from unittest import TestCase
from unittest.mock import patch
+import pytest
+
+try:
+ import airflow # noqa: F401
+except ImportError:
+ pytest.skip("Airflow dependencies not installed", allow_module_level=True)
+
from metadata.generated.schema.metadataIngestion.workflow import (
OpenMetadataWorkflowConfig,
)
diff --git a/ingestion/tests/integration/utils/test_helpers.py b/ingestion/tests/unit/utils/test_helpers.py
similarity index 100%
rename from ingestion/tests/integration/utils/test_helpers.py
rename to ingestion/tests/unit/utils/test_helpers.py