From ca812852d697ec2f5394449cc9eb3675ff04ecc8 Mon Sep 17 00:00:00 2001 From: Pere Menal-Ferrer Date: Tue, 27 May 2025 10:56:52 +0200 Subject: [PATCH] ci/nox-setup-testing (#21377) * Make pytest to user code from src rather than from install package * Fix test_amundsen: missing None * Update pytest configuration to use importlib mode * Fix custom_basemodel_validation to check model_fields on type(values) to prevent noisy warnings * Refactor referencedByQueries validation to use field_validator as per deprecation warning * Update ColumnJson to use model_rebuild rather as replacement for forward reference updates as per deprecation warning * Move superset test to integration test as they are using testcontainers * Update coverage source path * Fix wrong import. * Add install_dev_env target to Makefile for development dependencies * Add test-unit as extra in setup.py * Modify dependencies in dev environment. * Ignore all airflow tests * Remove coverage in unit_ingestion_dev_env. Revert coverage source to prevent broken CI. * Add nox for running unit test * FIx PowerBI integration test to use pathlib for resource paths and not os.getcwd to prevent failures when not executed from the right path * Move test_helpers.py to unit test, as it is not an integration test. * Remove utils empty folder in integration tests * Refactor testcontainers configuration to avoid pitfalls with max_tries setting * Add nox unit testing basic setup * Add format check session * Refactor nox-unit and add plugins tests * Add GHA for py-nox-ci * Add comment to GHA * Restore conftest.py file * Clarify comment * Simplify function * Fix matrix startegy and nox mismatch * Improve python version strategy with nox and GHA --------- Co-authored-by: Pere Menal --- .github/workflows/py-nox-ci.yml | 43 ++++++ .gitignore | 3 + ingestion/Makefile | 6 + .../{tests/integration/utils => }/__init__.py | 0 ingestion/noxfile.py | 138 ++++++++++++++++++ ingestion/setup.py | 8 +- ingestion/tests/integration/conftest.py | 10 ++ .../powerbi/test_powerbi_file_client.py | 8 +- .../tests/unit/airflow/test_lineage_parser.py | 11 +- .../test_ometa_validation_action.py | 33 +++-- .../azuresql/test_azuresql_sampling.py | 9 ++ .../unit/topology/pipeline/test_airflow.py | 7 + .../utils/test_helpers.py | 0 13 files changed, 252 insertions(+), 24 deletions(-) create mode 100644 .github/workflows/py-nox-ci.yml rename ingestion/{tests/integration/utils => }/__init__.py (100%) create mode 100644 ingestion/noxfile.py rename ingestion/tests/{integration => unit}/utils/test_helpers.py (100%) diff --git a/.github/workflows/py-nox-ci.yml b/.github/workflows/py-nox-ci.yml new file mode 100644 index 00000000000..145f10d9ef7 --- /dev/null +++ b/.github/workflows/py-nox-ci.yml @@ -0,0 +1,43 @@ +name: Python Nox CI + +# This is a temporary workflow to run format and unit tests using Nox. +# It is intended to be manually triggered and will not run on pull requests or pushes. +# Once this is fully tested and stable, we might replace the existing Python CI workflow with this one +# to speed it up and simplify the process. + +on: + workflow_dispatch: # Manual trigger only + +jobs: + format-and-unit: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11"] + + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install uv and nox + run: | + curl -LsSf https://astral.sh/uv/install.sh | sh + pip install nox + + - name: Run Code Quality Checks + # We only want to check the format for a single Python version, + # no need to run it for all versions + if: ${{ matrix.python-version == '3.10' }} + run: | + nox -s lint + + - name: Run Unit Tests + run: | + PYTHON_VERSIONS="${{ matrix.python-version }}" nox -s unit + + - name: Run Unit Tests (specific plugins) + run: | + PYTHON_VERSIONS="${{ matrix.python-version }}" nox -s unit-plugins \ No newline at end of file diff --git a/.gitignore b/.gitignore index d17f53f6555..50de8df3975 100644 --- a/.gitignore +++ b/.gitignore @@ -131,3 +131,6 @@ ingestion/tests/cli_e2e/**/*test.yaml # Cursor rules .cursorrules .cursor/ + +# Nox +ingestion/.nox/ diff --git a/ingestion/Makefile b/ingestion/Makefile index 4b8204c92d6..85a31df5cbf 100644 --- a/ingestion/Makefile +++ b/ingestion/Makefile @@ -15,6 +15,8 @@ install: ## Install the ingestion module to the current environment .PHONY: install_dev_env install_dev_env: ## Install all dependencies for development (in edit mode) + pip install --upgrade pip + pip install nox python -m pip install -e "$(INGESTION_DIR)[all-dev-env, dev, test-unit]" .PHONY: install_dev @@ -117,3 +119,7 @@ coverage: ## Run all Python tests and generate the coverage XML report $(MAKE) run_python_tests coverage xml --rcfile $(INGESTION_DIR)/pyproject.toml -o $(INGESTION_DIR)/coverage.xml || true sed -e "s/$(shell python -c "import site; import os; from pathlib import Path; print(os.path.relpath(site.getsitepackages()[0], str(Path.cwd())).replace('/','\/'))")/src/g" $(INGESTION_DIR)/coverage.xml >> $(INGESTION_DIR)/ci-coverage.xml + +.PHONY: clean-nox +clean-nox: + rm -rf .nox \ No newline at end of file diff --git a/ingestion/tests/integration/utils/__init__.py b/ingestion/__init__.py similarity index 100% rename from ingestion/tests/integration/utils/__init__.py rename to ingestion/__init__.py diff --git a/ingestion/noxfile.py b/ingestion/noxfile.py new file mode 100644 index 00000000000..b90f828cf26 --- /dev/null +++ b/ingestion/noxfile.py @@ -0,0 +1,138 @@ +# Copyright 2025 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Nox sessions for testing and formatting checks. +""" +import ast +import os +from pathlib import Path + +# NOTE: This is still a work in progress! We still need to: +# - Fix ignored unit tests +# - Add integration tests +# - Address the TODOs in the code +import nox + +# TODO: Add python 3.9. PYTHON 3.9 fails in Mac os due to problem with `psycopg2-binary` package + +SUPPORTED_PYTHON_VERSIONS = ["3.10", "3.11"] + + +def get_python_versions(): + # Check if we are in GitHub Actions (i.e., if the 'PYTHON_VERSIONS' environment variable is set) + if "PYTHON_VERSIONS" in os.environ: + # Return the list of Python versions passed from GitHub Actions matrix + python_versions = os.environ["PYTHON_VERSIONS"].split(",") + # if some versions are not supported, they will be ignored by nox + return python_versions + return SUPPORTED_PYTHON_VERSIONS + + +@nox.session( + name="lint", + reuse_venv=False, + venv_backend="uv|venv", +) +def lint(session): + # Usually, we want just one Python version for linting and type check, + # so no need to specify them here + session.install(".[dev]") + # Configuration from pyproject.toml is taken into account out of the box + session.run("black", "--check", ".", "../openmetadata-airflow-apis/") + session.run("isort", "--check-only", ".", "../openmetadata-airflow-apis/") + session.run("pycln", "--diff", ".", "../openmetadata-airflow-apis/") + # TODO: It remains to adapt the command from the Makefile: + # PYTHONPATH="${PYTHONPATH}:$(INGESTION_DIR)/plugins" pylint --errors-only + # --rcfile=$(INGESTION_DIR)/pyproject.toml --fail-under=10 $(PY_SOURCE)/metadata + # || (echo "PyLint error code $$?"; exit 1) + # Some work is required to import plugins correctly + + +@nox.session( + name="unit", reuse_venv=False, venv_backend="uv|venv", python=get_python_versions() +) +def unit(session): + session.install(".[all-dev-env, test-unit]") + # TODO: we need to install pip so that spaCy can install its dependencies + # we should find a way to avoid this + session.install("pip") + + # TODO: We need to remove ignored test once they can be run properly within nox + # Run unit tests + ignored_tests = [ + "test_ometa_endpoints.py", + "test_ometa_mlmodel.py", + "test_dbt.py", + "test_sample_usage.py", + "test_ssl_manager.py", + "test_usage_filter.py", + "test_import_checker.py", + "test_suite/", + "profiler/test_profiler_partitions.py", + "profiler/test_workflow.py", + "workflow", + "topology", + ] + ignore_args = [f"--ignore=tests/unit/{test}" for test in ignored_tests] + + session.run("pytest", "tests/unit/", *ignore_args) + + +# TEST PLUGINS +PLUGINS_TESTS = { + "great-expectations": "tests/unit/great_expectations", +} +PLUGINS = list(PLUGINS_TESTS.keys()) + + +@nox.session( + name="unit-plugins", + reuse_venv=True, + venv_backend="uv|venv", + python=get_python_versions(), +) +@nox.parametrize("plugin", PLUGINS) +def unit_plugins(session, plugin): + versions = extract_attribute_from_setup("VERSIONS", "setup.py") + + if not versions: + session.error("Not able to extract VERSIONS from setup.py") + session.exit(1) + + if plugin not in versions: + session.error( + f"Plugin {plugin} not found in VERSIONS. Available plugins: {list(versions)}" + ) + session.exit(1) + + session.install(".[test-unit]") + session.install(versions[plugin]) + # Assuming the plugin has its own tests in a specific directory + session.run("pytest", PLUGINS_TESTS[plugin]) + + +def extract_attribute_from_setup(attr_name: str, setup_path: str = "setup.py"): + # TODO: We should consider using a more robust method to extract attributes + # such as moving out the attributes to a separate file. + setup_file = Path(setup_path) + if not setup_file.exists(): + raise FileNotFoundError(f"{setup_path} not found") + + with setup_file.open("r") as f: + tree = ast.parse(f.read(), filename=setup_path) + + for node in ast.iter_child_nodes(tree): + if isinstance(node, ast.Assign): + for target in node.targets: + if isinstance(target, ast.Name) and target.id == attr_name: + return ast.literal_eval(node.value) + + return None # Not found diff --git a/ingestion/setup.py b/ingestion/setup.py index 99fa19cf008..fe73aef63d2 100644 --- a/ingestion/setup.py +++ b/ingestion/setup.py @@ -382,6 +382,8 @@ test_unit = { "pytest-order", "dirty-equals", "faker==37.1.0", # The version needs to be fixed to prevent flaky tests! + # TODO: Remove once no unit test requires testcontainers + "testcontainers", } test = { @@ -449,7 +451,7 @@ test = { "python-liquid", VERSIONS["google-cloud-bigtable"], *plugins["bigquery"], - "Faker==37.1.0", # Fixed the version to prevent flaky tests! + "faker==37.1.0", # The version needs to be fixed to prevent flaky tests! } if sys.version_info >= (3, 9): @@ -504,8 +506,8 @@ setup( "data-insight": list(plugins["elasticsearch"]), **{plugin: list(dependencies) for (plugin, dependencies) in plugins.items()}, # FIXME: all-dev-env is a temporary solution to install all dependencies except - # those that might conflict with each other or cause issues in the dev environment - # This covers all development cases where none of the plugins are used + # those that might conflict with each other or cause issues in the dev environment + # This covers all development cases where none of the plugins are used "all-dev-env": filter_requirements( {"airflow", "db2", "great-expectations", "pymssql"} ), diff --git a/ingestion/tests/integration/conftest.py b/ingestion/tests/integration/conftest.py index 4cfbd55c1a2..9826ba2bec0 100644 --- a/ingestion/tests/integration/conftest.py +++ b/ingestion/tests/integration/conftest.py @@ -43,6 +43,16 @@ def pytest_pycollect_makeitem(collector, name, obj): pass +# TODO: Will be addressed when cleaning up integration tests. +# Setting the max tries for testcontainers here has pitfalls, +# the main one being that it cannot be changed through the recommended +# way of using environment variables. The main problem is that +# waiting_utils.py uses testcontainers_config.timeout as a default +# value for the timeout. Therefore, if we want to effectively change +# this value, we must do so before the module is imported, +# which is a potential source of issues. + + @pytest.fixture(scope="session", autouse=sys.version_info >= (3, 9)) def config_testcontatiners(): from testcontainers.core.config import testcontainers_config diff --git a/ingestion/tests/integration/powerbi/test_powerbi_file_client.py b/ingestion/tests/integration/powerbi/test_powerbi_file_client.py index dd132d866b9..c18370f3e1b 100644 --- a/ingestion/tests/integration/powerbi/test_powerbi_file_client.py +++ b/ingestion/tests/integration/powerbi/test_powerbi_file_client.py @@ -13,7 +13,7 @@ PowerBI File Client tests """ -import os +from pathlib import Path from unittest import TestCase from metadata.generated.schema.entity.services.connections.dashboard.powerBIConnection import ( @@ -24,7 +24,7 @@ from metadata.ingestion.source.dashboard.powerbi.file_client import ( _get_datamodel_schema_list, ) -current_dir = os.getcwd() +RESOURCES_DIR = Path(__file__).parent / "resources" powerbi_connection_config = { "type": "PowerBI", @@ -36,8 +36,8 @@ powerbi_connection_config = { "useAdminApis": False, "pbitFilesSource": { "pbitFileConfigType": "local", - "path": f"{current_dir}/ingestion/tests/integration/powerbi/resources", - "pbitFilesExtractDir": f"{current_dir}/ingestion/tests/integration/powerbi/resources/extracted", + "path": str(RESOURCES_DIR), + "pbitFilesExtractDir": str(RESOURCES_DIR / "extracted"), }, } diff --git a/ingestion/tests/unit/airflow/test_lineage_parser.py b/ingestion/tests/unit/airflow/test_lineage_parser.py index 0e8a8756503..77145b5693a 100644 --- a/ingestion/tests/unit/airflow/test_lineage_parser.py +++ b/ingestion/tests/unit/airflow/test_lineage_parser.py @@ -14,9 +14,14 @@ Test lineage parser to get inlets and outlets information from datetime import datetime from typing import List, Set -from airflow import DAG -from airflow.operators.bash import BashOperator -from airflow.serialization.serde import serialize +import pytest + +try: + from airflow import DAG + from airflow.operators.bash import BashOperator + from airflow.serialization.serde import serialize +except ImportError: + pytest.skip("Airflow dependencies not installed", allow_module_level=True) from metadata.generated.schema.entity.data.container import Container from metadata.generated.schema.entity.data.dashboard import Dashboard diff --git a/ingestion/tests/unit/great_expectations/test_ometa_validation_action.py b/ingestion/tests/unit/great_expectations/test_ometa_validation_action.py index ad80d9235a7..d056f1f8a11 100644 --- a/ingestion/tests/unit/great_expectations/test_ometa_validation_action.py +++ b/ingestion/tests/unit/great_expectations/test_ometa_validation_action.py @@ -13,29 +13,35 @@ Test suite for the action module implementation """ import os -import subprocess -import sys from unittest import mock -import great_expectations as gx +import pytest from jinja2 import Environment from pytest import mark from metadata.great_expectations.utils.ometa_config_handler import render_template +_GX_0_18 = "0.18" -def install_gx_018x(): - """Install GX 0.18.x at runtime as we support 0.18.x and 1.x.x and setup will install 1 default version""" +try: + import great_expectations as gx - if not gx.__version__.startswith("0.18."): - subprocess.check_call( - [sys.executable, "-m", "pip", "install", "great-expectations~=0.18.0"] - ) - - -install_gx_018x() + from metadata.great_expectations.action import OpenMetadataValidationAction + + _gx_version_ok = gx.__version__.startswith(_GX_0_18) +except ImportError: + _gx_version_ok = False + +skip_gx = pytest.mark.skipif( + not _gx_version_ok, + reason=( + "Great Expectations not installed or version mismatch " + f"(required: {_GX_0_18})" + ), +) +@skip_gx @mark.parametrize( "input,expected", [ @@ -45,7 +51,6 @@ install_gx_018x() ) def test_get_table_entity(input, expected, mocked_ometa, mocked_ge_data_context): """Test get table entity""" - from metadata.great_expectations.action import OpenMetadataValidationAction ometa_validation = OpenMetadataValidationAction( data_context=mocked_ge_data_context, @@ -57,6 +62,7 @@ def test_get_table_entity(input, expected, mocked_ometa, mocked_ge_data_context) assert res._type == expected +@skip_gx @mark.parametrize( "input,expected", [ @@ -68,7 +74,6 @@ def test_get_table_entity_database_service_name( input, expected, mocked_ometa, mocked_ge_data_context ): """Test get table entity""" - from metadata.great_expectations.action import OpenMetadataValidationAction ometa_validation = OpenMetadataValidationAction( data_context=mocked_ge_data_context, diff --git a/ingestion/tests/unit/profiler/sqlalchemy/azuresql/test_azuresql_sampling.py b/ingestion/tests/unit/profiler/sqlalchemy/azuresql/test_azuresql_sampling.py index a2848bda0fd..a37c91b8662 100644 --- a/ingestion/tests/unit/profiler/sqlalchemy/azuresql/test_azuresql_sampling.py +++ b/ingestion/tests/unit/profiler/sqlalchemy/azuresql/test_azuresql_sampling.py @@ -2,6 +2,15 @@ from unittest import TestCase from unittest.mock import patch from uuid import uuid4 +import pytest + +try: + import pyodbc # noqa: F401 +except ImportError: + # skip the test if pyodbc cannnot be imported: either because is not installed or + # because a broken dynamic library not found + pytest.skip("pyodbc not properly installed", allow_module_level=True) + from sqlalchemy import Column, Integer from sqlalchemy.orm import declarative_base from sqlalchemy.sql.selectable import CTE diff --git a/ingestion/tests/unit/topology/pipeline/test_airflow.py b/ingestion/tests/unit/topology/pipeline/test_airflow.py index 09e5828c4c4..0d721aa77b4 100644 --- a/ingestion/tests/unit/topology/pipeline/test_airflow.py +++ b/ingestion/tests/unit/topology/pipeline/test_airflow.py @@ -14,6 +14,13 @@ Test Airflow processing from unittest import TestCase from unittest.mock import patch +import pytest + +try: + import airflow # noqa: F401 +except ImportError: + pytest.skip("Airflow dependencies not installed", allow_module_level=True) + from metadata.generated.schema.metadataIngestion.workflow import ( OpenMetadataWorkflowConfig, ) diff --git a/ingestion/tests/integration/utils/test_helpers.py b/ingestion/tests/unit/utils/test_helpers.py similarity index 100% rename from ingestion/tests/integration/utils/test_helpers.py rename to ingestion/tests/unit/utils/test_helpers.py