ci/nox-setup-testing (#21377)

* Make pytest use code from src rather than from the installed package

* Fix test_amundsen: missing None

* Update pytest configuration to use importlib mode

* Fix custom_basemodel_validation to check model_fields on type(values) to prevent noisy warnings

* Refactor referencedByQueries validation to use field_validator as per deprecation warning

* Update ColumnJson to use model_rebuild as the replacement for forward reference updates, as per deprecation warning

* Move superset test to integration test as they are using testcontainers

* Update coverage source path

* Fix wrong import.

* Add install_dev_env target to Makefile for development dependencies

* Add test-unit as extra in setup.py

* Modify dependencies in dev environment.

* Ignore all airflow tests

* Remove coverage in unit_ingestion_dev_env. Revert coverage source to prevent broken CI.

* Add nox for running unit test

* Fix PowerBI integration test to use pathlib for resource paths instead of os.getcwd, to prevent failures when not executed from the right path

* Move test_helpers.py to unit test, as it is not an integration test.

* Remove utils empty folder in integration tests

* Refactor testcontainers configuration to avoid pitfalls with max_tries setting

* Add nox unit testing basic setup

* Add format check session

* Refactor nox-unit and add plugins tests

* Add GHA for py-nox-ci

* Add comment to GHA

* Restore conftest.py file

* Clarify comment

* Simplify function

* Fix matrix strategy and nox mismatch

* Improve python version strategy with nox and GHA

---------

Co-authored-by: Pere Menal <pere.menal@getcollate.io>
This commit is contained in:
Pere Menal-Ferrer 2025-05-27 10:56:52 +02:00 committed by GitHub
parent 182ca660c6
commit ca812852d6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 252 additions and 24 deletions

43
.github/workflows/py-nox-ci.yml vendored Normal file
View File

@ -0,0 +1,43 @@
name: Python Nox CI
# This is a temporary workflow to run format and unit tests using Nox.
# It is intended to be manually triggered and will not run on pull requests or pushes.
# Once this is fully tested and stable, we might replace the existing Python CI workflow with this one
# to speed it up and simplify the process.
on:
workflow_dispatch: # Manual trigger only
jobs:
# Single job: format check (one Python version only) followed by the unit test sessions.
format-and-unit:
runs-on: ubuntu-latest
strategy:
matrix:
# One job per Python version; the version is forwarded to nox through PYTHON_VERSIONS below.
python-version: ["3.10", "3.11"]
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
# NOTE(review): confirm the uv binary installed by this script ends up on PATH for the
# following steps; the nox sessions declare venv_backend="uv|venv", so presumably they
# fall back to venv when uv cannot be found.
- name: Install uv and nox
run: |
curl -LsSf https://astral.sh/uv/install.sh | sh
pip install nox
# NOTE(review): the noxfile added by this change lives at ingestion/noxfile.py; confirm the
# nox commands below are executed from that directory (or pass the noxfile explicitly).
- name: Run Code Quality Checks
# We only want to check the format for a single Python version,
# no need to run it for all versions
if: ${{ matrix.python-version == '3.10' }}
run: |
nox -s lint
- name: Run Unit Tests
run: |
PYTHON_VERSIONS="${{ matrix.python-version }}" nox -s unit
- name: Run Unit Tests (specific plugins)
run: |
PYTHON_VERSIONS="${{ matrix.python-version }}" nox -s unit-plugins

3
.gitignore vendored
View File

@ -131,3 +131,6 @@ ingestion/tests/cli_e2e/**/*test.yaml
# Cursor rules
.cursorrules
.cursor/
# Nox
ingestion/.nox/

View File

@ -15,6 +15,8 @@ install: ## Install the ingestion module to the current environment
.PHONY: install_dev_env
install_dev_env: ## Install all dependencies for development (in edit mode)
pip install --upgrade pip
pip install nox
python -m pip install -e "$(INGESTION_DIR)[all-dev-env, dev, test-unit]"
.PHONY: install_dev
@ -117,3 +119,7 @@ coverage: ## Run all Python tests and generate the coverage XML report
$(MAKE) run_python_tests
coverage xml --rcfile $(INGESTION_DIR)/pyproject.toml -o $(INGESTION_DIR)/coverage.xml || true
sed -e "s/$(shell python -c "import site; import os; from pathlib import Path; print(os.path.relpath(site.getsitepackages()[0], str(Path.cwd())).replace('/','\/'))")/src/g" $(INGESTION_DIR)/coverage.xml >> $(INGESTION_DIR)/ci-coverage.xml
.PHONY: clean-nox
clean-nox:
rm -rf .nox

138
ingestion/noxfile.py Normal file
View File

@ -0,0 +1,138 @@
# Copyright 2025 Collate
# Licensed under the Collate Community License, Version 1.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Nox sessions for testing and formatting checks.
"""
import ast
import os
from pathlib import Path
# NOTE: This is still a work in progress! We still need to:
# - Fix ignored unit tests
# - Add integration tests
# - Address the TODOs in the code
import nox
# TODO: Add python 3.9. PYTHON 3.9 fails in Mac os due to problem with `psycopg2-binary` package
SUPPORTED_PYTHON_VERSIONS = ["3.10", "3.11"]


def get_python_versions():
    """Return the Python versions the parametrized nox sessions should target.

    When the ``PYTHON_VERSIONS`` environment variable is set (the GitHub
    Actions workflow forwards its matrix version this way), its value is
    parsed as a comma-separated list. Whitespace around each entry is
    stripped and empty entries are dropped. Versions that are not listed in
    ``SUPPORTED_PYTHON_VERSIONS`` are passed through unchanged.

    Returns:
        The parsed list of versions, or ``SUPPORTED_PYTHON_VERSIONS`` when the
        variable is unset or contains no usable entry.
    """
    raw_versions = os.environ.get("PYTHON_VERSIONS")
    if raw_versions is None:
        return SUPPORTED_PYTHON_VERSIONS

    python_versions = [
        version.strip() for version in raw_versions.split(",") if version.strip()
    ]
    # A blank variable would otherwise produce [""], which is not a valid
    # interpreter specification; fall back to the defaults instead.
    return python_versions or SUPPORTED_PYTHON_VERSIONS
@nox.session(name="lint", reuse_venv=False, venv_backend="uv|venv")
def lint(session):
    """Run the formatting checks (black, isort, pycln) without modifying files.

    No ``python`` argument is given to the session: a single interpreter is
    enough for lint and format checks. Each tool is run against the current
    directory and ``../openmetadata-airflow-apis/``.
    """
    session.install(".[dev]")

    checked_paths = (".", "../openmetadata-airflow-apis/")
    # Each tool picks up its configuration from pyproject.toml on its own.
    check_commands = (
        ("black", "--check"),
        ("isort", "--check-only"),
        ("pycln", "--diff"),
    )
    for tool, flag in check_commands:
        session.run(tool, flag, *checked_paths)

    # TODO: It remains to adapt the command from the Makefile:
    #  PYTHONPATH="${PYTHONPATH}:$(INGESTION_DIR)/plugins" pylint --errors-only
    #  --rcfile=$(INGESTION_DIR)/pyproject.toml --fail-under=10 $(PY_SOURCE)/metadata
    #  || (echo "PyLint error code $$?"; exit 1)
    # Some work is required to import plugins correctly
@nox.session(
    name="unit",
    reuse_venv=False,
    venv_backend="uv|venv",
    python=get_python_versions(),
)
def unit(session):
    """Install the package with its dev/test extras and run the unit tests.

    The session is created once per version returned by
    ``get_python_versions()``. A fixed set of test files and directories under
    ``tests/unit/`` is excluded through pytest ``--ignore`` options.
    """
    session.install(".[all-dev-env, test-unit]")
    # TODO: we need to install pip so that spaCy can install its dependencies
    #  we should find a way to avoid this
    session.install("pip")

    # TODO: We need to remove ignored test once they can be run properly within nox
    unit_root = "tests/unit/"
    excluded = (
        "test_ometa_endpoints.py",
        "test_ometa_mlmodel.py",
        "test_dbt.py",
        "test_sample_usage.py",
        "test_ssl_manager.py",
        "test_usage_filter.py",
        "test_import_checker.py",
        "test_suite/",
        "profiler/test_profiler_partitions.py",
        "profiler/test_workflow.py",
        "workflow",
        "topology",
    )
    pytest_args = [unit_root]
    for relative_path in excluded:
        pytest_args.append(f"--ignore=tests/unit/{relative_path}")

    # Run unit tests
    session.run("pytest", *pytest_args)
# TEST PLUGINS
# Maps each plugin that has dedicated unit tests to the directory holding them.
PLUGINS_TESTS = {"great-expectations": "tests/unit/great_expectations"}
# Plugin names, in declaration order, used to parametrize the plugin session.
PLUGINS = [*PLUGINS_TESTS]
@nox.session(
    name="unit-plugins",
    reuse_venv=True,
    venv_backend="uv|venv",
    python=get_python_versions(),
)
@nox.parametrize("plugin", PLUGINS)
def unit_plugins(session, plugin):
    """Run the unit tests of a single plugin against its pinned requirement.

    The requirement for ``plugin`` is looked up in the ``VERSIONS`` mapping
    read statically from ``setup.py``. The session installs the ``test-unit``
    extra plus that requirement, then runs pytest on the directory registered
    for the plugin in ``PLUGINS_TESTS``.

    Args:
        session: The nox session object.
        plugin: Plugin name; one of the keys of ``PLUGINS_TESTS``.
    """
    versions = extract_attribute_from_setup("VERSIONS", "setup.py")

    # session.error() aborts the session by itself, so no explicit exit call
    # is needed (or reachable) after it.
    if not versions:
        session.error("Not able to extract VERSIONS from setup.py")
    if plugin not in versions:
        session.error(
            f"Plugin {plugin} not found in VERSIONS. Available plugins: {list(versions)}"
        )

    session.install(".[test-unit]")
    session.install(versions[plugin])
    # Assuming the plugin has its own tests in a specific directory
    session.run("pytest", PLUGINS_TESTS[plugin])
def extract_attribute_from_setup(attr_name: str, setup_path: str = "setup.py"):
    """Statically read a module-level literal assignment from a Python file.

    The file is parsed with ``ast`` and never executed. Only top-level
    assignments are inspected, both plain (``NAME = ...``) and annotated
    (``NAME: type = ...``), and the first match wins.

    Args:
        attr_name: Name of the module-level variable to look up.
        setup_path: Path of the Python file to inspect.

    Returns:
        The literal value assigned to ``attr_name``, or ``None`` when no such
        top-level assignment exists.

    Raises:
        FileNotFoundError: If ``setup_path`` does not exist.
        ValueError: Propagated from ``ast.literal_eval`` when the assigned
            expression is not a plain Python literal.
    """
    # TODO: We should consider using a more robust method to extract attributes
    #  such as moving out the attributes to a separate file.
    setup_file = Path(setup_path)
    if not setup_file.exists():
        raise FileNotFoundError(f"{setup_path} not found")

    # Python sources are UTF-8 by default; do not depend on the locale encoding.
    tree = ast.parse(setup_file.read_text(encoding="utf-8"), filename=setup_path)

    for node in ast.iter_child_nodes(tree):
        if isinstance(node, ast.Assign):
            targets = node.targets
        elif isinstance(node, ast.AnnAssign) and node.value is not None:
            # Annotated assignment: single target, value may be absent.
            targets = [node.target]
        else:
            continue
        if any(
            isinstance(target, ast.Name) and target.id == attr_name
            for target in targets
        ):
            return ast.literal_eval(node.value)
    return None  # Not found

View File

@ -382,6 +382,8 @@ test_unit = {
"pytest-order",
"dirty-equals",
"faker==37.1.0", # The version needs to be fixed to prevent flaky tests!
# TODO: Remove once no unit test requires testcontainers
"testcontainers",
}
test = {
@ -449,7 +451,7 @@ test = {
"python-liquid",
VERSIONS["google-cloud-bigtable"],
*plugins["bigquery"],
"Faker==37.1.0", # Fixed the version to prevent flaky tests!
"faker==37.1.0", # The version needs to be fixed to prevent flaky tests!
}
if sys.version_info >= (3, 9):
@ -504,8 +506,8 @@ setup(
"data-insight": list(plugins["elasticsearch"]),
**{plugin: list(dependencies) for (plugin, dependencies) in plugins.items()},
# FIXME: all-dev-env is a temporary solution to install all dependencies except
# those that might conflict with each other or cause issues in the dev environment
# This covers all development cases where none of the plugins are used
# those that might conflict with each other or cause issues in the dev environment
# This covers all development cases where none of the plugins are used
"all-dev-env": filter_requirements(
{"airflow", "db2", "great-expectations", "pymssql"}
),

View File

@ -43,6 +43,16 @@ def pytest_pycollect_makeitem(collector, name, obj):
pass
# TODO: Will be addressed when cleaning up integration tests.
# Setting the max tries for testcontainers here has pitfalls,
# the main one being that it cannot be changed through the recommended
# way of using environment variables. The main problem is that
# waiting_utils.py uses testcontainers_config.timeout as a default
# value for the timeout. Therefore, if we want to effectively change
# this value, we must do so before the module is imported,
# which is a potential source of issues.
@pytest.fixture(scope="session", autouse=sys.version_info >= (3, 9))
def config_testcontatiners():
from testcontainers.core.config import testcontainers_config

View File

@ -13,7 +13,7 @@
PowerBI File Client tests
"""
import os
from pathlib import Path
from unittest import TestCase
from metadata.generated.schema.entity.services.connections.dashboard.powerBIConnection import (
@ -24,7 +24,7 @@ from metadata.ingestion.source.dashboard.powerbi.file_client import (
_get_datamodel_schema_list,
)
current_dir = os.getcwd()
RESOURCES_DIR = Path(__file__).parent / "resources"
powerbi_connection_config = {
"type": "PowerBI",
@ -36,8 +36,8 @@ powerbi_connection_config = {
"useAdminApis": False,
"pbitFilesSource": {
"pbitFileConfigType": "local",
"path": f"{current_dir}/ingestion/tests/integration/powerbi/resources",
"pbitFilesExtractDir": f"{current_dir}/ingestion/tests/integration/powerbi/resources/extracted",
"path": str(RESOURCES_DIR),
"pbitFilesExtractDir": str(RESOURCES_DIR / "extracted"),
},
}

View File

@ -14,9 +14,14 @@ Test lineage parser to get inlets and outlets information
from datetime import datetime
from typing import List, Set
from airflow import DAG
from airflow.operators.bash import BashOperator
from airflow.serialization.serde import serialize
import pytest
try:
from airflow import DAG
from airflow.operators.bash import BashOperator
from airflow.serialization.serde import serialize
except ImportError:
pytest.skip("Airflow dependencies not installed", allow_module_level=True)
from metadata.generated.schema.entity.data.container import Container
from metadata.generated.schema.entity.data.dashboard import Dashboard

View File

@ -13,29 +13,35 @@ Test suite for the action module implementation
"""
import os
import subprocess
import sys
from unittest import mock
import great_expectations as gx
import pytest
from jinja2 import Environment
from pytest import mark
from metadata.great_expectations.utils.ometa_config_handler import render_template
_GX_0_18 = "0.18"
def install_gx_018x():
"""Install GX 0.18.x at runtime as we support 0.18.x and 1.x.x and setup will install 1 default version"""
try:
import great_expectations as gx
if not gx.__version__.startswith("0.18."):
subprocess.check_call(
[sys.executable, "-m", "pip", "install", "great-expectations~=0.18.0"]
)
install_gx_018x()
from metadata.great_expectations.action import OpenMetadataValidationAction
_gx_version_ok = gx.__version__.startswith(_GX_0_18)
except ImportError:
_gx_version_ok = False
skip_gx = pytest.mark.skipif(
not _gx_version_ok,
reason=(
"Great Expectations not installed or version mismatch "
f"(required: {_GX_0_18})"
),
)
@skip_gx
@mark.parametrize(
"input,expected",
[
@ -45,7 +51,6 @@ install_gx_018x()
)
def test_get_table_entity(input, expected, mocked_ometa, mocked_ge_data_context):
"""Test get table entity"""
from metadata.great_expectations.action import OpenMetadataValidationAction
ometa_validation = OpenMetadataValidationAction(
data_context=mocked_ge_data_context,
@ -57,6 +62,7 @@ def test_get_table_entity(input, expected, mocked_ometa, mocked_ge_data_context)
assert res._type == expected
@skip_gx
@mark.parametrize(
"input,expected",
[
@ -68,7 +74,6 @@ def test_get_table_entity_database_service_name(
input, expected, mocked_ometa, mocked_ge_data_context
):
"""Test get table entity"""
from metadata.great_expectations.action import OpenMetadataValidationAction
ometa_validation = OpenMetadataValidationAction(
data_context=mocked_ge_data_context,

View File

@ -2,6 +2,15 @@ from unittest import TestCase
from unittest.mock import patch
from uuid import uuid4
import pytest
try:
import pyodbc # noqa: F401
except ImportError:
# Skip the test if pyodbc cannot be imported: either because it is not installed or
# because a required dynamic library cannot be found
pytest.skip("pyodbc not properly installed", allow_module_level=True)
from sqlalchemy import Column, Integer
from sqlalchemy.orm import declarative_base
from sqlalchemy.sql.selectable import CTE

View File

@ -14,6 +14,13 @@ Test Airflow processing
from unittest import TestCase
from unittest.mock import patch
import pytest
try:
import airflow # noqa: F401
except ImportError:
pytest.skip("Airflow dependencies not installed", allow_module_level=True)
from metadata.generated.schema.metadataIngestion.workflow import (
OpenMetadataWorkflowConfig,
)