ci/nox-setup-testing (#21377)

* Make pytest use code from src rather than from the installed package

* Fix test_amundsen: missing None

* Update pytest configuration to use importlib mode

* Fix custom_basemodel_validation to check model_fields on type(values) to prevent noisy warnings

* Refactor referencedByQueries validation to use field_validator as per deprecation warning

* Update ColumnJson to use model_rebuild as a replacement for forward reference updates, as per deprecation warning

* Move superset test to integration test as they are using testcontainers

* Update coverage source path

* Fix wrong import.

* Add install_dev_env target to Makefile for development dependencies

* Add test-unit as extra in setup.py

* Modify dependencies in dev environment.

* Ignore all airflow tests

* Remove coverage in unit_ingestion_dev_env. Revert coverage source to prevent broken CI.

* Add nox for running unit test

* Fix PowerBI integration test to use pathlib for resource paths instead of os.getcwd, to prevent failures when not executed from the right path

* Move test_helpers.py to unit test, as it is not an integration test.

* Remove utils empty folder in integration tests

* Refactor testcontainers configuration to avoid pitfalls with max_tries setting

* Add nox unit testing basic setup

* Add format check session

* Refactor nox-unit and add plugins tests

* Add GHA for py-nox-ci

* Add comment to GHA

* Restore conftest.py file

* Clarify comment

* Simplify function

* Fix matrix strategy and nox mismatch

* Improve python version strategy with nox and GHA

---------

Co-authored-by: Pere Menal <pere.menal@getcollate.io>
This commit is contained in:
Pere Menal-Ferrer 2025-05-27 10:56:52 +02:00 committed by GitHub
parent 182ca660c6
commit ca812852d6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 252 additions and 24 deletions

43
.github/workflows/py-nox-ci.yml vendored Normal file
View File

@ -0,0 +1,43 @@
name: Python Nox CI

# This is a temporary workflow to run format and unit tests using Nox.
# It is intended to be manually triggered and will not run on pull requests or pushes.
# Once this is fully tested and stable, we might replace the existing Python CI workflow with this one
# to speed it up and simplify the process.
on:
  workflow_dispatch:  # Manual trigger only

jobs:
  format-and-unit:
    runs-on: ubuntu-latest
    # The noxfile lives in ingestion/ and refers to paths relative to that
    # directory (".", "setup.py", "../openmetadata-airflow-apis/"), so every
    # `run` step has to be executed from there for nox to find it.
    defaults:
      run:
        working-directory: ingestion
    strategy:
      matrix:
        python-version: ["3.10", "3.11"]

    steps:
      - uses: actions/checkout@v3

      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install uv and nox
        run: |
          curl -LsSf https://astral.sh/uv/install.sh | sh
          pip install nox

      - name: Run Code Quality Checks
        # We only want to check the format for a single Python version,
        # no need to run it for all versions
        if: ${{ matrix.python-version == '3.10' }}
        run: |
          nox -s lint

      - name: Run Unit Tests
        # PYTHON_VERSIONS restricts the nox sessions to the matrix interpreter
        run: |
          PYTHON_VERSIONS="${{ matrix.python-version }}" nox -s unit

      - name: Run Unit Tests (specific plugins)
        run: |
          PYTHON_VERSIONS="${{ matrix.python-version }}" nox -s unit-plugins

3
.gitignore vendored
View File

@ -131,3 +131,6 @@ ingestion/tests/cli_e2e/**/*test.yaml
# Cursor rules
.cursorrules
.cursor/
# Nox
ingestion/.nox/

View File

@ -15,6 +15,8 @@ install: ## Install the ingestion module to the current environment
.PHONY: install_dev_env
install_dev_env: ## Install all dependencies for development (in edit mode)
pip install --upgrade pip
pip install nox
python -m pip install -e "$(INGESTION_DIR)[all-dev-env, dev, test-unit]"
.PHONY: install_dev
@ -117,3 +119,7 @@ coverage: ## Run all Python tests and generate the coverage XML report
$(MAKE) run_python_tests
coverage xml --rcfile $(INGESTION_DIR)/pyproject.toml -o $(INGESTION_DIR)/coverage.xml || true
sed -e "s/$(shell python -c "import site; import os; from pathlib import Path; print(os.path.relpath(site.getsitepackages()[0], str(Path.cwd())).replace('/','\/'))")/src/g" $(INGESTION_DIR)/coverage.xml >> $(INGESTION_DIR)/ci-coverage.xml
.PHONY: clean-nox
clean-nox:  ## Remove the virtual environments created by Nox
	# Nox environments are created next to the noxfile, i.e. in ingestion/.nox
	rm -rf $(INGESTION_DIR)/.nox

138
ingestion/noxfile.py Normal file
View File

@ -0,0 +1,138 @@
# Copyright 2025 Collate
# Licensed under the Collate Community License, Version 1.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Nox sessions for testing and formatting checks.
"""
import ast
import os
from pathlib import Path
# NOTE: This is still a work in progress! We still need to:
# - Fix ignored unit tests
# - Add integration tests
# - Address the TODOs in the code
import nox
# TODO: Add Python 3.9. Python 3.9 fails on macOS due to a problem with the `psycopg2-binary` package
SUPPORTED_PYTHON_VERSIONS = ["3.10", "3.11"]


def get_python_versions():
    """Return the Python versions the parametrized sessions should run on.

    When the ``PYTHON_VERSIONS`` environment variable is set (the GitHub
    Actions workflow passes its matrix value this way), it is parsed as a
    comma-separated list. Whitespace around each entry is stripped and empty
    entries are dropped, so values such as ``"3.10, 3.11"`` or ``"3.11,"`` are
    accepted. When the variable is unset, or contains no version at all, the
    default ``SUPPORTED_PYTHON_VERSIONS`` are returned.

    Returns:
        A list of version strings, e.g. ``["3.10", "3.11"]``.
    """
    raw_versions = os.environ.get("PYTHON_VERSIONS")
    if raw_versions is not None:
        requested = [
            version.strip() for version in raw_versions.split(",") if version.strip()
        ]
        if requested:
            # Entries are not validated against SUPPORTED_PYTHON_VERSIONS here;
            # nox is left to deal with interpreters it cannot find.
            return requested
    # Return a copy so callers cannot mutate the module-level default
    return list(SUPPORTED_PYTHON_VERSIONS)
@nox.session(
    name="lint",
    reuse_venv=False,
    venv_backend="uv|venv",
)
def lint(session):
    """Run the black, isort and pycln checks on the ingestion and airflow-apis code."""
    # No `python=` argument here: linting and type checking usually need
    # a single Python version only.
    session.install(".[dev]")

    targets = (".", "../openmetadata-airflow-apis/")
    # Each tool picks up its configuration from pyproject.toml out of the box
    checks = (
        ("black", "--check"),
        ("isort", "--check-only"),
        ("pycln", "--diff"),
    )
    for tool, flag in checks:
        session.run(tool, flag, *targets)

    # TODO: It remains to adapt the command from the Makefile:
    #   PYTHONPATH="${PYTHONPATH}:$(INGESTION_DIR)/plugins" pylint --errors-only
    #   --rcfile=$(INGESTION_DIR)/pyproject.toml --fail-under=10 $(PY_SOURCE)/metadata
    #   || (echo "PyLint error code $$?"; exit 1)
    # Some work is required to import plugins correctly
@nox.session(
    name="unit",
    reuse_venv=False,
    venv_backend="uv|venv",
    python=get_python_versions(),
)
def unit(session):
    """Install the package with its dev/test extras and run the unit test suite."""
    session.install(".[all-dev-env, test-unit]")
    # TODO: we need to install pip so that spaCy can install its dependencies
    #  we should find a way to avoid this
    session.install("pip")

    # TODO: We need to remove ignored test once they can be run properly within nox
    # Paths below are relative to tests/unit/
    excluded = (
        "test_ometa_endpoints.py",
        "test_ometa_mlmodel.py",
        "test_dbt.py",
        "test_sample_usage.py",
        "test_ssl_manager.py",
        "test_usage_filter.py",
        "test_import_checker.py",
        "test_suite/",
        "profiler/test_profiler_partitions.py",
        "profiler/test_workflow.py",
        "workflow",
        "topology",
    )

    # Run unit tests
    pytest_args = ["tests/unit/"]
    pytest_args.extend(f"--ignore=tests/unit/{path}" for path in excluded)
    session.run("pytest", *pytest_args)
# TEST PLUGINS
# Maps each plugin name to the directory that holds its unit tests
PLUGINS_TESTS = {"great-expectations": "tests/unit/great_expectations"}
# Plugin names, in declaration order
PLUGINS = [*PLUGINS_TESTS]
@nox.session(
    name="unit-plugins",
    reuse_venv=True,
    venv_backend="uv|venv",
    python=get_python_versions(),
)
@nox.parametrize("plugin", PLUGINS)
def unit_plugins(session, plugin):
    """Run the unit tests of a single plugin.

    The requirement installed for the plugin is the entry with the same name
    in the ``VERSIONS`` mapping of ``setup.py``; the tests that are run are
    the ones registered for the plugin in ``PLUGINS_TESTS``.

    Args:
        session: The nox session object.
        plugin: Name of the plugin under test (one of ``PLUGINS``).
    """
    versions = extract_attribute_from_setup("VERSIONS", "setup.py")

    # `session.error` aborts the session by raising, so nothing placed after
    # it would ever run; no explicit exit call is needed.
    if not versions:
        session.error("Not able to extract VERSIONS from setup.py")
    if plugin not in versions:
        session.error(
            f"Plugin {plugin} not found in VERSIONS. Available plugins: {list(versions)}"
        )

    session.install(".[test-unit]")
    session.install(versions[plugin])
    # Assuming the plugin has its own tests in a specific directory
    session.run("pytest", PLUGINS_TESTS[plugin])
def extract_attribute_from_setup(attr_name: str, setup_path: str = "setup.py"):
    """Read the literal value assigned to a module-level name in a setup file.

    The file is parsed with ``ast`` and never executed. Both plain
    assignments (``NAME = ...``) and annotated assignments
    (``NAME: T = ...``) at the top level of the module are considered; the
    first match in file order wins.

    Args:
        attr_name: Name of the module-level variable to look up.
        setup_path: Path of the Python file to inspect.

    Returns:
        The value produced by ``ast.literal_eval`` on the assigned
        expression, or ``None`` when no matching top-level assignment exists.

    Raises:
        FileNotFoundError: If ``setup_path`` does not exist.
        ValueError: If the assigned expression is not a plain literal
            (raised by ``ast.literal_eval``).
    """
    # TODO: We should consider using a more robust method to extract attributes
    #  such as moving out the attributes to a separate file.
    setup_file = Path(setup_path)
    if not setup_file.exists():
        raise FileNotFoundError(f"{setup_path} not found")

    # Read explicitly as UTF-8 so the result does not depend on the locale
    tree = ast.parse(setup_file.read_text(encoding="utf-8"), filename=setup_path)

    for node in ast.iter_child_nodes(tree):
        if isinstance(node, ast.Assign):
            targets = node.targets
        elif isinstance(node, ast.AnnAssign) and node.value is not None:
            # A bare annotation (`NAME: T`) has no value and is skipped
            targets = [node.target]
        else:
            continue
        if any(
            isinstance(target, ast.Name) and target.id == attr_name
            for target in targets
        ):
            return ast.literal_eval(node.value)
    return None  # Not found

View File

@ -382,6 +382,8 @@ test_unit = {
"pytest-order",
"dirty-equals",
"faker==37.1.0", # The version needs to be fixed to prevent flaky tests!
# TODO: Remove once no unit test requires testcontainers
"testcontainers",
}
test = {
@ -449,7 +451,7 @@ test = {
"python-liquid",
VERSIONS["google-cloud-bigtable"],
*plugins["bigquery"],
"Faker==37.1.0", # Fixed the version to prevent flaky tests!
"faker==37.1.0", # The version needs to be fixed to prevent flaky tests!
}
if sys.version_info >= (3, 9):

View File

@ -43,6 +43,16 @@ def pytest_pycollect_makeitem(collector, name, obj):
pass
# TODO: Will be addressed when cleaning up integration tests.
# Setting the max tries for testcontainers here has pitfalls,
# the main one being that it cannot be changed through the recommended
# way of using environment variables. The main problem is that
# waiting_utils.py uses testcontainers_config.timeout as a default
# value for the timeout. Therefore, if we want to effectively change
# this value, we must do so before the module is imported,
# which is a potential source of issues.
@pytest.fixture(scope="session", autouse=sys.version_info >= (3, 9))
def config_testcontatiners():
from testcontainers.core.config import testcontainers_config

View File

@ -13,7 +13,7 @@
PowerBI File Client tests
"""
import os
from pathlib import Path
from unittest import TestCase
from metadata.generated.schema.entity.services.connections.dashboard.powerBIConnection import (
@ -24,7 +24,7 @@ from metadata.ingestion.source.dashboard.powerbi.file_client import (
_get_datamodel_schema_list,
)
current_dir = os.getcwd()
RESOURCES_DIR = Path(__file__).parent / "resources"
powerbi_connection_config = {
"type": "PowerBI",
@ -36,8 +36,8 @@ powerbi_connection_config = {
"useAdminApis": False,
"pbitFilesSource": {
"pbitFileConfigType": "local",
"path": f"{current_dir}/ingestion/tests/integration/powerbi/resources",
"pbitFilesExtractDir": f"{current_dir}/ingestion/tests/integration/powerbi/resources/extracted",
"path": str(RESOURCES_DIR),
"pbitFilesExtractDir": str(RESOURCES_DIR / "extracted"),
},
}

View File

@ -14,9 +14,14 @@ Test lineage parser to get inlets and outlets information
from datetime import datetime
from typing import List, Set
import pytest
try:
from airflow import DAG
from airflow.operators.bash import BashOperator
from airflow.serialization.serde import serialize
except ImportError:
pytest.skip("Airflow dependencies not installed", allow_module_level=True)
from metadata.generated.schema.entity.data.container import Container
from metadata.generated.schema.entity.data.dashboard import Dashboard

View File

@ -13,29 +13,35 @@ Test suite for the action module implementation
"""
import os
import subprocess
import sys
from unittest import mock
import great_expectations as gx
import pytest
from jinja2 import Environment
from pytest import mark
from metadata.great_expectations.utils.ometa_config_handler import render_template
_GX_0_18 = "0.18"
def install_gx_018x():
"""Install GX 0.18.x at runtime as we support 0.18.x and 1.x.x and setup will install 1 default version"""
try:
import great_expectations as gx
if not gx.__version__.startswith("0.18."):
subprocess.check_call(
[sys.executable, "-m", "pip", "install", "great-expectations~=0.18.0"]
from metadata.great_expectations.action import OpenMetadataValidationAction
_gx_version_ok = gx.__version__.startswith(_GX_0_18)
except ImportError:
_gx_version_ok = False
skip_gx = pytest.mark.skipif(
not _gx_version_ok,
reason=(
"Great Expectations not installed or version mismatch "
f"(required: {_GX_0_18})"
),
)
install_gx_018x()
@skip_gx
@mark.parametrize(
"input,expected",
[
@ -45,7 +51,6 @@ install_gx_018x()
)
def test_get_table_entity(input, expected, mocked_ometa, mocked_ge_data_context):
"""Test get table entity"""
from metadata.great_expectations.action import OpenMetadataValidationAction
ometa_validation = OpenMetadataValidationAction(
data_context=mocked_ge_data_context,
@ -57,6 +62,7 @@ def test_get_table_entity(input, expected, mocked_ometa, mocked_ge_data_context)
assert res._type == expected
@skip_gx
@mark.parametrize(
"input,expected",
[
@ -68,7 +74,6 @@ def test_get_table_entity_database_service_name(
input, expected, mocked_ometa, mocked_ge_data_context
):
"""Test get table entity"""
from metadata.great_expectations.action import OpenMetadataValidationAction
ometa_validation = OpenMetadataValidationAction(
data_context=mocked_ge_data_context,

View File

@ -2,6 +2,15 @@ from unittest import TestCase
from unittest.mock import patch
from uuid import uuid4
import pytest
try:
import pyodbc # noqa: F401
except ImportError:
# skip the test if pyodbc cannot be imported: either because it is not installed or
# because of a broken or missing dynamic library
pytest.skip("pyodbc not properly installed", allow_module_level=True)
from sqlalchemy import Column, Integer
from sqlalchemy.orm import declarative_base
from sqlalchemy.sql.selectable import CTE

View File

@ -14,6 +14,13 @@ Test Airflow processing
from unittest import TestCase
from unittest.mock import patch
import pytest
try:
import airflow # noqa: F401
except ImportError:
pytest.skip("Airflow dependencies not installed", allow_module_level=True)
from metadata.generated.schema.metadataIngestion.workflow import (
OpenMetadataWorkflowConfig,
)