OpenMetadata/ingestion/tests/unit/conftest.py

import math

import sqlalchemy as sqa
from pytest import fixture


@fixture(scope="session", autouse=True)
def register_sqlite_math_functions():
    """
    Make a ``SQRT`` SQL function available on SQLite connections during tests.

    While the fixture is active, a "connect" listener is attached to
    ``sqa.engine.Engine``. For every new DBAPI connection whose type name
    contains "sqlite", it registers a one-argument ``SQRT`` function backed
    by a tolerant Python square root.

    The fixture is session-scoped and autouse, so it is set up once and
    applies to every unit test without being requested explicitly. The
    listener is detached again after the ``yield``.
    """

    def _tolerant_sqrt(value):
        """
        Square root that forgives tiny negative inputs.

        Variance computed as AVG(x*x) - AVG(x)*AVG(x) can come out slightly
        below zero (e.g. -1e-15) through floating-point rounding even though
        the true variance is zero. Negative inputs with magnitude below 1e-10
        are therefore mapped to 0.0; larger negative inputs raise ValueError.
        ``None`` (SQL NULL) is passed through unchanged.

        NOTE(review): the previous comment said this mirrors the handling in
        stddev.py -- confirm the two stay in sync.
        """
        if value is None:
            return None
        if value < 0:
            # Only rounding-noise negatives are clamped; anything larger is
            # treated as a genuine error.
            if abs(value) >= 1e-10:
                raise ValueError(
                    f"Cannot compute square root of negative number: {value}"
                )
            return 0.0
        return math.sqrt(value)

    @sqa.event.listens_for(sqa.engine.Engine, "connect")
    def _attach_sqrt(dbapi_conn, connection_record):
        # Decide by driver type name so other dialects are left untouched.
        is_sqlite = "sqlite" in str(type(dbapi_conn))
        if not is_sqlite:
            return
        dbapi_conn.create_function("SQRT", 1, _tolerant_sqrt)

    yield

    # Session teardown: detach the listener registered above.
    sqa.event.remove(sqa.engine.Engine, "connect", _attach_sqrt)


def pytest_pycollect_makeitem(collector, name, obj):
    """
    Keep selected non-test objects out of pytest's Python collection.

    Returns an empty list when ``obj`` is named ``TestSuiteSource`` or
    ``TestSuiteInterfaceFactory``, or when its direct base class is named
    ``BaseModel`` or ``Enum``. In every other case, including objects that
    lack ``__name__`` or ``__base__``, it returns ``None``.

    NOTE(review): presumably the empty list tells pytest to collect nothing
    for the object and ``None`` defers to default collection -- confirm
    against the pytest hook reference.
    """
    excluded_names = ("TestSuiteSource", "TestSuiteInterfaceFactory")
    excluded_base_names = ("BaseModel", "Enum")

    try:
        # Short-circuit keeps the base-class lookup from running when the
        # object's own name already matches.
        should_skip = (
            obj.__name__ in excluded_names
            or obj.__base__.__name__ in excluded_base_names
        )
    except AttributeError:
        # Not a class-like object (or its base is None): nothing to decide.
        return None

    return [] if should_skip else None


def pytest_collection_modifyitems(session, config, items):
    """Move tests from selected files to the end of the run, in place.

    Any item whose ``nodeid`` contains one of the deferred file names is
    placed after all other items. Relative order within each group is
    unchanged, and ``items`` is mutated rather than replaced.
    """
    # File names whose tests must run last; extend as needed.
    deferred_files = ("test_dependency_injector.py",)

    def _runs_last(test_item):
        return any(fname in test_item.nodeid for fname in deferred_files)

    # sorted() is stable and False sorts before True, so this is a stable
    # partition: regular items first, deferred items after.
    items[:] = sorted(items, key=_runs_last)
Feature/dimensionality column values stddev to be between (#24235) * Initial implementation for Dimensionality on Data Quality Tests * Fix ColumnValuesToBeUnique and create TestCaseResult API * Refactor dimension result * Initial E2E Implementation without Impact Score * Dimensionality Thin Slice * Update generated TypeScript types * Update generated TypeScript types * Removed useless method to use the one we already had * Fix Pandas Dimensionality checks * Remove useless comments * Implement PR comments, fix Tests * Improve the code a bit * Fix imports * Implement Dimensionality for ColumnMeanToBeBetween * Removed useless comments and improved minor things * Implement UnitTests * Fixes * Moved import pandas to type checking * Fix Min/Max being optional * Fix Unittests * small fixes * Fix Unittests * Fix Issue with counting total rows on mean * Improve code * Fix Merge * Removed unused type * Refactor to reduce code repetition and complexity * Fix conflict * Rename method * Refactor some metrics * Implement Dimensionality to ColumnLengthToBeBetween * Implement Dimensionality for ColumnMedianToBeBetween in Pandas * Implement Median Dimensionality for SQL * Add database tests * Fix median metric * Implement Dimensionality SumToBeBetween * Implement dimensionality for Column Values not In Set * Implement Dimensionality for ColumnValuestoMatchRegex and ColumnValuesToNotMatchRegex * Implement NotNull and MissingCount dimensionality * Implement columnValuesToBeBetween dimensionality * Fix test * Implement Pandas Dimensionality for ColumnValueStdDevToBeBetween * Implement Dimensionality for ColumnValuesStdDevToBeBetween * Fixed tests due to sqlite now supporting stddev --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2025-11-10 12:13:27 +01:00			`import math`

			`import sqlalchemy as sqa`
			`from pytest import fixture`


			`@fixture(scope="session", autouse=True)`
			`def register_sqlite_math_functions():`
			`"""`
			`Register custom math functions for SQLite used in unit tests.`

			`SQLite doesn't have built-in SQRT function, so we register Python's math.sqrt`
			`to make it available for all SQLite connections in tests.`

			`This runs automatically for all unit tests (autouse=True) and only once`
			`per test session (scope="session").`
			`"""`

			`def safe_sqrt(x):`
			`"""`
			`Safe square root that handles floating-point precision issues.`

			`When computing variance using AVG(x*x) - AVG(x)*AVG(x), floating-point`
			`precision can result in slightly negative values (e.g., -1e-15) when`
			`the true variance is zero. This function treats near-zero negative`
			`values as zero, matching the behavior in stddev.py:254-256.`
			`"""`
			`if x is None:`
			`return None`
			`if x < 0:`
			`if abs(x) < 1e-10:`
			`return 0.0`
			`raise ValueError(f"Cannot compute square root of negative number: {x}")`
			`return math.sqrt(x)`

			`@sqa.event.listens_for(sqa.engine.Engine, "connect")`
			`def register_functions(dbapi_conn, connection_record):`
			`if "sqlite" in str(type(dbapi_conn)):`
			`dbapi_conn.create_function("SQRT", 1, safe_sqrt)`

			`yield`

			`# Clean up event listener after tests`
			`sqa.event.remove(sqa.engine.Engine, "connect", register_functions)`


Fix #16421: add tableDiff test case (#16554) * feat: add tableDiff test case This changed introduces a "table diff" test case which compares two tables and fails if they are not identical. The similarity is made based on a specific "key" (because the test only makes sense when performed on ordered collections). 1. Added the `tableDiff` test definition. 2. Implemented a "runtime" parameters feature which injects additional parameters for the test at runtime. 3. Integration tests (because of course). This feature was not tested end-to-end yet because "array" data * pydantic v2 * format * format * format and added data diff to setup.py * format * fixed param issue which has type ARRAY * fixed runtime_parameter_setter * moved models to parent directory * handle errors in table diff * fixed issue with edit test case * format * added more details to pytest skip * format * refactor: Improve createTestCaseParameters function in DataQualityUtils * fixed unit test * removed unused fixture * removed validator.py * fixed tests * added validate kwarg to tests_mixin * removed "postgres" data diff extra as they interfere with psycopg2-binary * fixed tests * pinned tenacity for tests * reverted tenacity pinning * added ui support for test diff * fixed dq cypress and added edit flow * organized the test case * added dialect support * fixed tests * option style fix * fixed calculation for passing/failing rows * restrict the tableDiff test to limited services * set where to None if blank string * fixed where clause * fixed tests for where clause * use displayName in place of name in edit form * added docs for RuntimeParameterSetter * fixed cypress --------- Co-authored-by: Shailesh Parmar <shailesh.parmar.webdev@gmail.com> 2024-06-20 16:54:12 +02:00			`def pytest_pycollect_makeitem(collector, name, obj):`
			`try:`
moved int_admin_ometa to a dedicated module (#16768) 2024-06-25 07:51:22 +02:00			`if obj.__name__ in ("TestSuiteSource", "TestSuiteInterfaceFactory"):`
			`return []`
Fix #16421: add tableDiff test case (#16554) * feat: add tableDiff test case This changed introduces a "table diff" test case which compares two tables and fails if they are not identical. The similarity is made based on a specific "key" (because the test only makes sense when performed on ordered collections). 1. Added the `tableDiff` test definition. 2. Implemented a "runtime" parameters feature which injects additional parameters for the test at runtime. 3. Integration tests (because of course). This feature was not tested end-to-end yet because "array" data * pydantic v2 * format * format * format and added data diff to setup.py * format * fixed param issue which has type ARRAY * fixed runtime_parameter_setter * moved models to parent directory * handle errors in table diff * fixed issue with edit test case * format * added more details to pytest skip * format * refactor: Improve createTestCaseParameters function in DataQualityUtils * fixed unit test * removed unused fixture * removed validator.py * fixed tests * added validate kwarg to tests_mixin * removed "postgres" data diff extra as they interfere with psycopg2-binary * fixed tests * pinned tenacity for tests * reverted tenacity pinning * added ui support for test diff * fixed dq cypress and added edit flow * organized the test case * added dialect support * fixed tests * option style fix * fixed calculation for passing/failing rows * restrict the tableDiff test to limited services * set where to None if blank string * fixed where clause * fixed tests for where clause * use displayName in place of name in edit form * added docs for RuntimeParameterSetter * fixed cypress --------- Co-authored-by: Shailesh Parmar <shailesh.parmar.webdev@gmail.com> 2024-06-20 16:54:12 +02:00			`if obj.__base__.__name__ in ("BaseModel", "Enum"):`
			`return []`
			`except AttributeError:`
			`pass`
MINOR: Implement dependency injection on ingestion (#21719) * Initial implementation for our Connection Class * Implement the Initial Connection class * Add Unit Tests * Implement Dependency Injection for the Ingestion Framework * Fix Test * Fix Profile Test Connection * Fix test, making the injection test run last * Update connections.py * Changed NewType to an AbstractClass to avoid linting issues * remove comment * Fix bug in service spec * Update PyTest version to avoid importlib.reader wrong import 2025-06-16 08:03:38 +02:00

			`def pytest_collection_modifyitems(session, config, items):`
			`"""Reorder test items to ensure certain files run last."""`
			`# List of test files that should run last`
			`last_files = [`
			`"test_dependency_injector.py",`
			`# Add other files that should run last here`
			`]`

			`# Get all test items that should run last`
			`last_items = []`
			`other_items = []`

			`for item in items:`
			`if any(file in item.nodeid for file in last_files):`
			`last_items.append(item)`
			`else:`
			`other_items.append(item)`

			`# Reorder the items`
			`items[:] = other_items + last_items`