From 86a2930cfa6b707a3bceac43441d71decd7d82a9 Mon Sep 17 00:00:00 2001 From: IceS2 Date: Tue, 5 Mar 2024 16:00:22 +0100 Subject: [PATCH] Minor: Fix E2E Ingestion Tests (#15462) * Fix E2E Tests * Fix E2E Tests * Update mysql count, schema changes * Addition to vertica e2e * Temporary Github Action modification to test * Fix Redshift round issue post 10 digits * modify e2e gh file * fix gh error * fix matrix syntax * Fix Redash counts * Update py-cli-e2e-tests.yml * Fix Redshift referenced before assignment error * Revert Py tests e2e * Modify Elasticsearch configuration * Modify Elasticsearch configuration * Update docker-compose.yml * Test only running the python tests as e2e * Comment side effects * Test * Test * Fix name * Add missing shell property * Add bigquery to e2e * Uncomment needed step * test * test * test * test * Add control ci pipeline * Add new e2e tests * test * fix * fix * fix * Uncomment needed steps --------- Co-authored-by: Ayush Shah --- .../action.yml | 58 +++++++++++++++++ .github/workflows/py-cli-e2e-tests.yml | 63 +++++++------------ .github/workflows/py-tests.yml | 54 ++++------------ ingestion/setup.py | 1 - .../source/database/redshift/metadata.py | 2 +- ingestion/tests/cli_e2e/common/test_cli_db.py | 6 ++ .../cli_e2e/database/snowflake/snowflake.yaml | 1 + ingestion/tests/cli_e2e/test_cli_metabase.py | 2 +- ingestion/tests/cli_e2e/test_cli_mysql.py | 4 +- ingestion/tests/cli_e2e/test_cli_redash.py | 16 ++--- ingestion/tests/cli_e2e/test_cli_tableau.py | 4 +- ingestion/tests/cli_e2e/test_cli_vertica.py | 4 +- 12 files changed, 115 insertions(+), 100 deletions(-) create mode 100644 .github/actions/setup-openmetadata-test-environment/action.yml diff --git a/.github/actions/setup-openmetadata-test-environment/action.yml b/.github/actions/setup-openmetadata-test-environment/action.yml new file mode 100644 index 00000000000..fded07a9bfb --- /dev/null +++ b/.github/actions/setup-openmetadata-test-environment/action.yml @@ -0,0 +1,58 @@ +name: Setup OpenMetadata Test Environment +description: Steps needed to have a coherent test environment + +inputs: + python-version: + description: Python Version to install + required: true + +runs: + using: composite + steps: + # ---- Install Ubuntu Dependencies --------------------------------------------- + - name: Install Ubuntu dependencies + run: | + sudo apt-get update && sudo apt-get install -y unixodbc-dev python3-venv librdkafka-dev gcc libsasl2-dev build-essential libssl-dev libffi-dev \ + unixodbc-dev libevent-dev python3-dev libkrb5-dev + shell: bash + # ------------------------------------------------------------------------------ + + # ---- Setup Java -------------------------------------------------------------- + - name: Setup JDK 17 + uses: actions/setup-java@v3 + with: + java-version: '17' + distribution: 'temurin' + # ------------------------------------------------------------------------------ + + # ---- Setup Python Test Environment ------------------------------------------- + - name: Setup Python ${{ inputs.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ inputs.python-version }} + + - name: Generate Models + run: | + python3 -m venv env + source env/bin/activate + sudo make install_antlr_cli + make install_dev generate + shell: bash + + - name: Install Python Dependencies + run: | + source env/bin/activate + make install_all install_test + shell: bash + # ------------------------------------------------------------------------------ + + # ---- Start OpenMetadata Server and ingest Sample Data ------------------------ + - name: Start Server and Ingest Sample Data + uses: nick-fields/retry@v2.8.3 + env: + INGESTION_DEPENDENCY: "mysql,elasticsearch" + with: + timeout_minutes: 60 + max_attempts: 2 + retry_on: error + command: ./docker/run_local_docker.sh -m no-ui diff --git a/.github/workflows/py-cli-e2e-tests.yml b/.github/workflows/py-cli-e2e-tests.yml index 9ddbd9beaba..cb66b99c784 100644 --- a/.github/workflows/py-cli-e2e-tests.yml +++ b/.github/workflows/py-cli-e2e-tests.yml @@ -14,6 +14,10 @@ on: schedule: - cron: '0 0 * * *' workflow_dispatch: + input: + e2e-tests: + description: "E2E Tests to run" + type: string permissions: id-token: write @@ -25,24 +29,24 @@ jobs: strategy: fail-fast: false matrix: - e2e-test: ['bigquery', 'dbt_redshift', 'metabase', 'mssql', 'mysql', 'redash', 'snowflake', 'tableau', 'powerbi', 'vertica', 'python', 'redshift', 'quicksight', 'datalake_s3', 'postgres', 'oracle', 'athena', 'bigquery_multiple_project'] + e2e-test: ${{ fromJSON(inputs.e2e-tests || '["bigquery", "dbt_redshift", "metabase", "mssql", "mysql", "redash", "snowflake", "tableau", "powerbi", "vertica", "python", "redshift", "quicksight", "datalake_s3", "postgres", "oracle", "athena", "bigquery_multiple_project"]') }} environment: test steps: + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + tool-cache: false + android: true + dotnet: true + haskell: true + large-packages: false + swap-storage: true + docker-images: false + - name: Checkout uses: actions/checkout@v3 - - name: Set up JDK 17 - uses: actions/setup-java@v3 - with: - java-version: '17' - distribution: 'temurin' - - - name: Set up Python 3.9 - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - name: configure aws credentials if: contains('quicksight', matrix.e2e-test) || contains('datalake_s3', matrix.e2e-test) || contains('athena', matrix.e2e-test) uses: aws-actions/configure-aws-credentials@v1 @@ -51,32 +55,11 @@ jobs: role-session-name: github-ci-aws-e2e-tests aws-region: ${{ secrets.E2E_AWS_REGION }} - - name: Install Ubuntu dependencies - run: | - sudo apt-get update && sudo apt-get install -y unixodbc-dev python3-venv librdkafka-dev gcc libsasl2-dev build-essential libssl-dev libffi-dev \ - unixodbc-dev libevent-dev python3-dev libkrb5-dev - - - name: Generate models - run: | - python3 -m venv env - source env/bin/activate - sudo make install_antlr_cli - make install_dev generate - - - name: Install open-metadata dependencies - run: | - source env/bin/activate - make install_all install_test - - - name: Start Server and Ingest Sample Data - uses: nick-fields/retry@v2.8.3 - env: - INGESTION_DEPENDENCY: "mysql,elasticsearch" + - name: Setup Openmetadata Test Environment + uses: ./.github/actions/setup-openmetadata-test-environment with: - timeout_minutes: 60 - max_attempts: 2 - retry_on: error - command: ./docker/run_local_docker.sh -m no-ui + python-version: 3.9 + - name: Run Python Tests & record coverage if: matrix.e2e-test == 'python' @@ -160,7 +143,7 @@ jobs: echo "except ImportError:" >> $SITE_CUSTOMIZE_PATH echo " pass" >> $SITE_CUSTOMIZE_PATH coverage run --rcfile ingestion/pyproject.toml -a --branch -m pytest -c ingestion/pyproject.toml --junitxml=ingestion/junit/test-results-$E2E_TEST.xml --ignore=ingestion/tests/unit/source ingestion/tests/cli_e2e/test_cli_$E2E_TEST.py - coverage combine --data-file=.coverage.$E2E_TEST --rcfile=ingestion/pyproject.toml --keep -a .coverage* + coverage combine --data-file=.coverage.$E2E_TEST --rcfile=ingestion/pyproject.toml --keep -a .coverage* coverage report --rcfile ingestion/pyproject.toml --data-file .coverage.$E2E_TEST || true - name: Upload coverage artifact for Python tests @@ -239,11 +222,11 @@ jobs: - name: Generate report run: | - for folder in artifacts/coverage-*; do + for folder in artifacts/coverage-*; do cp -rT $folder/ . ; done mkdir ingestion/junit - for folder in artifacts/tests-*; do + for folder in artifacts/tests-*; do cp -rT $folder/ ingestion/junit ; done source env/bin/activate diff --git a/.github/workflows/py-tests.yml b/.github/workflows/py-tests.yml index 433c15f546a..b4db635d3a0 100644 --- a/.github/workflows/py-tests.yml +++ b/.github/workflows/py-tests.yml @@ -42,13 +42,14 @@ jobs: - name: Free Disk Space (Ubuntu) uses: jlumbroso/free-disk-space@main with: - tool-cache: false - android: true - dotnet: true - haskell: true - large-packages: false - swap-storage: true - docker-images: false + tool-cache: false + android: true + dotnet: true + haskell: true + large-packages: false + swap-storage: true + docker-images: false + - name: Wait for the labeler uses: lewagon/wait-on-check-action@v1.3.3 if: ${{ github.event_name == 'pull_request_target' }} @@ -73,43 +74,10 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 0 - - name: Set up JDK 17 - uses: actions/setup-java@v3 + - name: Setup Openmetadata Test Environment + uses: ./.github/actions/setup-openmetadata-test-environment with: - java-version: '17' - distribution: 'temurin' - - - name: Set up Python ${{ matrix.py-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.py-version }} - - - name: Install Ubuntu dependencies - run: | - sudo apt-get update && sudo apt-get install -y unixodbc-dev python3-venv librdkafka-dev gcc libsasl2-dev build-essential libssl-dev libffi-dev \ - unixodbc-dev libevent-dev python3-dev libkrb5-dev - - - name: Generate models - run: | - python3 -m venv env - source env/bin/activate - sudo make install_antlr_cli - make install_dev generate - - - name: Install open-metadata dependencies - run: | - source env/bin/activate - make install_all install_test - - - name: Start Server and Ingest Sample Data - uses: nick-fields/retry@v2.8.3 - env: - INGESTION_DEPENDENCY: "mysql,elasticsearch" - with: - timeout_minutes: 60 - max_attempts: 2 - retry_on: error - command: ./docker/run_local_docker.sh -m no-ui + python-version: ${{ matrix.py-version}} - name: Run Python Tests if: ${{ matrix.py-version != '3.9' }} diff --git a/ingestion/setup.py b/ingestion/setup.py index 72e9bad4c93..887ee1a1508 100644 --- a/ingestion/setup.py +++ b/ingestion/setup.py @@ -190,7 +190,6 @@ plugins: Dict[str, Set[str]] = { "druid": {"pydruid>=0.6.5"}, "dynamodb": {VERSIONS["boto3"]}, "elasticsearch": { - "elasticsearch==7.13.1", VERSIONS["elasticsearch8"], }, # also requires requests-aws4auth which is in base "glue": {VERSIONS["boto3"]}, diff --git a/ingestion/src/metadata/ingestion/source/database/redshift/metadata.py b/ingestion/src/metadata/ingestion/source/database/redshift/metadata.py index 1be6bc31fb4..4a4bfcfd05c 100644 --- a/ingestion/src/metadata/ingestion/source/database/redshift/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/redshift/metadata.py @@ -233,7 +233,7 @@ class RedshiftSource( ) ] ) - return True, partition_details + return True, partition_details return False, None def process_additional_table_constraints( diff --git a/ingestion/tests/cli_e2e/common/test_cli_db.py b/ingestion/tests/cli_e2e/common/test_cli_db.py index a491b4536ba..69cf8acfadf 100644 --- a/ingestion/tests/cli_e2e/common/test_cli_db.py +++ b/ingestion/tests/cli_e2e/common/test_cli_db.py @@ -126,6 +126,12 @@ class CliCommonDB: if expected_column_profile: column_profile = column.profile.dict() for key in expected_column_profile: # type: ignore + if key == "nonParametricSkew": + self.assertTrue( + column_profile[key].__round__(10) + == expected_column_profile[key].__round__(10) + ) + continue self.assertTrue( column_profile[key] == expected_column_profile[key] ) diff --git a/ingestion/tests/cli_e2e/database/snowflake/snowflake.yaml b/ingestion/tests/cli_e2e/database/snowflake/snowflake.yaml index 1a4f8c7f319..0a331d72371 100644 --- a/ingestion/tests/cli_e2e/database/snowflake/snowflake.yaml +++ b/ingestion/tests/cli_e2e/database/snowflake/snowflake.yaml @@ -11,6 +11,7 @@ source: type: Snowflake connectionOptions: {} connectionArguments: {} + clientSessionKeepAlive: True sourceConfig: config: markDeletedTables: true diff --git a/ingestion/tests/cli_e2e/test_cli_metabase.py b/ingestion/tests/cli_e2e/test_cli_metabase.py index 434747dabf1..b92c441cb78 100644 --- a/ingestion/tests/cli_e2e/test_cli_metabase.py +++ b/ingestion/tests/cli_e2e/test_cli_metabase.py @@ -64,7 +64,7 @@ class MetabaseCliTest(CliCommonDashboard.TestSuite): return 6 def expected_lineage(self) -> int: - return 7 + return 8 def expected_tags(self) -> int: return 0 diff --git a/ingestion/tests/cli_e2e/test_cli_mysql.py b/ingestion/tests/cli_e2e/test_cli_mysql.py index e094001e56e..d4cbf45bcf9 100644 --- a/ingestion/tests/cli_e2e/test_cli_mysql.py +++ b/ingestion/tests/cli_e2e/test_cli_mysql.py @@ -117,7 +117,7 @@ class MysqlCliTest(CliCommonDB.TestSuite, SQACommonMethods): @staticmethod def expected_filtered_table_includes() -> int: - return 67 + return 78 @staticmethod def expected_filtered_table_excludes() -> int: @@ -125,4 +125,4 @@ class MysqlCliTest(CliCommonDB.TestSuite, SQACommonMethods): @staticmethod def expected_filtered_mix() -> int: - return 67 + return 78 diff --git a/ingestion/tests/cli_e2e/test_cli_redash.py b/ingestion/tests/cli_e2e/test_cli_redash.py index 5a5df830a4b..43177271a24 100644 --- a/ingestion/tests/cli_e2e/test_cli_redash.py +++ b/ingestion/tests/cli_e2e/test_cli_redash.py @@ -23,16 +23,16 @@ class RedashCliTest(CliCommonDashboard.TestSuite): return "redash" def get_includes_dashboards(self) -> List[str]: - return [".*Mil.*"] + return [".*Orders.*"] def get_excludes_dashboards(self) -> List[str]: - return ["Test"] + return [".*World.*"] def get_includes_charts(self) -> List[str]: - return ["4"] + return [".*Orders.*"] def get_excludes_charts(self) -> List[str]: - return [".*Query.*"] + return ["World Query Data"] # Redash do not ingest datamodels def get_includes_datamodels(self) -> List[str]: @@ -43,13 +43,13 @@ class RedashCliTest(CliCommonDashboard.TestSuite): return [] def expected_dashboards_and_charts(self) -> int: - return 12 + return 9 def expected_lineage(self) -> int: return 0 def expected_tags(self) -> int: - return 2 + return 1 def expected_datamodels(self) -> int: return 0 @@ -58,10 +58,10 @@ class RedashCliTest(CliCommonDashboard.TestSuite): return 0 def expected_filtered_mix(self) -> int: - return 6 + return 3 def expected_filtered_sink_mix(self) -> int: - return 8 + return 4 def expected_dashboards_and_charts_after_patch(self) -> int: return 1 diff --git a/ingestion/tests/cli_e2e/test_cli_tableau.py b/ingestion/tests/cli_e2e/test_cli_tableau.py index 4f305d16e26..d23ad04cafc 100644 --- a/ingestion/tests/cli_e2e/test_cli_tableau.py +++ b/ingestion/tests/cli_e2e/test_cli_tableau.py @@ -71,7 +71,7 @@ class TableauCliTest(CliCommonDashboard.TestSuite): return 2 def expected_filtered_sink_mix(self) -> int: - return 13 + return 9 def expected_dashboards_and_charts_after_patch(self) -> int: - return 5 + return 2 diff --git a/ingestion/tests/cli_e2e/test_cli_vertica.py b/ingestion/tests/cli_e2e/test_cli_vertica.py index 7264073068e..5b73a332e29 100644 --- a/ingestion/tests/cli_e2e/test_cli_vertica.py +++ b/ingestion/tests/cli_e2e/test_cli_vertica.py @@ -99,7 +99,7 @@ class VerticaCliTest(CliCommonDB.TestSuite, SQACommonMethods): @staticmethod def expected_filtered_table_includes() -> int: - return 6 + return 8 @staticmethod def expected_filtered_table_excludes() -> int: @@ -107,4 +107,4 @@ class VerticaCliTest(CliCommonDB.TestSuite, SQACommonMethods): @staticmethod def expected_filtered_mix() -> int: - return 5 + return 7