diff --git a/.github/workflows/py-tests-postgres.yml b/.github/workflows/py-tests-postgres.yml new file mode 100644 index 00000000000..c8caeacd0b1 --- /dev/null +++ b/.github/workflows/py-tests-postgres.yml @@ -0,0 +1,97 @@ +# Copyright 2021 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: py-tests-postgres +on: + workflow_dispatch: + push: + branches: + - main + - '0.[0-9]+.[0-9]+' + paths-ignore: + - 'openmetadata-docs/**' + - 'openmetadata-docs-v1/**' + pull_request_target: + types: [labeled, opened, synchronize, reopened] + paths: + - "ingestion/**" + - "openmetadata-service/**" + - "openmetadata-spec/src/main/resources/json/schema/**" + - "pom.xml" + - "Makefile" + +permissions: + contents: read + +jobs: + py-run-tests: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + # We'll test postgres and opensearch with a single python version to save time and resources + py-version: ['3.10'] + steps: + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + tool-cache: false + android: true + dotnet: true + haskell: true + large-packages: false + swap-storage: true + docker-images: false + + - name: Wait for the labeler + uses: lewagon/wait-on-check-action@v1.3.3 + if: ${{ github.event_name == 'pull_request_target' }} + with: + ref: ${{ github.event.pull_request.head.sha }} + check-name: Team Label + repo-token: ${{ secrets.GITHUB_TOKEN }} + wait-interval: 90 + + - name: Verify PR labels + uses: jesusvasquez333/verify-pr-label-action@v1.4.0 + if: ${{ github.event_name == 'pull_request_target' }} + with: + github-token: '${{ secrets.GITHUB_TOKEN }}' + valid-labels: 'safe to test' + pull-request-number: '${{ github.event.pull_request.number }}' + disable-reviews: true # To not auto approve changes + + - name: Checkout + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 + + - name: Setup Openmetadata Test Environment + uses: ./.github/actions/setup-openmetadata-test-environment + with: + python-version: ${{ matrix.py-version}} + args: "-m no-ui -d postgresql" + ingestion_dependency: "mysql,elasticsearch,sample-data" + + - name: Run Python Tests + if: ${{ matrix.py-version != '3.9' }} + run: | + source env/bin/activate + make run_python_tests + env: + TESTCONTAINERS_RYUK_DISABLED: true + + - name: Clean Up + run: | + cd ./docker/development + docker compose down --remove-orphans + sudo rm -rf ${PWD}/docker-volume diff --git a/docker/development/docker-compose.yml b/docker/development/docker-compose.yml index 804c7e9a801..ba05ead6d89 100644 --- a/docker/development/docker-compose.yml +++ b/docker/development/docker-compose.yml @@ -63,7 +63,6 @@ services: retries: 10 volumes: - es-data:/usr/share/elasticsearch/data - execute-migrate-all: build: diff --git a/ingestion/src/metadata/ingestion/ometa/mixins/es_mixin.py b/ingestion/src/metadata/ingestion/ometa/mixins/es_mixin.py index 1fac47a6a66..d8a809afc3e 100644 --- a/ingestion/src/metadata/ingestion/ometa/mixins/es_mixin.py +++ b/ingestion/src/metadata/ingestion/ometa/mixins/es_mixin.py @@ -331,19 +331,9 @@ class ESMixin(Generic[T]): else: break - # Get the data - for hit in response.hits.hits: - try: - yield self.get_by_name( - entity=entity, - fqn=hit.source["fullyQualifiedName"], - fields=fields, - nullable=False, # Raise an error if we don't find the Entity - ) - except Exception as exc: - logger.warning( - f"Error while getting {hit.source['fullyQualifiedName']} - {exc}" - ) + yield from self._yield_hits_from_api( + response=response, entity=entity, fields=fields + ) # Get next page last_hit = response.hits.hits[-1] if response.hits.hits else None @@ -362,3 +352,20 @@ class ESMixin(Generic[T]): logger.debug(traceback.format_exc()) logger.warning(f"Error while getting ES response: {exc}") return None + + def _yield_hits_from_api( + self, response: ESResponse, entity: Type[T], fields: Optional[List[str]] + ) -> Iterator[T]: + """Get the data from the API based on ES responses""" + for hit in response.hits.hits: + try: + yield self.get_by_name( + entity=entity, + fqn=hit.source["fullyQualifiedName"], + fields=fields, + nullable=False, # Raise an error if we don't find the Entity + ) + except Exception as exc: + logger.warning( + f"Error while getting {hit.source['fullyQualifiedName']} - {exc}" + ) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/opensearch/OpenSearchClient.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/opensearch/OpenSearchClient.java index acbe9c4bf4c..f29831de980 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/opensearch/OpenSearchClient.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/opensearch/OpenSearchClient.java @@ -378,6 +378,10 @@ public class OpenSearchClient implements SearchClient { } } + if (!nullOrEmpty(request.getSearchAfter())) { + searchSourceBuilder.searchAfter(request.getSearchAfter()); + } + /* For backward-compatibility we continue supporting the deleted argument, this should be removed in future versions */ if (request .getIndex()