diff --git a/.github/workflows/py-tests-3_9.yml b/.github/workflows/py-tests-3_9.yml index d376c157c67..1519cc54842 100644 --- a/.github/workflows/py-tests-3_9.yml +++ b/.github/workflows/py-tests-3_9.yml @@ -69,8 +69,24 @@ jobs: make coverage rm pom.xml - - name: Run Sonar + - name: Run PR Sonar uses: sonarsource/sonarcloud-github-action@master + if: ${{ github.event_name == 'pull_request_target' }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + SONAR_TOKEN: ${{ secrets.INGESTION_SONAR_SECRET }} + with: + projectBaseDir: ingestion/ + args: > + -Dsonar.pullrequest.key=${{ github.event.pull_request.number }} + -Dsonar.pullrequest.branch=${{ github.head_ref }} + -Dsonar.pullrequest.github.repository=OpenMetadata + -Dsonar.scm.revision=${{ github.event.pull_request.head.sha }} + -Dsonar.pullrequest.provider=github + + - name: Run Sonar + uses: sonarsource/sonarcloud-github-action@master + if: ${{ github.event_name == 'push' }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} SONAR_TOKEN: ${{ secrets.INGESTION_SONAR_SECRET }} diff --git a/ingestion/sonar-project.properties b/ingestion/sonar-project.properties index 8879006a8c0..91f1f0192f9 100644 --- a/ingestion/sonar-project.properties +++ b/ingestion/sonar-project.properties @@ -3,9 +3,9 @@ sonar.projectName=open-metadata-ingestion sonar.organization=open-metadata sonar.language=py -sonar.sources=ingestion/src -sonar.tests=ingestion/tests -sonar.exclusions=ingestion/src/metadata_server/static/**,ingestion/src/metadata_server/templates/** -sonar.python.xunit.reportPath=ingestion/junit/test-results-*.xml -sonar.python.coverage.reportPaths=ingestion/coverage.xml -sonar.python.version=3.8,3.9 +sonar.sources=src +sonar.tests=tests +sonar.exclusions=src/metadata_server/static/**,ingestion/src/metadata_server/templates/** +sonar.python.xunit.reportPath=junit/test-results-*.xml +sonar.python.coverage.reportPaths=coverage.xml +sonar.python.version=3.7,3.8,3.9 diff --git a/ingestion/src/metadata/ingestion/ometa/ometa_api.py b/ingestion/src/metadata/ingestion/ometa/ometa_api.py index 9d8e7786008..aa4758944c0 100644 --- a/ingestion/src/metadata/ingestion/ometa/ometa_api.py +++ b/ingestion/src/metadata/ingestion/ometa/ometa_api.py @@ -16,7 +16,7 @@ working with OpenMetadata entities. """ import urllib -from typing import Dict, Generic, List, Optional, Type, TypeVar, Union +from typing import Dict, Generic, Iterable, List, Optional, Type, TypeVar, Union try: from typing import get_args @@ -552,6 +552,39 @@ class OpenMetadata( after = resp["paging"]["after"] if "after" in resp["paging"] else None return EntityList(entities=entities, total=total, after=after) + def list_all_entities( + self, + entity: Type[T], + fields: Optional[List[str]] = None, + limit: int = 1000, + params: Optional[Dict[str, str]] = None, + ) -> Iterable[T]: + """ + Utility method that paginates over all EntityLists + to return a generator to fetch entities + :param entity: Entity Type, such as Table + :param fields: Extra fields to return + :param limit: Number of entities in each pagination + :param params: Extra parameters, e.g., {"service": "serviceName"} to filter + :return: Generator that will be yielding all Entities + """ + + # First batch of Entities + entity_list = self.list_entities( + entity=entity, fields=fields, limit=limit, params=params + ) + for elem in entity_list.entities: + yield elem + + after = entity_list.after + while after: + entity_list = self.list_entities( + entity=entity, fields=fields, limit=limit, params=params, after=after + ) + for elem in entity_list.entities: + yield elem + after = entity_list.after + def list_versions( self, entity_id: Union[str, basic.Uuid], entity: Type[T] ) -> EntityVersionHistory: diff --git a/ingestion/src/metadata/orm_profiler/api/workflow.py b/ingestion/src/metadata/orm_profiler/api/workflow.py index 7e8c68f26b4..8cc2de65e4d 100644 --- a/ingestion/src/metadata/orm_profiler/api/workflow.py +++ b/ingestion/src/metadata/orm_profiler/api/workflow.py @@ -148,7 +148,7 @@ class ProfilerWorkflow: """ # First, get all the databases for the service: - all_dbs = self.metadata.list_entities( + all_dbs = self.metadata.list_all_entities( entity=Database, params={"service": self.config.source.serviceName}, ) @@ -156,7 +156,7 @@ class ProfilerWorkflow: # Then list all tables from each db. # This returns a nested structure [[db1 tables], [db2 tables]...] all_tables = [ - self.metadata.list_entities( + self.metadata.list_all_entities( entity=Table, fields=[ "tableProfile", @@ -165,8 +165,8 @@ class ProfilerWorkflow: params={ "database": f"{self.config.source.serviceName}.{database.name.__root__}" }, - ).entities - for database in all_dbs.entities + ) + for database in all_dbs ] # Flatten the structure into a List[Table] diff --git a/ingestion/tests/integration/ometa/test_ometa_table_api.py b/ingestion/tests/integration/ometa/test_ometa_table_api.py index e41bb6c2246..4d62df8aced 100644 --- a/ingestion/tests/integration/ometa/test_ometa_table_api.py +++ b/ingestion/tests/integration/ometa/test_ometa_table_api.py @@ -13,6 +13,7 @@ OpenMetadata high-level API Table test """ import uuid +from copy import deepcopy from datetime import datetime from unittest import TestCase @@ -223,7 +224,7 @@ class OMetaTableTest(TestCase): self.metadata.create_or_update(data=self.create) - res = self.metadata.list_entities(entity=Table, limit=100) + res = self.metadata.list_entities(entity=Table) # Fetch our test Database. We have already inserted it, so we should find it data = next( @@ -231,6 +232,22 @@ class OMetaTableTest(TestCase): ) assert data + def test_list_all(self): + """ + Validate generator utility to fetch all tables + """ + fake_create = deepcopy(self.create) + for i in range(0, 10): + fake_create.name = self.create.name.__root__ + str(i) + self.metadata.create_or_update(data=fake_create) + + all_entities = self.metadata.list_all_entities( + entity=Table, limit=2 # paginate in batches of pairs + ) + assert ( + len(list(all_entities)) >= 10 + ) # In case the default testing entity is not present + def test_delete(self): """ We can delete a Table by ID