From 69557e87162cc1cb1fc1016a73d36e6b714989cf Mon Sep 17 00:00:00 2001 From: Akash Verma <138790903+akashverma0786@users.noreply.github.com> Date: Mon, 16 Dec 2024 19:35:20 +0530 Subject: [PATCH] fixes: #15742 Oracle stored package feature (#18852) --- .../source/database/oracle/connection.py | 26 ++++++++- .../source/database/oracle/metadata.py | 55 ++++++++++++------- .../source/database/oracle/models.py | 9 +-- .../source/database/oracle/queries.py | 23 +++++++- .../unit/topology/database/test_oracle.py | 45 ++++++++++++++- .../data/testConnections/database/oracle.json | 16 ++++-- .../schema/entity/data/storedProcedure.json | 6 +- 7 files changed, 144 insertions(+), 36 deletions(-) diff --git a/ingestion/src/metadata/ingestion/source/database/oracle/connection.py b/ingestion/src/metadata/ingestion/source/database/oracle/connection.py index 83665684cbc..0c9f7db3e23 100644 --- a/ingestion/src/metadata/ingestion/source/database/oracle/connection.py +++ b/ingestion/src/metadata/ingestion/source/database/oracle/connection.py @@ -41,7 +41,11 @@ from metadata.ingestion.connections.builders import ( ) from metadata.ingestion.connections.test_connections import test_connection_db_common from metadata.ingestion.ometa.ometa_api import OpenMetadata -from metadata.ingestion.source.database.oracle.queries import CHECK_ACCESS_TO_ALL +from metadata.ingestion.source.database.oracle.queries import ( + CHECK_ACCESS_TO_ALL, + ORACLE_GET_SCHEMA, + ORACLE_GET_STORED_PACKAGES, +) from metadata.utils.constants import THREE_MIN from metadata.utils.logger import ingestion_logger @@ -131,6 +135,12 @@ def get_connection(connection: OracleConnection) -> Engine: ) +class OraclePackageAccessError(Exception): + """ + Raised when unable to access Oracle stored packages + """ + + def test_connection( metadata: OpenMetadata, engine: Engine, @@ -143,7 +153,19 @@ def test_connection( of a metadata workflow or during an Automation Workflow """ - test_conn_queries = {"CheckAccess": 
CHECK_ACCESS_TO_ALL} + def test_oracle_package_access(engine): + try: + schema_name = engine.execute(ORACLE_GET_SCHEMA).scalar() + return ORACLE_GET_STORED_PACKAGES.format(schema=schema_name) + except Exception as e: + raise OraclePackageAccessError( + f"Failed to access Oracle stored packages: {e}" + ) + + test_conn_queries = { + "CheckAccess": CHECK_ACCESS_TO_ALL, + "PackageAccess": test_oracle_package_access(engine), + } return test_connection_db_common( metadata=metadata, diff --git a/ingestion/src/metadata/ingestion/source/database/oracle/metadata.py b/ingestion/src/metadata/ingestion/source/database/oracle/metadata.py index 0f861c7440d..35bc5dc4582 100644 --- a/ingestion/src/metadata/ingestion/source/database/oracle/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/oracle/metadata.py @@ -24,6 +24,7 @@ from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema from metadata.generated.schema.entity.data.storedProcedure import ( Language, StoredProcedureCode, + StoredProcedureType, ) from metadata.generated.schema.entity.data.table import TableType from metadata.generated.schema.entity.services.connections.database.oracleConnection import ( @@ -45,10 +46,11 @@ from metadata.ingestion.source.database.common_db_source import ( TableNameAndType, ) from metadata.ingestion.source.database.oracle.models import ( - FetchProcedureList, - OracleStoredProcedure, + FetchObjectList, + OracleStoredObject, ) from metadata.ingestion.source.database.oracle.queries import ( + ORACLE_GET_STORED_PACKAGES, ORACLE_GET_STORED_PROCEDURES, ) from metadata.ingestion.source.database.oracle.utils import ( @@ -181,41 +183,51 @@ class OracleSource(CommonDbSourceService): logger.warning(f"Failed to fetch Schema definition for {table_name}: {exc}") return None - def process_result(self, data: FetchProcedureList): + def process_result(self, data: FetchObjectList): """Process data as per our stored procedure format""" result_dict = {} for row in data: - 
owner, name, line, text = row + + owner, name, line, text, procedure_type = row key = (owner, name) if key not in result_dict: - result_dict[key] = {"lines": [], "text": ""} + result_dict[key] = {"lines": [], "text": "", "procedure_type": ""} result_dict[key]["lines"].append(line) result_dict[key]["text"] += text + result_dict[key]["procedure_type"] = procedure_type # Return the concatenated text for each procedure name, ordered by line return result_dict - def get_stored_procedures(self) -> Iterable[OracleStoredProcedure]: + def _get_stored_procedures_internal( + self, query: str + ) -> Iterable[OracleStoredObject]: + results: FetchObjectList = self.engine.execute( + query.format(schema=self.context.get().database_schema.upper()) + ).all() + results = self.process_result(data=results) + for row in results.items(): + stored_procedure = OracleStoredObject( + name=row[0][1], + definition=row[1]["text"], + owner=row[0][0], + procedure_type=row[1]["procedure_type"], + ) + yield stored_procedure + + def get_stored_procedures(self) -> Iterable[OracleStoredObject]: """List Oracle Stored Procedures""" if self.source_config.includeStoredProcedures: - results: FetchProcedureList = self.engine.execute( - ORACLE_GET_STORED_PROCEDURES.format( - schema=self.context.get().database_schema.upper() - ) - ).all() - results = self.process_result(data=results) - for row in results.items(): - stored_procedure = OracleStoredProcedure( - name=row[0][1], definition=row[1]["text"], owner=row[0][0] - ) - yield stored_procedure + yield from self._get_stored_procedures_internal( + ORACLE_GET_STORED_PROCEDURES + ) + yield from self._get_stored_procedures_internal(ORACLE_GET_STORED_PACKAGES) def yield_stored_procedure( - self, stored_procedure: OracleStoredProcedure + self, stored_procedure: OracleStoredObject ) -> Iterable[Either[CreateStoredProcedureRequest]]: """Prepare the stored procedure payload""" - try: stored_procedure_request = CreateStoredProcedureRequest( 
name=EntityName(stored_procedure.name), @@ -223,6 +235,11 @@ class OracleSource(CommonDbSourceService): language=Language.SQL, code=stored_procedure.definition, ), + storedProcedureType=( + StoredProcedureType.StoredPackage + if stored_procedure.procedure_type == "StoredPackage" + else StoredProcedureType.StoredProcedure + ), owners=self.metadata.get_reference_by_name( name=stored_procedure.owner.lower(), is_owner=True ), diff --git a/ingestion/src/metadata/ingestion/source/database/oracle/models.py b/ingestion/src/metadata/ingestion/source/database/oracle/models.py index ecd786df540..4211c2f6d32 100644 --- a/ingestion/src/metadata/ingestion/source/database/oracle/models.py +++ b/ingestion/src/metadata/ingestion/source/database/oracle/models.py @@ -6,7 +6,7 @@ from typing import List, Optional from pydantic import BaseModel, Field -class OracleStoredProcedure(BaseModel): +class OracleStoredObject(BaseModel): """Oracle Stored Procedure list query results""" name: str @@ -15,9 +15,10 @@ class OracleStoredProcedure(BaseModel): None, description="Will only be informed for non-SQL routines." 
) owner: str + procedure_type: Optional[str] = Field(None, alias="procedure_type") -class FetchProcedure(BaseModel): +class FetchObject(BaseModel): """Oracle Fetch Stored Procedure Raw Model""" owner: Optional[str] = None @@ -26,5 +27,5 @@ class FetchProcedure(BaseModel): text: str -class FetchProcedureList(BaseModel): - __name__: List[FetchProcedure] +class FetchObjectList(BaseModel): + __name__: List[FetchObject] diff --git a/ingestion/src/metadata/ingestion/source/database/oracle/queries.py b/ingestion/src/metadata/ingestion/source/database/oracle/queries.py index 026dec08e1f..14ec20f65a9 100644 --- a/ingestion/src/metadata/ingestion/source/database/oracle/queries.py +++ b/ingestion/src/metadata/ingestion/source/database/oracle/queries.py @@ -87,13 +87,34 @@ SELECT OWNER, NAME, LINE, - TEXT + TEXT, + 'StoredProcedure' as procedure_type FROM DBA_SOURCE WHERE type = 'PROCEDURE' and owner = '{schema}' """ ) +ORACLE_GET_SCHEMA = """ + SELECT USERNAME AS SCHEMA_NAME + FROM ALL_USERS + WHERE ROWNUM = 1 + ORDER BY USERNAME +""" +ORACLE_GET_STORED_PACKAGES = textwrap.dedent( + """ +SELECT + OWNER, + NAME, + LINE, + TEXT, + 'StoredPackage' as procedure_type + +FROM + DBA_SOURCE +WHERE TYPE IN ('PACKAGE', 'PACKAGE BODY') AND owner = '{schema}' +""" +) CHECK_ACCESS_TO_ALL = "SELECT table_name FROM DBA_TABLES where ROWNUM < 2" ORACLE_GET_STORED_PROCEDURE_QUERIES = textwrap.dedent( """ diff --git a/ingestion/tests/unit/topology/database/test_oracle.py b/ingestion/tests/unit/topology/database/test_oracle.py index af368b07b33..cbbdcdfddd7 100644 --- a/ingestion/tests/unit/topology/database/test_oracle.py +++ b/ingestion/tests/unit/topology/database/test_oracle.py @@ -24,7 +24,10 @@ from metadata.generated.schema.api.data.createStoredProcedure import ( ) from metadata.generated.schema.entity.data.database import Database from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema -from metadata.generated.schema.entity.data.storedProcedure import 
StoredProcedureCode +from metadata.generated.schema.entity.data.storedProcedure import ( + StoredProcedureCode, + StoredProcedureType, +) from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import ( OpenMetadataConnection, ) @@ -40,7 +43,7 @@ from metadata.generated.schema.type.basic import EntityName, FullyQualifiedEntit from metadata.generated.schema.type.entityReference import EntityReference from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.ingestion.source.database.oracle.metadata import OracleSource -from metadata.ingestion.source.database.oracle.models import OracleStoredProcedure +from metadata.ingestion.source.database.oracle.models import OracleStoredObject mock_oracle_config = { "source": { @@ -103,10 +106,18 @@ MOCK_DATABASE_SCHEMA = DatabaseSchema( ), ) -MOCK_STORED_PROCEDURE = OracleStoredProcedure( +MOCK_STORED_PROCEDURE = OracleStoredObject( name="sample_procedure", definition="SAMPLE_SQL_TEXT", owner="sample_stored_prcedure_owner", + procedure_type="StoredProcedure", +) + +MOCK_STORED_PACKAGE = OracleStoredObject( + name="sample_package", + definition="SAMPLE_SQL_TEXT", + owner="sample_stored_package_owner", + procedure_type="StoredPackage", ) EXPECTED_DATABASE = [ @@ -154,6 +165,28 @@ EXPECTED_STORED_PROCEDURE = [ owners=None, tags=None, storedProcedureCode=StoredProcedureCode(language="SQL", code="SAMPLE_SQL_TEXT"), + storedProcedureType=StoredProcedureType.StoredProcedure, + databaseSchema=FullyQualifiedEntityName( + "oracle_source_test.sample_database.sample_schema" + ), + extension=None, + dataProducts=None, + sourceUrl=None, + domain=None, + lifeCycle=None, + sourceHash=None, + ) +] + +EXPECTED_STORED_PACKAGE = [ + CreateStoredProcedureRequest( + name=EntityName("sample_package"), + displayName=None, + description=None, + owners=None, + tags=None, + storedProcedureCode=StoredProcedureCode(language="SQL", code="SAMPLE_SQL_TEXT"), + 
storedProcedureType=StoredProcedureType.StoredPackage, databaseSchema=FullyQualifiedEntityName( "oracle_source_test.sample_database.sample_schema" ), @@ -221,3 +254,9 @@ class OracleUnitTest(TestCase): either.right for either in self.oracle.yield_stored_procedure(MOCK_STORED_PROCEDURE) ] + + def test_yield_stored_package(self): + assert EXPECTED_STORED_PACKAGE == [ + either.right + for either in self.oracle.yield_stored_procedure(MOCK_STORED_PACKAGE) + ] diff --git a/openmetadata-service/src/main/resources/json/data/testConnections/database/oracle.json b/openmetadata-service/src/main/resources/json/data/testConnections/database/oracle.json index 99eebc1b6ac..f0e481b8ba3 100644 --- a/openmetadata-service/src/main/resources/json/data/testConnections/database/oracle.json +++ b/openmetadata-service/src/main/resources/json/data/testConnections/database/oracle.json @@ -4,12 +4,18 @@ "description": "This Test Connection validates the access against the database and basic metadata extraction of schemas and tables.", "steps": [ { - "name": "CheckAccess", + "name": "CheckAccess", "description": "Validate that we can properly reach the database and authenticate with the given credentials.", "errorMessage": "Failed to connect to oracle, please validate if the user has relevant permissions, if not, please provide the necessary permissions. For more details, please refer https://docs.open-metadata.org/connectors/database/oracle.", "shortCircuit": true, "mandatory": true }, + { + "name": "PackageAccess", + "description": "Validate that we can access Oracle stored packages.", + "errorMessage": "Failed to access Oracle stored packages. 
Please verify the user has the necessary permissions to access Oracle packages.", + "mandatory": false + }, { "name": "GetSchemas", "description": "List all the schemas available to the user.", @@ -17,18 +23,16 @@ "mandatory": true }, { - "name": "GetTables", + "name": "GetTables", "description": "From a given schema, list the tables belonging to that schema. If no schema is specified, we'll list the tables of a random schema.", "errorMessage": "Failed to fetch tables, please validate if the user has enough privilege to fetch tables.", "mandatory": true }, { "name": "GetViews", - "description": "From a given schema, list the views belonging to that schema. If no schema is specified, we'll list the tables of a random schema.", + "description": "From a given schema, list the views belonging to that schema. If no schema is specified, we'll list the views of a random schema.", "errorMessage": "Failed to fetch views, please validate if the user has enough privilege to fetch views.", "mandatory": false } ] - } - - \ No newline at end of file + } \ No newline at end of file diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/data/storedProcedure.json b/openmetadata-spec/src/main/resources/json/schema/entity/data/storedProcedure.json index 25133d8bc05..416558e9b46 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/data/storedProcedure.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/data/storedProcedure.json @@ -15,7 +15,8 @@ "default": "StoredProcedure", "enum": [ "StoredProcedure", - "UDF" + "UDF", + "StoredPackage" ], "javaEnums": [ { @@ -23,6 +24,9 @@ }, { "name": "UDF" + }, + { + "name": "StoredPackage" } ] },