| 
									
										
										
										
											2025-04-03 10:39:47 +05:30
										 |  |  | #  Copyright 2025 Collate | 
					
						
							|  |  |  | #  Licensed under the Collate Community License, Version 1.0 (the "License"); | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  | #  you may not use this file except in compliance with the License. | 
					
						
							|  |  |  | #  You may obtain a copy of the License at | 
					
						
							| 
									
										
										
										
											2025-04-03 10:39:47 +05:30
										 |  |  | #  https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  | #  Unless required by applicable law or agreed to in writing, software | 
					
						
							|  |  |  | #  distributed under the License is distributed on an "AS IS" BASIS, | 
					
						
							|  |  |  | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
					
						
							|  |  |  | #  See the License for the specific language governing permissions and | 
					
						
							|  |  |  | #  limitations under the License. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """Tests for the OMeta dataframe utilities.""" | 
					
						
							| 
									
										
										
										
											2023-09-19 08:21:38 +02:00
										 |  |  | import os | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  | import unittest | 
					
						
							|  |  |  | from unittest.mock import patch | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import pyarrow.parquet as pq | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from metadata.generated.schema.entity.data.table import Table | 
					
						
							|  |  |  | from metadata.generated.schema.metadataIngestion.workflow import ( | 
					
						
							|  |  |  |     OpenMetadataWorkflowConfig, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2023-05-19 18:54:28 +05:30
										 |  |  | from metadata.generated.schema.type.entityReference import EntityReference | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  | from metadata.ingestion.source.database.datalake.metadata import DatalakeSource | 
					
						
							|  |  |  | from metadata.mixins.pandas.pandas_mixin import PandasInterfaceMixin | 
					
						
							| 
									
										
										
										
											2023-09-13 15:15:49 +05:30
										 |  |  | from metadata.readers.dataframe.reader_factory import SupportedTypes | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  | 
 | 
					
						
							|  |  |  | from .topology.database.test_datalake import mock_datalake_config | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-19 08:21:38 +02:00
										 |  |  | ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  | resp_parquet_file = ( | 
					
						
							| 
									
										
										
										
											2023-09-19 08:21:38 +02:00
										 |  |  |     pq.ParquetFile(os.path.join(ROOT_DIR, "test_ometa_to_dataframe.parquet")) | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  |     .read() | 
					
						
							|  |  |  |     .to_pandas() | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | method_resp_file = [resp_parquet_file] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class TestStringMethods(unittest.TestCase): | 
					
						
							|  |  |  |     def test_dl_column_parser(self): | 
					
						
							|  |  |  |         with patch( | 
					
						
							| 
									
										
										
										
											2023-05-19 18:54:28 +05:30
										 |  |  |             "metadata.utils.datalake.datalake_utils.fetch_dataframe", | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  |             return_value=method_resp_file, | 
					
						
							|  |  |  |         ) as exec_mock_method: | 
					
						
							|  |  |  |             resp = exec_mock_method("key", "string") | 
					
						
							|  |  |  |             assert type(resp) == list | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @patch( | 
					
						
							|  |  |  |         "metadata.ingestion.source.database.database_service.DatabaseServiceSource.test_connection" | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2024-11-27 08:50:54 +01:00
										 |  |  |     def test_get_dataframes(self, test_connection): | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  |         with patch( | 
					
						
							| 
									
										
										
										
											2023-05-19 18:54:28 +05:30
										 |  |  |             "metadata.mixins.pandas.pandas_mixin.fetch_dataframe", | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  |             return_value=[resp_parquet_file], | 
					
						
							|  |  |  |         ): | 
					
						
							| 
									
										
										
										
											2024-06-07 04:36:17 +02:00
										 |  |  |             config = OpenMetadataWorkflowConfig.model_validate(mock_datalake_config) | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  |             datalake_source = DatalakeSource.create( | 
					
						
							|  |  |  |                 mock_datalake_config["source"], | 
					
						
							|  |  |  |                 config.workflowConfig.openMetadataServerConfig, | 
					
						
							|  |  |  |             ) | 
					
						
							| 
									
										
										
										
											2024-11-27 08:50:54 +01:00
										 |  |  |             resp = PandasInterfaceMixin().get_dataframes( | 
					
						
							| 
									
										
										
										
											2023-05-19 18:54:28 +05:30
										 |  |  |                 service_connection_config=datalake_source.service_connection, | 
					
						
							|  |  |  |                 table=Table( | 
					
						
							|  |  |  |                     id="cec14ccf-123f-4271-8c90-0ae54cc4227e", | 
					
						
							|  |  |  |                     columns=[], | 
					
						
							|  |  |  |                     name="test", | 
					
						
							|  |  |  |                     databaseSchema=EntityReference( | 
					
						
							|  |  |  |                         name="Test", | 
					
						
							|  |  |  |                         id="cec14ccf-123f-4271-8c90-0ae54cc4227e", | 
					
						
							|  |  |  |                         type="databaseSchema", | 
					
						
							|  |  |  |                     ), | 
					
						
							| 
									
										
										
										
											2023-09-13 15:15:49 +05:30
										 |  |  |                     fileFormat=SupportedTypes.PARQUET.value, | 
					
						
							| 
									
										
										
										
											2023-05-19 18:54:28 +05:30
										 |  |  |                 ), | 
					
						
							|  |  |  |                 client=None, | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             assert resp == method_resp_file | 
					
						
							|  |  |  |             assert type(resp) == list | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @patch( | 
					
						
							|  |  |  |         "metadata.ingestion.source.database.database_service.DatabaseServiceSource.test_connection" | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2024-11-27 08:50:54 +01:00
										 |  |  |     def test_get_dataframes_fail(self, test_connection): | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  |         with patch( | 
					
						
							| 
									
										
										
										
											2023-05-19 18:54:28 +05:30
										 |  |  |             "metadata.mixins.pandas.pandas_mixin.fetch_dataframe", | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  |             return_value=None, | 
					
						
							|  |  |  |         ): | 
					
						
							|  |  |  |             with self.assertRaises(TypeError) as context: | 
					
						
							| 
									
										
										
										
											2024-06-07 04:36:17 +02:00
										 |  |  |                 config = OpenMetadataWorkflowConfig.model_validate(mock_datalake_config) | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  |                 datalake_source = DatalakeSource.create( | 
					
						
							|  |  |  |                     mock_datalake_config["source"], | 
					
						
							|  |  |  |                     config.workflowConfig.openMetadataServerConfig, | 
					
						
							|  |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2024-11-27 08:50:54 +01:00
										 |  |  |                 PandasInterfaceMixin().get_dataframes( | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  |                     service_connection_config=datalake_source.service_connection, | 
					
						
							|  |  |  |                     table=Table( | 
					
						
							| 
									
										
										
										
											2023-05-19 18:54:28 +05:30
										 |  |  |                         id="cec14ccf-123f-4271-8c90-0ae54cc4227e", | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  |                         columns=[], | 
					
						
							| 
									
										
										
										
											2023-05-19 18:54:28 +05:30
										 |  |  |                         name="test", | 
					
						
							|  |  |  |                         databaseSchema=EntityReference( | 
					
						
							|  |  |  |                             name="Test", | 
					
						
							|  |  |  |                             id="cec14ccf-123f-4271-8c90-0ae54cc4227e", | 
					
						
							|  |  |  |                             type="databaseSchema", | 
					
						
							|  |  |  |                         ), | 
					
						
							| 
									
										
										
										
											2023-09-13 15:15:49 +05:30
										 |  |  |                         fileFormat=None, | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  |                     ), | 
					
						
							| 
									
										
										
										
											2023-05-19 18:54:28 +05:30
										 |  |  |                     client=None, | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2023-05-19 18:54:28 +05:30
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-04-11 20:58:31 +05:30
										 |  |  |             self.assertEqual(context.exception.args[0], "Couldn't fetch test") |