| 
									
										
										
										
											2025-04-03 10:39:47 +05:30
										 |  |  | #  Copyright 2025 Collate | 
					
						
							|  |  |  | #  Licensed under the Collate Community License, Version 1.0 (the "License"); | 
					
						
							| 
									
										
										
										
											2022-01-10 09:36:08 +01:00
										 |  |  | #  you may not use this file except in compliance with the License. | 
					
						
							|  |  |  | #  You may obtain a copy of the License at | 
					
						
							| 
									
										
										
										
											2025-04-03 10:39:47 +05:30
										 |  |  | #  https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE | 
					
						
							| 
									
										
										
										
											2022-01-10 09:36:08 +01:00
										 |  |  | #  Unless required by applicable law or agreed to in writing, software | 
					
						
							|  |  |  | #  distributed under the License is distributed on an "AS IS" BASIS, | 
					
						
							|  |  |  | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
					
						
							|  |  |  | #  See the License for the specific language governing permissions and | 
					
						
							|  |  |  | #  limitations under the License. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | OpenMetadata MlModel mixin test | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | from unittest import TestCase | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import pandas as pd | 
					
						
							|  |  |  | import sklearn.datasets as datasets | 
					
						
							|  |  |  | from sklearn.model_selection import train_test_split | 
					
						
							|  |  |  | from sklearn.tree import DecisionTreeClassifier | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-02-01 01:29:56 +01:00
										 |  |  | from metadata.generated.schema.api.data.createMlModel import CreateMlModelRequest | 
					
						
							| 
									
										
										
										
											2022-03-14 06:59:15 +01:00
										 |  |  | from metadata.generated.schema.entity.data.mlmodel import MlModel | 
					
						
							| 
									
										
										
										
											2022-04-12 23:40:21 -07:00
										 |  |  | from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import ( | 
					
						
							|  |  |  |     OpenMetadataConnection, | 
					
						
							| 
									
										
										
										
											2022-04-07 14:52:50 +02:00
										 |  |  | ) | 
					
						
							| 
									
										
										
										
											2022-09-26 16:19:47 +05:30
										 |  |  | from metadata.generated.schema.security.client.openMetadataJWTClientConfig import ( | 
					
						
							|  |  |  |     OpenMetadataJWTClientConfig, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2022-01-10 09:36:08 +01:00
										 |  |  | from metadata.ingestion.ometa.ometa_api import OpenMetadata | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class OMetaModelMixinTest(TestCase): | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Test the MlModel integrations from MlModel Mixin | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-09-26 16:19:47 +05:30
										 |  |  |     server_config = OpenMetadataConnection( | 
					
						
							|  |  |  |         hostPort="http://localhost:8585/api", | 
					
						
							|  |  |  |         authProvider="openmetadata", | 
					
						
							|  |  |  |         securityConfig=OpenMetadataJWTClientConfig( | 
					
						
							|  |  |  |             jwtToken="eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg" | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2022-01-10 09:36:08 +01:00
										 |  |  |     metadata = OpenMetadata(server_config) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     iris = datasets.load_iris() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_get_sklearn(self): | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         Check that we can ingest an SKlearn model | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         df = pd.DataFrame(self.iris.data, columns=self.iris.feature_names) | 
					
						
							|  |  |  |         y = self.iris.target | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         x_train, x_test, y_train, y_test = train_test_split( | 
					
						
							|  |  |  |             df, y, test_size=0.25, random_state=70 | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         dtree = DecisionTreeClassifier() | 
					
						
							|  |  |  |         dtree.fit(x_train, y_train) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-02-01 01:29:56 +01:00
										 |  |  |         entity_create: CreateMlModelRequest = self.metadata.get_mlmodel_sklearn( | 
					
						
							| 
									
										
										
										
											2022-01-10 09:36:08 +01:00
										 |  |  |             name="test-sklearn", | 
					
						
							|  |  |  |             model=dtree, | 
					
						
							|  |  |  |             description="Creating a test sklearn model", | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         entity: MlModel = self.metadata.create_or_update(data=entity_create) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual(entity.name, entity_create.name) | 
					
						
							|  |  |  |         self.assertEqual(entity.algorithm, "DecisionTreeClassifier") | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |             {feature.name.root for feature in entity.mlFeatures}, | 
					
						
							| 
									
										
										
										
											2022-01-10 09:36:08 +01:00
										 |  |  |             { | 
					
						
							|  |  |  |                 "sepal_length__cm_", | 
					
						
							|  |  |  |                 "sepal_width__cm_", | 
					
						
							|  |  |  |                 "petal_length__cm_", | 
					
						
							|  |  |  |                 "petal_width__cm_", | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         hyper_param = next( | 
					
						
							|  |  |  |             iter( | 
					
						
							|  |  |  |                 param for param in entity.mlHyperParameters if param.name == "criterion" | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |             None, | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         self.assertIsNotNone(hyper_param) |