| 
									
										
										
										
											2025-04-03 10:39:47 +05:30
										 |  |  | #  Copyright 2025 Collate | 
					
						
							|  |  |  | #  Licensed under the Collate Community License, Version 1.0 (the "License"); | 
					
						
							| 
									
										
										
										
											2022-11-11 16:35:09 +05:30
										 |  |  | #  you may not use this file except in compliance with the License. | 
					
						
							|  |  |  | #  You may obtain a copy of the License at | 
					
						
							| 
									
										
										
										
											2025-04-03 10:39:47 +05:30
										 |  |  | #  https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE | 
					
						
							| 
									
										
										
										
											2022-11-11 16:35:09 +05:30
										 |  |  | #  Unless required by applicable law or agreed to in writing, software | 
					
						
							|  |  |  | #  distributed under the License is distributed on an "AS IS" BASIS, | 
					
						
							|  |  |  | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
					
						
							|  |  |  | #  See the License for the specific language governing permissions and | 
					
						
							|  |  |  | #  limitations under the License. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | Protobuf parser tests | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-30 17:59:27 +05:30
										 |  |  | import os | 
					
						
							| 
									
										
										
										
											2022-11-11 16:35:09 +05:30
										 |  |  | from unittest import TestCase | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-17 09:31:27 +05:30
										 |  |  | from metadata.generated.schema.entity.data.table import Column | 
					
						
							| 
									
										
										
										
											2022-11-11 16:35:09 +05:30
										 |  |  | from metadata.parsers.protobuf_parser import ProtobufParser, ProtobufParserConfig | 
					
						
							| 
									
										
										
										
											2024-04-30 17:59:27 +05:30
										 |  |  | from metadata.utils.messaging_utils import merge_and_clean_protobuf_schema | 
					
						
							| 
									
										
										
										
											2022-11-11 16:35:09 +05:30
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class ProtobufParserTests(TestCase): | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Check methods from protobuf_parser.py | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     schema_name = "person_info" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     sample_protobuf_schema = """
 | 
					
						
							|  |  |  |     syntax = "proto3"; | 
					
						
							|  |  |  |     package persons; | 
					
						
							|  |  |  |     enum Gender { | 
					
						
							|  |  |  |         M = 0; // male  | 
					
						
							|  |  |  |         F = 1; // female | 
					
						
							|  |  |  |         O = 2; // other | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2024-01-17 09:31:27 +05:30
										 |  |  | 
 | 
					
						
							|  |  |  |     message Result { | 
					
						
							|  |  |  |         string url = 1; | 
					
						
							|  |  |  |         string title = 2; | 
					
						
							|  |  |  |         repeated string snippets = 3; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-11-11 16:35:09 +05:30
										 |  |  |     message PersonInfo { | 
					
						
							|  |  |  |         int32 age = 1; // age in years | 
					
						
							|  |  |  |         Gender gender = 2;  | 
					
						
							| 
									
										
										
										
											2024-01-17 09:31:27 +05:30
										 |  |  |         Result gender_new = 3;  | 
					
						
							|  |  |  |         int32 height = 4; // height in cm | 
					
						
							|  |  |  |         fixed32 height_new = 5; // height in cm | 
					
						
							|  |  |  |         bool my_bool = 6; | 
					
						
							|  |  |  |         repeated string repeated_string = 7;    | 
					
						
							| 
									
										
										
										
											2022-11-11 16:35:09 +05:30
										 |  |  |     } | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     protobuf_parser = ProtobufParser( | 
					
						
							|  |  |  |         config=ProtobufParserConfig( | 
					
						
							|  |  |  |             schema_name=schema_name, schema_text=sample_protobuf_schema | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     parsed_schema = protobuf_parser.parse_protobuf_schema() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_schema_name(self): | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |         self.assertEqual(self.parsed_schema[0].name.root, "PersonInfo") | 
					
						
							| 
									
										
										
										
											2022-11-11 16:35:09 +05:30
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-15 16:54:55 +05:30
										 |  |  |     def test_schema_type(self): | 
					
						
							| 
									
										
										
										
											2023-03-07 06:40:04 -08:00
										 |  |  |         self.assertEqual(self.parsed_schema[0].dataType.name, "RECORD") | 
					
						
							| 
									
										
										
										
											2022-11-11 16:35:09 +05:30
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-15 16:54:55 +05:30
										 |  |  |     def test_field_names(self): | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |         field_names = {str(field.name.root) for field in self.parsed_schema[0].children} | 
					
						
							| 
									
										
										
										
											2024-01-17 09:31:27 +05:30
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             field_names, | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 "height", | 
					
						
							|  |  |  |                 "gender", | 
					
						
							|  |  |  |                 "age", | 
					
						
							|  |  |  |                 "gender_new", | 
					
						
							|  |  |  |                 "height_new", | 
					
						
							|  |  |  |                 "my_bool", | 
					
						
							|  |  |  |                 "repeated_string", | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2022-11-11 16:35:09 +05:30
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_field_types(self): | 
					
						
							| 
									
										
										
										
											2023-03-07 06:40:04 -08:00
										 |  |  |         field_types = { | 
					
						
							|  |  |  |             str(field.dataType.name) for field in self.parsed_schema[0].children | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2024-01-17 09:31:27 +05:30
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             field_types, {"INT", "ENUM", "RECORD", "FIXED", "STRING", "BOOLEAN"} | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_column_types(self): | 
					
						
							|  |  |  |         parsed_schema = self.protobuf_parser.parse_protobuf_schema(cls=Column) | 
					
						
							|  |  |  |         field_types = {str(field.dataType.name) for field in parsed_schema[0].children} | 
					
						
							|  |  |  |         self.assertEqual(field_types, {"INT", "ENUM", "RECORD", "STRING", "BOOLEAN"}) | 
					
						
							| 
									
										
										
										
											2024-04-30 17:59:27 +05:30
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_complex_protobuf_schema_files(self): | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         We'll read the files under ./ingestion/tests/unit/resources/protobuf_parser and parse them | 
					
						
							|  |  |  |         This will be similar in way to how we get the data from kafka source | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2024-07-26 16:45:34 +02:00
										 |  |  |         resource_path = f"{os.path.dirname(__file__)}/resources/protobuf_parser/" | 
					
						
							| 
									
										
										
										
											2024-04-30 17:59:27 +05:30
										 |  |  |         schema_name = "employee" | 
					
						
							|  |  |  |         file_list = os.listdir(resource_path) | 
					
						
							|  |  |  |         schema_text = "" | 
					
						
							|  |  |  |         for file_name in file_list: | 
					
						
							|  |  |  |             file_path = os.path.join(resource_path, file_name) | 
					
						
							|  |  |  |             with open(file_path, "r") as file: | 
					
						
							|  |  |  |                 schema_text = schema_text + file.read() | 
					
						
							|  |  |  |         schema_text = merge_and_clean_protobuf_schema(schema_text) | 
					
						
							|  |  |  |         protobuf_parser = ProtobufParser( | 
					
						
							|  |  |  |             config=ProtobufParserConfig( | 
					
						
							|  |  |  |                 schema_name=schema_name, schema_text=schema_text | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         parsed_schema = protobuf_parser.parse_protobuf_schema() | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |         self.assertEqual(parsed_schema[0].name.root, "Employee") | 
					
						
							| 
									
										
										
										
											2024-04-30 17:59:27 +05:30
										 |  |  |         self.assertEqual(len(parsed_schema[0].children), 4) | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |         self.assertEqual(parsed_schema[0].children[3].name.root, "contact") | 
					
						
							|  |  |  |         self.assertEqual(parsed_schema[0].children[3].children[0].name.root, "email") | 
					
						
							|  |  |  |         self.assertEqual(parsed_schema[0].children[3].children[1].name.root, "phone") |