| 
									
										
										
										
											2023-02-02 17:10:35 +01:00
										 |  |  | #  Copyright 2021 Collate | 
					
						
							|  |  |  | #  Licensed under the Apache License, Version 2.0 (the "License"); | 
					
						
							|  |  |  | #  you may not use this file except in compliance with the License. | 
					
						
							|  |  |  | #  You may obtain a copy of the License at | 
					
						
							|  |  |  | #  http://www.apache.org/licenses/LICENSE-2.0 | 
					
						
							|  |  |  | #  Unless required by applicable law or agreed to in writing, software | 
					
						
							|  |  |  | #  distributed under the License is distributed on an "AS IS" BASIS, | 
					
						
							|  |  |  | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
					
						
							|  |  |  | #  See the License for the specific language governing permissions and | 
					
						
							|  |  |  | #  limitations under the License. | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | This script generates the Python models from the JSON Schemas definition. Additionally, it replaces the `SecretStr` | 
					
						
							|  |  |  | pydantic class used for the password fields with the `CustomSecretStr` pydantic class which retrieves the secrets | 
					
						
							|  |  |  | from a configured secrets' manager. | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											2022-11-11 09:59:15 +01:00
										 |  |  | import datamodel_code_generator.model.pydantic | 
					
						
							|  |  |  | from datamodel_code_generator.imports import Import | 
					
						
							| 
									
										
										
										
											2023-10-30 18:23:47 +11:00
										 |  |  | import os | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  | import re | 
					
						
							| 
									
										
										
										
											2022-11-11 09:59:15 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-02-02 12:49:45 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-11-11 09:59:15 +01:00
										 |  |  | datamodel_code_generator.model.pydantic.types.IMPORT_SECRET_STR = Import.from_full_path( | 
					
						
							|  |  |  |     "metadata.ingestion.models.custom_pydantic.CustomSecretStr" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from datamodel_code_generator.__main__ import main | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-30 18:23:47 +11:00
										 |  |  | current_directory = os.getcwd() | 
					
						
							|  |  |  | ingestion_path = "./" if current_directory.endswith("/ingestion") else "ingestion/" | 
					
						
							|  |  |  | directory_root = "../" if current_directory.endswith("/ingestion") else "./" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-27 19:12:24 +01:00
										 |  |  | UTF_8 = "UTF-8" | 
					
						
							| 
									
										
										
										
											2023-10-30 18:23:47 +11:00
										 |  |  | UNICODE_REGEX_REPLACEMENT_FILE_PATHS = [ | 
					
						
							|  |  |  |     f"{ingestion_path}src/metadata/generated/schema/entity/classification/tag.py", | 
					
						
							|  |  |  |     f"{ingestion_path}src/metadata/generated/schema/entity/events/webhook.py", | 
					
						
							|  |  |  |     f"{ingestion_path}src/metadata/generated/schema/entity/teams/user.py", | 
					
						
							|  |  |  |     f"{ingestion_path}src/metadata/generated/schema/entity/type.py", | 
					
						
							|  |  |  |     f"{ingestion_path}src/metadata/generated/schema/type/basic.py", | 
					
						
							|  |  |  | ] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-07 22:52:16 +02:00
										 |  |  | args = f"--input {directory_root}openmetadata-spec/src/main/resources/json/schema --output-model-type pydantic_v2.BaseModel --use-annotated --base-class metadata.ingestion.models.custom_pydantic.BaseModel --input-file-type jsonschema --output {ingestion_path}src/metadata/generated/schema --set-default-enum-member".split(" ") | 
					
						
							| 
									
										
										
										
											2023-10-30 18:23:47 +11:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-11-11 09:59:15 +01:00
										 |  |  | main(args) | 
					
						
							| 
									
										
										
										
											2023-02-02 12:49:45 -08:00
										 |  |  | 
 | 
					
						
							|  |  |  | for file_path in UNICODE_REGEX_REPLACEMENT_FILE_PATHS: | 
					
						
							| 
									
										
										
										
											2024-03-27 19:12:24 +01:00
										 |  |  |     with open(file_path, "r", encoding=UTF_8) as file_: | 
					
						
							| 
									
										
										
										
											2023-02-02 12:49:45 -08:00
										 |  |  |         content = file_.read() | 
					
						
							| 
									
										
										
										
											2023-12-14 15:46:58 +01:00
										 |  |  |         # Python now requires to move the global flags at the very start of the expression | 
					
						
							| 
									
										
										
										
											2023-12-18 17:01:57 +01:00
										 |  |  |         content = content.replace("(?U)", "(?u)") | 
					
						
							| 
									
										
										
										
											2024-03-27 19:12:24 +01:00
										 |  |  |     with open(file_path, "w", encoding=UTF_8) as file_: | 
					
						
							| 
									
										
										
										
											2023-02-02 12:49:45 -08:00
										 |  |  |         file_.write(content) | 
					
						
							| 
									
										
										
										
											2024-03-27 19:12:24 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | # Until https://github.com/koxudaxi/datamodel-code-generator/issues/1895 | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  | # TODO: This has been merged but `Union` is still not there. We'll need to validate | 
					
						
							| 
									
										
										
										
											2024-03-27 19:12:24 +01:00
										 |  |  | MISSING_IMPORTS = [f"{ingestion_path}src/metadata/generated/schema/entity/applications/app.py",] | 
					
						
							|  |  |  | WRITE_AFTER = "from __future__ import annotations" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | for file_path in MISSING_IMPORTS: | 
					
						
							|  |  |  |     with open(file_path, "r", encoding=UTF_8) as file_: | 
					
						
							|  |  |  |         lines = file_.readlines() | 
					
						
							|  |  |  |     with open(file_path, "w", encoding=UTF_8) as file_: | 
					
						
							|  |  |  |         for line in lines: | 
					
						
							|  |  |  |             file_.write(line) | 
					
						
							|  |  |  |             if line.strip() == WRITE_AFTER: | 
					
						
							|  |  |  |                 file_.write("from typing import Union  # custom generate import\n\n") | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # unsupported rust regex pattern for pydantic v2 | 
					
						
							|  |  |  | # https://docs.pydantic.dev/2.7/api/config/#pydantic.config.ConfigDict.regex_engine | 
					
						
							|  |  |  | # We'll remove validation from the client and let it fail on the server, rather than on the model generation | 
					
						
							|  |  |  | UNSUPPORTED_REGEX_PATTERN_FILE_PATHS = [ | 
					
						
							|  |  |  |     f"{ingestion_path}src/metadata/generated/schema/type/basic.py", | 
					
						
							|  |  |  |     f"{ingestion_path}src/metadata/generated/schema/entity/data/searchIndex.py", | 
					
						
							|  |  |  |     f"{ingestion_path}src/metadata/generated/schema/entity/data/table.py", | 
					
						
							|  |  |  | ] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | for file_path in UNSUPPORTED_REGEX_PATTERN_FILE_PATHS: | 
					
						
							|  |  |  |     with open(file_path, "r", encoding=UTF_8) as file_: | 
					
						
							|  |  |  |         content = file_.read() | 
					
						
							|  |  |  |         content = content.replace("pattern='^((?!::).)*$',", "") | 
					
						
							|  |  |  |     with open(file_path, "w", encoding=UTF_8) as file_: | 
					
						
							|  |  |  |         file_.write(content) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-07 04:36:17 +02:00
										 |  |  | # Until https://github.com/koxudaxi/datamodel-code-generator/issues/1996 | 
					
						
							|  |  |  | # Supporting timezone aware datetime is too complex for the profiler | 
					
						
							|  |  |  | DATETIME_AWARE_FILE_PATHS = [ | 
					
						
							|  |  |  |     f"{ingestion_path}src/metadata/generated/schema/type/basic.py", | 
					
						
							|  |  |  | ] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | for file_path in DATETIME_AWARE_FILE_PATHS: | 
					
						
							|  |  |  |     with open(file_path, "r", encoding=UTF_8) as file_: | 
					
						
							|  |  |  |         content = file_.read() | 
					
						
							|  |  |  |         content = content.replace( | 
					
						
							|  |  |  |             "from pydantic import AnyUrl, AwareDatetime, ConfigDict, EmailStr, Field, RootModel", | 
					
						
							|  |  |  |             "from pydantic import AnyUrl, ConfigDict, EmailStr, Field, RootModel" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         content = content.replace("from datetime import date, time", "from datetime import date, time, datetime") | 
					
						
							|  |  |  |         content = content.replace("AwareDatetime", "datetime") | 
					
						
							|  |  |  |     with open(file_path, "w", encoding=UTF_8) as file_: | 
					
						
							|  |  |  |         file_.write(content) |