mirror of
				https://github.com/open-metadata/OpenMetadata.git
				synced 2025-10-31 02:29:03 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			101 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			101 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #  Copyright 2021 Collate
 | |
| #  Licensed under the Apache License, Version 2.0 (the "License");
 | |
| #  you may not use this file except in compliance with the License.
 | |
| #  You may obtain a copy of the License at
 | |
| #  http://www.apache.org/licenses/LICENSE-2.0
 | |
| #  Unless required by applicable law or agreed to in writing, software
 | |
| #  distributed under the License is distributed on an "AS IS" BASIS,
 | |
| #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| #  See the License for the specific language governing permissions and
 | |
| #  limitations under the License.
 | |
| """
 | |
| This script generates the Python models from the JSON Schemas definition. Additionally, it replaces the `SecretStr`
 | |
| pydantic class used for the password fields with the `CustomSecretStr` pydantic class which retrieves the secrets
 | |
| from a configured secrets' manager.
 | |
| """
 | |
| import datamodel_code_generator.model.pydantic
 | |
| from datamodel_code_generator.imports import Import
 | |
| import os
 | |
| import re
 | |
| 
 | |
| 
 | |
| datamodel_code_generator.model.pydantic.types.IMPORT_SECRET_STR = Import.from_full_path(
 | |
|     "metadata.ingestion.models.custom_pydantic.CustomSecretStr"
 | |
| )
 | |
| 
 | |
| from datamodel_code_generator.__main__ import main
 | |
| 
 | |
| current_directory = os.getcwd()
 | |
| ingestion_path = "./" if current_directory.endswith("/ingestion") else "ingestion/"
 | |
| directory_root = "../" if current_directory.endswith("/ingestion") else "./"
 | |
| 
 | |
| UTF_8 = "UTF-8"
 | |
| UNICODE_REGEX_REPLACEMENT_FILE_PATHS = [
 | |
|     f"{ingestion_path}src/metadata/generated/schema/entity/classification/tag.py",
 | |
|     f"{ingestion_path}src/metadata/generated/schema/entity/events/webhook.py",
 | |
|     f"{ingestion_path}src/metadata/generated/schema/entity/teams/user.py",
 | |
|     f"{ingestion_path}src/metadata/generated/schema/entity/type.py",
 | |
|     f"{ingestion_path}src/metadata/generated/schema/type/basic.py",
 | |
| ]
 | |
| 
 | |
| args = f"--input {directory_root}openmetadata-spec/src/main/resources/json/schema --output-model-type pydantic_v2.BaseModel --use-annotated --base-class metadata.ingestion.models.custom_pydantic.BaseModel --input-file-type jsonschema --output {ingestion_path}src/metadata/generated/schema --set-default-enum-member".split(" ")
 | |
| 
 | |
| main(args)
 | |
| 
 | |
| for file_path in UNICODE_REGEX_REPLACEMENT_FILE_PATHS:
 | |
|     with open(file_path, "r", encoding=UTF_8) as file_:
 | |
|         content = file_.read()
 | |
|         # Python now requires to move the global flags at the very start of the expression
 | |
|         content = content.replace("(?U)", "(?u)")
 | |
|     with open(file_path, "w", encoding=UTF_8) as file_:
 | |
|         file_.write(content)
 | |
| 
 | |
| # Until https://github.com/koxudaxi/datamodel-code-generator/issues/1895
 | |
| # TODO: This has been merged but `Union` is still not there. We'll need to validate
 | |
| MISSING_IMPORTS = [f"{ingestion_path}src/metadata/generated/schema/entity/applications/app.py",]
 | |
| WRITE_AFTER = "from __future__ import annotations"
 | |
| 
 | |
| for file_path in MISSING_IMPORTS:
 | |
|     with open(file_path, "r", encoding=UTF_8) as file_:
 | |
|         lines = file_.readlines()
 | |
|     with open(file_path, "w", encoding=UTF_8) as file_:
 | |
|         for line in lines:
 | |
|             file_.write(line)
 | |
|             if line.strip() == WRITE_AFTER:
 | |
|                 file_.write("from typing import Union  # custom generate import\n\n")
 | |
| 
 | |
| 
 | |
| # unsupported rust regex pattern for pydantic v2
 | |
| # https://docs.pydantic.dev/2.7/api/config/#pydantic.config.ConfigDict.regex_engine
 | |
| # We'll remove validation from the client and let it fail on the server, rather than on the model generation
 | |
| UNSUPPORTED_REGEX_PATTERN_FILE_PATHS = [
 | |
|     f"{ingestion_path}src/metadata/generated/schema/type/basic.py",
 | |
|     f"{ingestion_path}src/metadata/generated/schema/entity/data/searchIndex.py",
 | |
|     f"{ingestion_path}src/metadata/generated/schema/entity/data/table.py",
 | |
| ]
 | |
| 
 | |
| for file_path in UNSUPPORTED_REGEX_PATTERN_FILE_PATHS:
 | |
|     with open(file_path, "r", encoding=UTF_8) as file_:
 | |
|         content = file_.read()
 | |
|         content = content.replace("pattern='^((?!::).)*$',", "")
 | |
|     with open(file_path, "w", encoding=UTF_8) as file_:
 | |
|         file_.write(content)
 | |
| 
 | |
| # Until https://github.com/koxudaxi/datamodel-code-generator/issues/1996
 | |
| # Supporting timezone aware datetime is too complex for the profiler
 | |
| DATETIME_AWARE_FILE_PATHS = [
 | |
|     f"{ingestion_path}src/metadata/generated/schema/type/basic.py",
 | |
| ]
 | |
| 
 | |
| for file_path in DATETIME_AWARE_FILE_PATHS:
 | |
|     with open(file_path, "r", encoding=UTF_8) as file_:
 | |
|         content = file_.read()
 | |
|         content = content.replace(
 | |
|             "from pydantic import AnyUrl, AwareDatetime, ConfigDict, EmailStr, Field, RootModel",
 | |
|             "from pydantic import AnyUrl, ConfigDict, EmailStr, Field, RootModel"
 | |
|         )
 | |
|         content = content.replace("from datetime import date, time", "from datetime import date, time, datetime")
 | |
|         content = content.replace("AwareDatetime", "datetime")
 | |
|     with open(file_path, "w", encoding=UTF_8) as file_:
 | |
|         file_.write(content)
 | 
