From 891707ecaa6e5cab7e22d02256d15bd5ecd81dab Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci <44616784+anakin87@users.noreply.github.com> Date: Wed, 24 Aug 2022 10:40:19 +0200 Subject: [PATCH] bug: handle `Optional` params in schema validation (#2980) * not working draft * first draft * fix * revert json schema * better schema * improvements, support different python versions * little simplification * improvements and more tests * Revert "Merge branch 'handle_optional_params' into origin/main" This reverts commit 0114cba1f72c9bab23a3ce6a24cb4b346834cf34. * fix git mess * handle optional params; schema * test null values Co-authored-by: Sara Zan --- .../haystack-pipeline-master.schema.json | 1268 +++++++++++++---- haystack/nodes/_json_schema.py | 42 +- test/pipelines/test_pipeline_yaml.py | 35 + 3 files changed, 1097 insertions(+), 248 deletions(-) diff --git a/haystack/json-schemas/haystack-pipeline-master.schema.json b/haystack/json-schemas/haystack-pipeline-master.schema.json index 1c238a988..06a6ca15d 100644 --- a/haystack/json-schemas/haystack-pipeline-master.schema.json +++ b/haystack/json-schemas/haystack-pipeline-master.schema.json @@ -369,7 +369,14 @@ }, "index": { "title": "Index", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "duplicate_documents": { "title": "Duplicate Documents", @@ -378,7 +385,14 @@ }, "api_endpoint": { "title": "Api Endpoint", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "similarity": { "title": "Similarity", @@ -471,11 +485,25 @@ }, "api_key_id": { "title": "Api Key Id", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "api_key": { "title": "Api Key", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "aws4auth": { "title": "Aws4Auth" @@ -525,12 +553,26 @@ }, "custom_mapping": { "title": "Custom Mapping", - "type": "object" + "anyOf": [ + { + "type": "object" + }, + { + "type": "null" + } + ] }, "excluded_meta_data": { "title": "Excluded Meta Data", - "type": "array", - "items": {} + "anyOf": [ + { + "type": "array", + "items": {} + }, + { + "type": "null" + } + ] }, "analyzer": { "title": "Analyzer", @@ -544,7 +586,14 @@ }, "ca_certs": { "title": "Ca Certs", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "verify_certs": { "title": "Verify Certs", @@ -603,8 +652,15 @@ }, "synonyms": { "title": "Synonyms", - "type": "array", - "items": {} + "anyOf": [ + { + "type": "array", + "items": {} + }, + { + "type": "null" + } + ] }, "synonym_type": { "title": "Synonym Type", @@ -666,8 +722,15 @@ }, "faiss_index": { "title": "Faiss Index", - "type": "string", - "default": null + "default": null, + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "return_embedding": { "title": "Return Embedding", @@ -798,7 +861,14 @@ }, "index": { "title": "Index", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "prefixes": { "title": "Prefixes", @@ -847,7 +917,14 @@ "embedding_field": { "title": "Embedding Field", "default": "embedding", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "embedding_dim": { "title": "Embedding Dim", @@ -998,11 +1075,25 @@ }, "index_param": { "title": "Index Param", - "type": "object" + "anyOf": [ + { + "type": "object" + }, + { + "type": "null" + } + ] }, "search_param": { "title": "Search Param", - "type": "object" + "anyOf": [ + { + "type": "object" + }, + { + "type": "null" + } + ] }, "return_embedding": { "title": "Return Embedding", @@ -1021,8 +1112,15 @@ }, "custom_fields": { "title": "Custom Fields", - "type": "array", - "items": {} + "anyOf": [ + { + "type": "array", + "items": {} + }, + { + "type": "null" + } + ] }, "progress_bar": { "title": "Progress Bar", @@ -1124,11 +1222,25 @@ }, "api_key_id": { "title": "Api Key Id", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "api_key": { "title": "Api Key", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "aws4auth": { "title": "Aws4Auth" @@ -1178,12 +1290,26 @@ }, "custom_mapping": { "title": "Custom Mapping", - "type": "object" + "anyOf": [ + { + "type": "object" + }, + { + "type": "null" + } + ] }, "excluded_meta_data": { "title": "Excluded Meta Data", - "type": "array", - "items": {} + "anyOf": [ + { + "type": "array", + "items": {} + }, + { + "type": "null" + } + ] }, "analyzer": { "title": "Analyzer", @@ -1192,7 +1318,14 @@ }, "ca_certs": { "title": "Ca Certs", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "verify_certs": { "title": "Verify Certs", @@ -1251,8 +1384,15 @@ }, "synonyms": { "title": "Synonyms", - "type": "array", - "items": {} + "anyOf": [ + { + "type": "array", + "items": {} + }, + { + "type": "null" + } + ] }, "synonym_type": { "title": "Synonym Type", @@ -1340,11 +1480,25 @@ }, "api_key_id": { "title": "Api Key Id", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "api_key": { "title": "Api Key", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "aws4auth": { "title": "Aws4Auth" @@ -1394,12 +1548,26 @@ }, "custom_mapping": { "title": "Custom Mapping", - "type": "object" + "anyOf": [ + { + "type": "object" + }, + { + "type": "null" + } + ] }, "excluded_meta_data": { "title": "Excluded Meta Data", - "type": "array", - "items": {} + "anyOf": [ + { + "type": "array", + "items": {} + }, + { + "type": "null" + } + ] }, "analyzer": { "title": "Analyzer", @@ -1408,7 +1576,14 @@ }, "ca_certs": { "title": "Ca Certs", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "verify_certs": { "title": "Verify Certs", @@ -1467,8 +1642,15 @@ }, "synonyms": { "title": "Synonyms", - "type": "array", - "items": {} + "anyOf": [ + { + "type": "array", + "items": {} + }, + { + "type": "null" + } + ] }, "synonym_type": { "title": "Synonym Type", @@ -1525,8 +1707,15 @@ }, "pinecone_index": { "title": "Pinecone Index", - "type": "string", - "default": null + "default": null, + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "embedding_dim": { "title": "Embedding Dim", @@ -1759,7 +1948,14 @@ }, "custom_schema": { "title": "Custom Schema", - "type": "object" + "anyOf": [ + { + "type": "object" + }, + { + "type": "null" + } + ] }, "return_embedding": { "title": "Return Embedding", @@ -1836,11 +2032,25 @@ }, "audio_params": { "title": "Audio Params", - "type": "object" + "anyOf": [ + { + "type": "object" + }, + { + "type": "null" + } + ] }, "transformers_params": { "title": "Transformers Params", - "type": "object" + "anyOf": [ + { + "type": "object" + }, + { + "type": "null" + } + ] }, "progress_bar": { "title": "Progress Bar", @@ -1891,10 +2101,17 @@ }, "valid_languages": { "title": "Valid Languages", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "save_json": { "title": "Save Json", @@ -1918,10 +2135,17 @@ }, "id_hash_keys": { "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "add_page_number": { "title": "Add Page Number", @@ -1977,7 +2201,14 @@ }, "custom_query": { "title": "Custom Query", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "scale_score": { "title": "Scale Score", @@ -2022,10 +2253,17 @@ }, "urls": { "title": "Urls", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "crawler_depth": { "title": "Crawler Depth", @@ -2034,8 +2272,15 @@ }, "filter_urls": { "title": "Filter Urls", - "type": "array", - "items": {} + "anyOf": [ + { + "type": "array", + "items": {} + }, + { + "type": "null" + } + ] }, "overwrite_existing_files": { "title": "Overwrite Existing Files", @@ -2043,10 +2288,17 @@ }, "id_hash_keys": { "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "extract_hidden_text": { "title": "Extract Hidden Text", @@ -2054,19 +2306,40 @@ }, "loading_wait_time": { "title": "Loading Wait Time", - "type": "integer" + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] }, "crawler_naming_function": { "title": "Crawler Naming Function", - "type": "string", - "default": null + "default": null, + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "webdriver_options": { "title": "Webdriver Options", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] } }, "required": [ @@ -2132,7 +2405,14 @@ }, "model_version": { "title": "Model Version", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "max_seq_len_query": { "title": "Max Seq Len Query", @@ -2186,17 +2466,24 @@ }, "devices": { "title": "Devices", - "type": "array", - "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" + "anyOf": [ + { + "type": "array", + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "string" + } + ] } - ] - } + }, + { + "type": "null" + } + ] }, "use_auth_token": { "title": "Use Auth Token", @@ -2206,6 +2493,9 @@ }, { "type": "string" + }, + { + "type": "null" } ] }, @@ -2301,11 +2591,25 @@ }, "audio_params": { "title": "Audio Params", - "type": "object" + "anyOf": [ + { + "type": "object" + }, + { + "type": "null" + } + ] }, "transformers_params": { "title": "Transformers Params", - "type": "object" + "anyOf": [ + { + "type": "object" + }, + { + "type": "null" + } + ] } }, "additionalProperties": false, @@ -2343,17 +2647,31 @@ }, "valid_languages": { "title": "Valid Languages", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "id_hash_keys": { "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "progress_bar": { "title": "Progress Bar", @@ -2405,7 +2723,14 @@ }, "custom_query": { "title": "Custom Query", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] } }, "required": [ @@ -2455,7 +2780,14 @@ }, "custom_query": { "title": "Custom Query", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] } }, "required": [ @@ -2499,7 +2831,14 @@ }, "model_version": { "title": "Model Version", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "use_gpu": { "title": "Use Gpu", @@ -2518,7 +2857,14 @@ }, "model_format": { "title": "Model Format", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "pooling_strategy": { "title": "Pooling Strategy", @@ -2542,17 +2888,24 @@ }, "devices": { "title": "Devices", - "type": "array", - "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" + "anyOf": [ + { + "type": "array", + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "string" + } + ] } - ] - } + }, + { + "type": "null" + } + ] }, "use_auth_token": { "title": "Use Auth Token", @@ -2562,6 +2915,9 @@ }, { "type": "string" + }, + { + "type": "null" } ] }, @@ -2758,7 +3114,14 @@ }, "model_version": { "title": "Model Version", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "context_window_size": { "title": "Context Window Size", @@ -2810,7 +3173,14 @@ }, "num_processes": { "title": "Num Processes", - "type": "integer" + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] }, "max_seq_len": { "title": "Max Seq Len", @@ -2839,14 +3209,28 @@ }, "confidence_threshold": { "title": "Confidence Threshold", - "type": "number" + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] }, "proxies": { "title": "Proxies", - "type": "object", "additionalProperties": { "type": "string" - } + }, + "anyOf": [ + { + "type": "object" + }, + { + "type": "null" + } + ] }, "local_files_only": { "title": "Local Files Only", @@ -2864,6 +3248,9 @@ }, { "type": "string" + }, + { + "type": "null" } ] } @@ -2958,7 +3345,14 @@ }, "custom_query": { "title": "Custom Query", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "scale_score": { "title": "Scale Score", @@ -3007,17 +3401,31 @@ "default": [ "eng" ], - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "id_hash_keys": { "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] } }, "additionalProperties": false, @@ -3055,14 +3463,28 @@ }, "weights": { "title": "Weights", - "type": "array", - "items": { - "type": "number" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "number" + } + }, + { + "type": "null" + } + ] }, "top_k_join": { "title": "Top K Join", - "type": "integer" + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] }, "sort_by_score": { "title": "Sort By Score", @@ -3105,14 +3527,28 @@ }, "weights": { "title": "Weights", - "type": "array", - "items": { - "type": "number" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "number" + } + }, + { + "type": "null" + } + ] }, "top_k_join": { "title": "Top K Join", - "type": "integer" + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] }, "sort_by_score": { "title": "Sort By Score", @@ -3155,17 +3591,31 @@ }, "valid_languages": { "title": "Valid Languages", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "id_hash_keys": { "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "progress_bar": { "title": "Progress Bar", @@ -3211,7 +3661,14 @@ }, "model_version": { "title": "Model Version", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "num_iterations": { "title": "Num Iterations", @@ -3260,17 +3717,24 @@ }, "devices": { "title": "Devices", - "type": "array", - "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" + "anyOf": [ + { + "type": "array", + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "string" + } + ] } - ] - } + }, + { + "type": "null" + } + ] }, "use_auth_token": { "title": "Use Auth Token", @@ -3280,6 +3744,9 @@ }, { "type": "string" + }, + { + "type": "null" } ] }, @@ -3365,17 +3832,38 @@ }, "examples_context": { "title": "Examples Context", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "examples": { "title": "Examples", - "type": "array", - "items": {} + "anyOf": [ + { + "type": "array", + "items": {} + }, + { + "type": "null" + } + ] }, "stop_words": { "title": "Stop Words", - "type": "array", - "items": {} + "anyOf": [ + { + "type": "array", + "items": {} + }, + { + "type": "null" + } + ] }, "progress_bar": { "title": "Progress Bar", @@ -3421,22 +3909,43 @@ }, "valid_languages": { "title": "Valid Languages", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "id_hash_keys": { "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "encoding": { "title": "Encoding", "default": "UTF-8", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] } }, "additionalProperties": false, @@ -3477,17 +3986,31 @@ "default": [ "eng" ], - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "id_hash_keys": { "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] } }, "additionalProperties": false, @@ -3568,17 +4091,31 @@ }, "valid_languages": { "title": "Valid Languages", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "id_hash_keys": { "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "add_page_number": { "title": "Add Page Number", @@ -3666,6 +4203,9 @@ { "type": "string", "format": "path" + }, + { + "type": "null" } ] }, @@ -3676,10 +4216,17 @@ }, "id_hash_keys": { "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "progress_bar": { "title": "Progress Bar", @@ -3897,12 +4444,26 @@ }, "model_version": { "title": "Model Version", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "retriever": { "title": "Retriever", - "type": "string", - "default": null + "default": null, + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "generator_type": { "title": "Generator Type", @@ -3936,7 +4497,14 @@ }, "prefix": { "title": "Prefix", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "use_gpu": { "title": "Use Gpu", @@ -3989,19 +4557,47 @@ }, "row_model_version": { "title": "Row Model Version", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "column_model_version": { "title": "Column Model Version", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "row_tokenizer": { "title": "Row Tokenizer", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "column_tokenizer": { "title": "Column Tokenizer", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "use_gpu": { "title": "Use Gpu", @@ -4054,10 +4650,17 @@ }, "metadata_values": { "title": "Metadata Values", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] } }, "additionalProperties": false, @@ -4102,7 +4705,14 @@ }, "model_version": { "title": "Model Version", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "top_k": { "title": "Top K", @@ -4116,17 +4726,24 @@ }, "devices": { "title": "Devices", - "type": "array", - "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" + "anyOf": [ + { + "type": "array", + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "string" + } + ] } - ] - } + }, + { + "type": "null" + } + ] }, "batch_size": { "title": "Batch Size", @@ -4181,8 +4798,15 @@ }, "input_converter": { "title": "Input Converter", - "type": "string", - "default": null + "default": null, + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "top_k": { "title": "Top K", @@ -4268,7 +4892,14 @@ }, "batch_size": { "title": "Batch Size", - "type": "integer" + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] }, "progress_bar": { "title": "Progress Bar", @@ -4311,11 +4942,25 @@ }, "model_version": { "title": "Model Version", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "tokenizer": { "title": "Tokenizer", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "use_gpu": { "title": "Use Gpu", @@ -4416,7 +5061,14 @@ }, "model_version": { "title": "Model Version", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "max_seq_len_query": { "title": "Max Seq Len Query", @@ -4482,17 +5134,24 @@ }, "devices": { "title": "Devices", - "type": "array", - "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" + "anyOf": [ + { + "type": "array", + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "string" + } + ] } - ] - } + }, + { + "type": "null" + } + ] }, "use_auth_token": { "title": "Use Auth Token", @@ -4502,6 +5161,9 @@ }, { "type": "string" + }, + { + "type": "null" } ] }, @@ -4598,17 +5260,31 @@ }, "valid_languages": { "title": "Valid Languages", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "id_hash_keys": { "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "progress_bar": { "title": "Progress Bar", @@ -4701,17 +5377,31 @@ }, "valid_languages": { "title": "Valid Languages", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "id_hash_keys": { "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] } }, "additionalProperties": false, @@ -4749,11 +5439,25 @@ }, "model_version": { "title": "Model Version", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "tokenizer": { "title": "Tokenizer", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "use_gpu": { "title": "Use Gpu", @@ -4772,10 +5476,17 @@ }, "labels": { "title": "Labels", - "type": "array", - "items": { - "type": "string" - } + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ] }, "batch_size": { "title": "Batch Size", @@ -4835,11 +5546,25 @@ }, "model_version": { "title": "Model Version", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "tokenizer": { "title": "Tokenizer", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "use_gpu": { "title": "Use Gpu", @@ -4908,11 +5633,25 @@ }, "model_version": { "title": "Model Version", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "tokenizer": { "title": "Tokenizer", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "context_window_size": { "title": "Context Window Size", @@ -4990,11 +5729,25 @@ }, "model_version": { "title": "Model Version", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "tokenizer": { "title": "Tokenizer", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "max_length": { "title": "Max Length", @@ -5071,16 +5824,37 @@ }, "tokenizer_name": { "title": "Tokenizer Name", - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, "max_seq_len": { "title": "Max Seq Len", - "type": "integer" + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] }, "clean_up_tokenization_spaces": { "title": "Clean Up Tokenization Spaces", "default": true, - "type": "boolean" + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ] }, "use_gpu": { "title": "Use Gpu", diff --git a/haystack/nodes/_json_schema.py b/haystack/nodes/_json_schema.py index 87c798b8f..162482823 100644 --- a/haystack/nodes/_json_schema.py +++ b/haystack/nodes/_json_schema.py @@ -1,4 +1,4 @@ -from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union import sys import json @@ -126,6 +126,41 @@ def find_subclasses_in_modules(importable_modules: List[str]): ] +def handle_optional_params(param_fields: List[inspect.Parameter], params_schema: Dict[str, Any]) -> Dict[str, Any]: + """ + Pydantic v1 cannot generate correct JSON schemas including Optional fields. + (https://github.com/samuelcolvin/pydantic/issues/1270) + This function detects optional parameters and updates the schema, + to allow null values for these parameters. + To be removed when Pydantic v2 is released and adopted + """ + optional_params = [] + for param in param_fields: + is_param_optional = ( + hasattr(param.annotation, "__origin__") + and param.annotation.__origin__ == Union + and type(None) in param.annotation.__args__ + ) + if is_param_optional: + optional_params.append(param) + + for param in optional_params: + param_dict = params_schema["properties"][param.name] + type_ = param_dict.pop("type", None) + if type_: + if "items" in param_dict: + items = param_dict.pop("items") + param_dict["anyOf"] = [{"type": type_, "items": items}, {"type": "null"}] + else: + param_dict["anyOf"] = [{"type": type_}, {"type": "null"}] + else: + anyof_list = param_dict.pop("anyOf", None) + if anyof_list: + anyof_list.append({"type": "null"}) + param_dict["anyOf"] = anyof_list + return params_schema + + def create_schema_for_node_class(node_class: Type[BaseComponent]) -> Tuple[Dict[str, Any], Dict[str, Any]]: """ Create the JSON schema for a single BaseComponent subclass, @@ -177,6 +212,11 @@ def create_schema_for_node_class(node_class: Type[BaseComponent]) -> Tuple[Dict[ model = create_model(f"{node_name}ComponentParams", __config__=Config, **param_fields_kwargs) model.update_forward_refs(**model.__dict__) params_schema = model.schema() + + # Pydantic v1 patch to generate JSON schemas including Optional fields + # to be removed when Pydantic v2 is released and adopted + params_schema = handle_optional_params(param_fields, params_schema) + params_schema["title"] = "Parameters" desc = "Each parameter can reference other components defined in the same YAML file." params_schema["description"] = desc diff --git a/test/pipelines/test_pipeline_yaml.py b/test/pipelines/test_pipeline_yaml.py index aa7565137..552bf5010 100644 --- a/test/pipelines/test_pipeline_yaml.py +++ b/test/pipelines/test_pipeline_yaml.py @@ -8,6 +8,7 @@ import inspect import networkx as nx from enum import Enum from pydantic.dataclasses import dataclass +from typing import Any, Dict, List, Optional import haystack from haystack import Pipeline @@ -320,6 +321,40 @@ def test_load_yaml_custom_component(tmp_path): assert pipeline.get_node("custom_node").param == 1 +def test_load_yaml_custom_component_with_null_values(tmp_path): + class CustomNode(MockNode): + def __init__(self, param: Optional[str], lst_param: Optional[List[Any]], dict_param: Optional[Dict[str, Any]]): + super().__init__() + self.param = param + self.lst_param = lst_param + self.dict_param = dict_param + + with open(tmp_path / "tmp_config.yml", "w") as tmp_file: + tmp_file.write( + f""" + version: ignore + components: + - name: custom_node + type: CustomNode + params: + param: null + lst_param: null + dict_param: null + pipelines: + - name: my_pipeline + nodes: + - name: custom_node + inputs: + - Query + """ + ) + pipeline = Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml") + assert len(pipeline.graph.nodes) == 2 + assert pipeline.get_node("custom_node").param is None + assert pipeline.get_node("custom_node").lst_param is None + assert pipeline.get_node("custom_node").dict_param is None + + def test_load_yaml_custom_component_with_no_init(tmp_path): class CustomNode(MockNode): pass