diff --git a/.github/utils/generate_json_schema.py b/.github/utils/generate_json_schema.py new file mode 100644 index 000000000..eb8653a15 --- /dev/null +++ b/.github/utils/generate_json_schema.py @@ -0,0 +1,256 @@ +import json +import logging +import subprocess +from pathlib import Path +from typing import Any, Dict, Optional, Set, Tuple + +import haystack.document_stores +import haystack.nodes +import pydantic.schema +from fastapi.dependencies.utils import get_typed_signature +from github import Github +from pydantic import BaseConfig, BaseSettings, Required, SecretStr, create_model +from pydantic.fields import ModelField +from pydantic.schema import SkipField, TypeModelOrEnum, TypeModelSet, encode_default +from pydantic.schema import field_singleton_schema as _field_singleton_schema +from pydantic.typing import is_callable_type +from pydantic.utils import lenient_issubclass + +schema_version = "0.7" +filename = f"haystack-pipeline.{schema_version}.schema.json" +destination_path = Path(__file__).parent.parent.parent / "json-schemas" / filename + + +class Settings(BaseSettings): + input_token: SecretStr + github_repository: str + + +# Monkey patch Pydantic's field_singleton_schema to convert classes and functions to +# strings in JSON Schema +def field_singleton_schema( + field: ModelField, + *, + by_alias: bool, + model_name_map: Dict[TypeModelOrEnum, str], + ref_template: str, + schema_overrides: bool = False, + ref_prefix: Optional[str] = None, + known_models: TypeModelSet, +) -> Tuple[Dict[str, Any], Dict[str, Any], Set[str]]: + try: + return _field_singleton_schema( + field, + by_alias=by_alias, + model_name_map=model_name_map, + ref_template=ref_template, + schema_overrides=schema_overrides, + ref_prefix=ref_prefix, + known_models=known_models, + ) + except (ValueError, SkipField): + schema: Dict[str, Any] = {"type": "string"} + + if isinstance(field.default, type) or is_callable_type(field.default): + default = field.default.__name__ + else: + default = field.default + if not field.required: + schema["default"] = encode_default(default) + return schema, {}, set() + + +# Monkeypatch Pydantic's field_singleton_schema +pydantic.schema.field_singleton_schema = field_singleton_schema + + +class Config(BaseConfig): + extra = "forbid" + + +def get_json_schema(): + schema_definitions = {} + additional_definitions = {} + + modules_with_nodes = [haystack.nodes, haystack.document_stores] + possible_nodes = [] + for module in modules_with_nodes: + for importable_name in dir(module): + imported = getattr(module, importable_name) + possible_nodes.append((module, imported)) + # TODO: decide if there's a better way to not include Base classes other than by + # the prefix "Base" in the name. Maybe it could make sense to have a list of + # all the valid nodes to include in the main source code and then using that here. + for module, node in possible_nodes: + if lenient_issubclass( + node, haystack.nodes.BaseComponent + ) and not node.__name__.startswith("Base"): + logging.info(f"Processing node: {node.__name__}") + init_method = getattr(node, "__init__", None) + if init_method: + signature = get_typed_signature(init_method) + param_fields = [ + param + for param in signature.parameters.values() + if param.kind not in {param.VAR_POSITIONAL, param.VAR_KEYWORD} + ] + # Remove self parameter + param_fields.pop(0) + param_fields_kwargs: Dict[str, Any] = {} + for param in param_fields: + logging.info(f"--- processing param: {param.name}") + annotation = Any + if param.annotation != param.empty: + annotation = param.annotation + default = Required + if param.default != param.empty: + default = param.default + param_fields_kwargs[param.name] = (annotation, default) + model = create_model( + f"{node.__name__}ComponentParams", + __config__=Config, + **param_fields_kwargs, + ) + model.update_forward_refs(**model.__dict__) + params_schema = model.schema() + params_schema["title"] = "Parameters" + params_schema[ + "description" + ] = "Each parameter can reference other components defined in the same YAML file." + if "definitions" in params_schema: + params_definitions = params_schema.pop("definitions") + additional_definitions.update(params_definitions) + component_schema = { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string", + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": f"{node.__name__}", + }, + "params": params_schema, + }, + "required": ["type", "name"], + "additionalProperties": False, + } + schema_definitions[f"{node.__name__}Component"] = component_schema + + all_definitions = {**schema_definitions, **additional_definitions} + component_refs = [{"$ref": f"#/definitions/{name}"} for name in schema_definitions] + pipeline_schema = { + "$schema": "http://json-schema.org/draft-07/schema", + "$id": f"https://haystack.deepset.ai/json-schemas/{filename}", + "title": "Haystack Pipeline", + "description": "Haystack Pipeline YAML file describing the nodes of the pipelines. For more info read the docs at: https://haystack.deepset.ai/components/pipelines#yaml-file-definitions", + "type": "object", + "properties": { + "version": { + "title": "Version", + "description": "Version of the Haystack Pipeline file.", + "type": "string", + "const": schema_version, + }, + "components": { + "title": "Components", + "description": "Component nodes and their configurations, to later be used in the pipelines section. Define here all the building blocks for the pipelines.", + "type": "array", + "items": {"anyOf": component_refs}, + "required": ["type", "name"], + "additionalProperties": False, + }, + "pipelines": { + "title": "Pipelines", + "description": "Multiple pipelines can be defined using the components from the same YAML file.", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Name of the pipeline.", + "type": "string", + }, + "nodes": { + "title": "Nodes", + "description": "Nodes to be used by this particular pipeline", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "The name of this particular node in the pipeline. This should be one of the names from the components defined in the same file.", + "type": "string", + }, + "inputs": { + "title": "Inputs", + "description": "Input parameters for this node.", + "type": "array", + "items": {"type": "string"}, + }, + }, + "additionalProperties": False, + }, + "required": ["name", "nodes"], + "additionalProperties": False, + }, + }, + "additionalProperties": False, + }, + }, + }, + "required": ["version", "components", "pipelines"], + "additionalProperties": False, + "definitions": all_definitions, + } + return pipeline_schema + + +def generate_json_schema(): + pipeline_schema = get_json_schema() + destination_path.parent.mkdir(parents=True, exist_ok=True) + destination_path.write_text(json.dumps(pipeline_schema, indent=2)) + + +def main(): + generate_json_schema() + logging.basicConfig(level=logging.INFO) + settings = Settings() + logging.info(f"Using config: {settings.json()}") + g = Github(settings.input_token.get_secret_value()) + repo = g.get_repo(settings.github_repository) + + logging.info("Setting up GitHub Actions git user") + subprocess.run(["git", "config", "user.name", "github-actions"], check=True) + subprocess.run( + ["git", "config", "user.email", "github-actions@github.com"], check=True + ) + branch_name = "generate-json-schema" + logging.info(f"Creating a new branch {branch_name}") + subprocess.run(["git", "checkout", "-b", branch_name], check=True) + logging.info("Adding updated file") + subprocess.run(["git", "add", str(destination_path)], check=True) + logging.info("Committing updated file") + message = "⬆ Upgrade JSON Schema file" + subprocess.run(["git", "commit", "-m", message], check=True) + logging.info("Pushing branch") + subprocess.run(["git", "push", "origin", branch_name], check=True) + logging.info("Creating PR") + pr = repo.create_pull(title=message, body=message, base="master", head=branch_name) + logging.info(f"Created PR: {pr.number}") + logging.info("Finished") + + +if __name__ == "__main__": + # If you only want to generate the JSON Schema file without submitting a PR + # uncomment this line: + generate_json_schema() + + # and comment this line: + # main() diff --git a/.github/workflows/generate_json_schema.yml b/.github/workflows/generate_json_schema.yml new file mode 100644 index 000000000..92a5813a1 --- /dev/null +++ b/.github/workflows/generate_json_schema.yml @@ -0,0 +1,30 @@ +name: Generate JSON Schema + +on: + workflow_dispatch: + +jobs: + generate-json-schema: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: "3.7" + - uses: actions/cache@v2 + id: cache + with: + path: ${{ env.pythonLocation }} + key: ${{ runner.os }}-python-${{ env.pythonLocation }}-${{ hashFiles('requirements.txt') }}-${{ hashFiles('requirements-json-schema.txt') }}-generate-json-schema + - name: Install + if: steps.cache.outputs.cache-hit != 'true' + run: | + pip install . + pip install -r requirements-json-schema.txt + - name: Generate JSON Schema + run: python ./.github/utils/generate_json_schema.py + env: + INPUT_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_REPOSITORY: ${{ github.repository }} diff --git a/docs/_src/api/api/document_store.md b/docs/_src/api/api/document_store.md index 93dc818e1..ad97e1609 100644 --- a/docs/_src/api/api/document_store.md +++ b/docs/_src/api/api/document_store.md @@ -1325,7 +1325,7 @@ the vector embeddings are indexed in a FAISS Index. #### \_\_init\_\_ ```python - | __init__(sql_url: str = "sqlite:///faiss_document_store.db", vector_dim: int = None, embedding_dim: int = 768, faiss_index_factory_str: str = "Flat", faiss_index: Optional["faiss.swigfaiss.Index"] = None, return_embedding: bool = False, index: str = "document", similarity: str = "dot_product", embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = 'overwrite', faiss_index_path: Union[str, Path] = None, faiss_config_path: Union[str, Path] = None, isolation_level: str = None, **kwargs, ,) + | __init__(sql_url: str = "sqlite:///faiss_document_store.db", vector_dim: int = None, embedding_dim: int = 768, faiss_index_factory_str: str = "Flat", faiss_index: "Optional[faiss.swigfaiss.Index]" = None, return_embedding: bool = False, index: str = "document", similarity: str = "dot_product", embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = 'overwrite', faiss_index_path: Union[str, Path] = None, faiss_config_path: Union[str, Path] = None, isolation_level: str = None, **kwargs, ,) ``` **Arguments**: diff --git a/docs/_src/api/api/reader.md b/docs/_src/api/api/reader.md index 806bfff0a..e98a488d5 100644 --- a/docs/_src/api/api/reader.md +++ b/docs/_src/api/api/reader.md @@ -48,7 +48,7 @@ While the underlying model can vary (BERT, Roberta, DistilBERT, ...), the interf #### \_\_init\_\_ ```python - | __init__(model_name_or_path: str, model_version: Optional[str] = None, context_window_size: int = 150, batch_size: int = 50, use_gpu: bool = True, no_ans_boost: float = 0.0, return_no_answer: bool = False, top_k: int = 10, top_k_per_candidate: int = 3, top_k_per_sample: int = 1, num_processes: Optional[int] = None, max_seq_len: int = 256, doc_stride: int = 128, progress_bar: bool = True, duplicate_filtering: int = 0, use_confidence_scores: bool = True, proxies=None, local_files_only=False, force_download=False, use_auth_token: Optional[Union[str,bool]] = None, **kwargs) + | __init__(model_name_or_path: str, model_version: Optional[str] = None, context_window_size: int = 150, batch_size: int = 50, use_gpu: bool = True, no_ans_boost: float = 0.0, return_no_answer: bool = False, top_k: int = 10, top_k_per_candidate: int = 3, top_k_per_sample: int = 1, num_processes: Optional[int] = None, max_seq_len: int = 256, doc_stride: int = 128, progress_bar: bool = True, duplicate_filtering: int = 0, use_confidence_scores: bool = True, proxies: Optional[Dict[str, str]] = None, local_files_only=False, force_download=False, use_auth_token: Optional[Union[str,bool]] = None, **kwargs) ``` **Arguments**: diff --git a/haystack/document_stores/faiss.py b/haystack/document_stores/faiss.py index 8337ae312..bb37296c0 100644 --- a/haystack/document_stores/faiss.py +++ b/haystack/document_stores/faiss.py @@ -43,7 +43,7 @@ class FAISSDocumentStore(SQLDocumentStore): vector_dim: int = None, embedding_dim: int = 768, faiss_index_factory_str: str = "Flat", - faiss_index: Optional["faiss.swigfaiss.Index"] = None, + faiss_index: "Optional[faiss.swigfaiss.Index]" = None, return_embedding: bool = False, index: str = "document", similarity: str = "dot_product", diff --git a/haystack/nodes/reader/farm.py b/haystack/nodes/reader/farm.py index 4f94efa18..dd9c82cab 100644 --- a/haystack/nodes/reader/farm.py +++ b/haystack/nodes/reader/farm.py @@ -56,7 +56,7 @@ class FARMReader(BaseReader): progress_bar: bool = True, duplicate_filtering: int = 0, use_confidence_scores: bool = True, - proxies=None, + proxies: Optional[Dict[str, str]] = None, local_files_only=False, force_download=False, use_auth_token: Optional[Union[str,bool]] = None, diff --git a/json-schemas/haystack-pipeline.0.7.schema.json b/json-schemas/haystack-pipeline.0.7.schema.json new file mode 100644 index 000000000..4e3a26993 --- /dev/null +++ b/json-schemas/haystack-pipeline.0.7.schema.json @@ -0,0 +1,3432 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://haystack.deepset.ai/json-schemas/haystack-pipeline.0.7.schema.json", + "title": "Haystack Pipeline", + "description": "Haystack Pipeline YAML file describing the nodes of the pipelines. For more info read the docs at: https://haystack.deepset.ai/components/pipelines#yaml-file-definitions", + "type": "object", + "properties": { + "version": { + "title": "Version", + "description": "Version of the Haystack Pipeline file.", + "type": "string", + "const": "0.7" + }, + "components": { + "title": "Components", + "description": "Component nodes and their configurations, to later be used in the pipelines section. Define here all the building blocks for the pipelines.", + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/AzureConverterComponent" + }, + { + "$ref": "#/definitions/CrawlerComponent" + }, + { + "$ref": "#/definitions/DensePassageRetrieverComponent" + }, + { + "$ref": "#/definitions/Docs2AnswersComponent" + }, + { + "$ref": "#/definitions/DocxToTextConverterComponent" + }, + { + "$ref": "#/definitions/ElasticsearchFilterOnlyRetrieverComponent" + }, + { + "$ref": "#/definitions/ElasticsearchRetrieverComponent" + }, + { + "$ref": "#/definitions/EmbeddingRetrieverComponent" + }, + { + "$ref": "#/definitions/EntityExtractorComponent" + }, + { + "$ref": "#/definitions/EvalAnswersComponent" + }, + { + "$ref": "#/definitions/EvalDocumentsComponent" + }, + { + "$ref": "#/definitions/FARMReaderComponent" + }, + { + "$ref": "#/definitions/FileTypeClassifierComponent" + }, + { + "$ref": "#/definitions/ImageToTextConverterComponent" + }, + { + "$ref": "#/definitions/JoinDocumentsComponent" + }, + { + "$ref": "#/definitions/MarkdownConverterComponent" + }, + { + "$ref": "#/definitions/PDFToTextConverterComponent" + }, + { + "$ref": "#/definitions/PDFToTextOCRConverterComponent" + }, + { + "$ref": "#/definitions/ParsrConverterComponent" + }, + { + "$ref": "#/definitions/PreProcessorComponent" + }, + { + "$ref": "#/definitions/QuestionGeneratorComponent" + }, + { + "$ref": "#/definitions/RAGeneratorComponent" + }, + { + "$ref": "#/definitions/RCIReaderComponent" + }, + { + "$ref": "#/definitions/SentenceTransformersRankerComponent" + }, + { + "$ref": "#/definitions/Seq2SeqGeneratorComponent" + }, + { + "$ref": "#/definitions/SklearnQueryClassifierComponent" + }, + { + "$ref": "#/definitions/TableReaderComponent" + }, + { + "$ref": "#/definitions/TableTextRetrieverComponent" + }, + { + "$ref": "#/definitions/Text2SparqlRetrieverComponent" + }, + { + "$ref": "#/definitions/TextConverterComponent" + }, + { + "$ref": "#/definitions/TfidfRetrieverComponent" + }, + { + "$ref": "#/definitions/TikaConverterComponent" + }, + { + "$ref": "#/definitions/TransformersDocumentClassifierComponent" + }, + { + "$ref": "#/definitions/TransformersQueryClassifierComponent" + }, + { + "$ref": "#/definitions/TransformersReaderComponent" + }, + { + "$ref": "#/definitions/TransformersSummarizerComponent" + }, + { + "$ref": "#/definitions/TransformersTranslatorComponent" + }, + { + "$ref": "#/definitions/ElasticsearchDocumentStoreComponent" + }, + { + "$ref": "#/definitions/FAISSDocumentStoreComponent" + }, + { + "$ref": "#/definitions/GraphDBKnowledgeGraphComponent" + }, + { + "$ref": "#/definitions/InMemoryDocumentStoreComponent" + }, + { + "$ref": "#/definitions/MilvusDocumentStoreComponent" + }, + { + "$ref": "#/definitions/OpenDistroElasticsearchDocumentStoreComponent" + }, + { + "$ref": "#/definitions/OpenSearchDocumentStoreComponent" + }, + { + "$ref": "#/definitions/SQLDocumentStoreComponent" + }, + { + "$ref": "#/definitions/WeaviateDocumentStoreComponent" + } + ] + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "pipelines": { + "title": "Pipelines", + "description": "Multiple pipelines can be defined using the components from the same YAML file.", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Name of the pipeline.", + "type": "string" + }, + "nodes": { + "title": "Nodes", + "description": "Nodes to be used by this particular pipeline", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "The name of this particular node in the pipeline. This should be one of the names from the components defined in the same file.", + "type": "string" + }, + "inputs": { + "title": "Inputs", + "description": "Input parameters for this node.", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false + }, + "required": [ + "name", + "nodes" + ], + "additionalProperties": false + } + }, + "additionalProperties": false + } + } + }, + "required": [ + "version", + "components", + "pipelines" + ], + "additionalProperties": false, + "definitions": { + "AzureConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "AzureConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "endpoint": { + "title": "Endpoint", + "type": "string" + }, + "credential_key": { + "title": "Credential Key", + "type": "string" + }, + "model_id": { + "title": "Model Id", + "default": "prebuilt-document", + "type": "string" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + }, + "save_json": { + "title": "Save Json", + "default": false, + "type": "boolean" + }, + "preceding_context_len": { + "title": "Preceding Context Len", + "default": 3, + "type": "integer" + }, + "following_context_len": { + "title": "Following Context Len", + "default": 3, + "type": "integer" + }, + "merge_multiple_column_headers": { + "title": "Merge Multiple Column Headers", + "default": true, + "type": "boolean" + } + }, + "required": [ + "endpoint", + "credential_key" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "CrawlerComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Crawler" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "output_dir": { + "title": "Output Dir", + "type": "string" + }, + "urls": { + "title": "Urls", + "type": "array", + "items": { + "type": "string" + } + }, + "crawler_depth": { + "title": "Crawler Depth", + "default": 1, + "type": "integer" + }, + "filter_urls": { + "title": "Filter Urls", + "type": "array", + "items": {} + }, + "overwrite_existing_files": { + "title": "Overwrite Existing Files", + "default": true + } + }, + "required": [ + "output_dir" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "DensePassageRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "DensePassageRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "query_embedding_model": { + "title": "Query Embedding Model", + "default": "facebook/dpr-question_encoder-single-nq-base", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "passage_embedding_model": { + "title": "Passage Embedding Model", + "default": "facebook/dpr-ctx_encoder-single-nq-base", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "max_seq_len_query": { + "title": "Max Seq Len Query", + "default": 64, + "type": "integer" + }, + "max_seq_len_passage": { + "title": "Max Seq Len Passage", + "default": 256, + "type": "integer" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "batch_size": { + "title": "Batch Size", + "default": 16, + "type": "integer" + }, + "embed_title": { + "title": "Embed Title", + "default": true, + "type": "boolean" + }, + "use_fast_tokenizers": { + "title": "Use Fast Tokenizers", + "default": true, + "type": "boolean" + }, + "infer_tokenizer_classes": { + "title": "Infer Tokenizer Classes", + "default": false, + "type": "boolean" + }, + "similarity_function": { + "title": "Similarity Function", + "default": "dot_product", + "type": "string" + }, + "global_loss_buffer_size": { + "title": "Global Loss Buffer Size", + "default": 150000, + "type": "integer" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "devices": { + "title": "Devices", + "type": "array", + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "string" + } + ] + } + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "string" + }, + { + "type": "boolean" + } + ] + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "Docs2AnswersComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Docs2Answers" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": {}, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "DocxToTextConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "DocxToTextConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ElasticsearchFilterOnlyRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ElasticsearchFilterOnlyRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "custom_query": { + "title": "Custom Query", + "type": "string" + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ElasticsearchRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ElasticsearchRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "custom_query": { + "title": "Custom Query", + "type": "string" + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "EmbeddingRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "EmbeddingRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "embedding_model": { + "title": "Embedding Model", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "batch_size": { + "title": "Batch Size", + "default": 32, + "type": "integer" + }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 512, + "type": "integer" + }, + "model_format": { + "title": "Model Format", + "default": "farm", + "type": "string" + }, + "pooling_strategy": { + "title": "Pooling Strategy", + "default": "reduce_mean", + "type": "string" + }, + "emb_extraction_layer": { + "title": "Emb Extraction Layer", + "default": -1, + "type": "integer" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "devices": { + "title": "Devices", + "type": "array", + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "string" + } + ] + } + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "string" + }, + { + "type": "boolean" + } + ] + } + }, + "required": [ + "document_store", + "embedding_model" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "EntityExtractorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "EntityExtractor" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "dslim/bert-base-NER", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "EvalAnswersComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "EvalAnswers" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "skip_incorrect_retrieval": { + "title": "Skip Incorrect Retrieval", + "default": true, + "type": "boolean" + }, + "open_domain": { + "title": "Open Domain", + "default": true, + "type": "boolean" + }, + "sas_model": { + "title": "Sas Model", + "type": "string" + }, + "debug": { + "title": "Debug", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "EvalDocumentsComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "EvalDocuments" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "debug": { + "title": "Debug", + "default": false, + "type": "boolean" + }, + "open_domain": { + "title": "Open Domain", + "default": true, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "FARMReaderComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "FARMReader" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "context_window_size": { + "title": "Context Window Size", + "default": 150, + "type": "integer" + }, + "batch_size": { + "title": "Batch Size", + "default": 50, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "no_ans_boost": { + "title": "No Ans Boost", + "default": 0.0, + "type": "number" + }, + "return_no_answer": { + "title": "Return No Answer", + "default": false, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "top_k_per_candidate": { + "title": "Top K Per Candidate", + "default": 3, + "type": "integer" + }, + "top_k_per_sample": { + "title": "Top K Per Sample", + "default": 1, + "type": "integer" + }, + "num_processes": { + "title": "Num Processes", + "type": "integer" + }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 256, + "type": "integer" + }, + "doc_stride": { + "title": "Doc Stride", + "default": 128, + "type": "integer" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "duplicate_filtering": { + "title": "Duplicate Filtering", + "default": 0, + "type": "integer" + }, + "use_confidence_scores": { + "title": "Use Confidence Scores", + "default": true, + "type": "boolean" + }, + "proxies": { + "title": "Proxies", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "local_files_only": { + "title": "Local Files Only", + "default": false + }, + "force_download": { + "title": "Force Download", + "default": false + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "string" + }, + { + "type": "boolean" + } + ] + } + }, + "required": [ + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "FileTypeClassifierComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "FileTypeClassifier" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": {}, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ImageToTextConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ImageToTextConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "default": [ + "eng" + ], + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "JoinDocumentsComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "JoinDocuments" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "join_mode": { + "title": "Join Mode", + "default": "concatenate", + "type": "string" + }, + "weights": { + "title": "Weights", + "type": "array", + "items": { + "type": "number" + } + }, + "top_k_join": { + "title": "Top K Join", + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "MarkdownConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "MarkdownConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "PDFToTextConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "PDFToTextConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "PDFToTextOCRConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "PDFToTextOCRConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "default": [ + "eng" + ], + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ParsrConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ParsrConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "parsr_url": { + "title": "Parsr Url", + "default": "http://localhost:3001", + "type": "string" + }, + "extractor": { + "title": "Extractor", + "default": "pdfminer", + "enum": [ + "pdfminer", + "pdfjs" + ], + "type": "string" + }, + "table_detection_mode": { + "title": "Table Detection Mode", + "default": "lattice", + "enum": [ + "lattice", + "stream" + ], + "type": "string" + }, + "preceding_context_len": { + "title": "Preceding Context Len", + "default": 1, + "type": "integer" + }, + "following_context_len": { + "title": "Following Context Len", + "default": 1, + "type": "integer" + }, + "remove_page_headers": { + "title": "Remove Page Headers", + "default": false, + "type": "boolean" + }, + "remove_page_footers": { + "title": "Remove Page Footers", + "default": false, + "type": "boolean" + }, + "remove_table_of_contents": { + "title": "Remove Table Of Contents", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "PreProcessorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "PreProcessor" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "clean_whitespace": { + "title": "Clean Whitespace", + "default": true, + "type": "boolean" + }, + "clean_header_footer": { + "title": "Clean Header Footer", + "default": false, + "type": "boolean" + }, + "clean_empty_lines": { + "title": "Clean Empty Lines", + "default": true, + "type": "boolean" + }, + "split_by": { + "title": "Split By", + "default": "word", + "type": "string" + }, + "split_length": { + "title": "Split Length", + "default": 200, + "type": "integer" + }, + "split_overlap": { + "title": "Split Overlap", + "default": 0, + "type": "integer" + }, + "split_respect_sentence_boundary": { + "title": "Split Respect Sentence Boundary", + "default": true, + "type": "boolean" + }, + "language": { + "title": "Language", + "default": "en", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "QuestionGeneratorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "QuestionGenerator" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "valhalla/t5-base-e2e-qg" + }, + "model_version": { + "title": "Model Version" + }, + "num_beams": { + "title": "Num Beams", + "default": 4 + }, + "max_length": { + "title": "Max Length", + "default": 256 + }, + "no_repeat_ngram_size": { + "title": "No Repeat Ngram Size", + "default": 3 + }, + "length_penalty": { + "title": "Length Penalty", + "default": 1.5 + }, + "early_stopping": { + "title": "Early Stopping", + "default": true + }, + "split_length": { + "title": "Split Length", + "default": 50 + }, + "split_overlap": { + "title": "Split Overlap", + "default": 10 + }, + "use_gpu": { + "title": "Use Gpu", + "default": true + }, + "prompt": { + "title": "Prompt", + "default": "generate questions:" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "RAGeneratorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "RAGenerator" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "facebook/rag-token-nq", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "retriever": { + "title": "Retriever", + "type": "string", + "default": null + }, + "generator_type": { + "default": [ + 1 + ], + "allOf": [ + { + "$ref": "#/definitions/RAGeneratorType" + } + ] + }, + "top_k": { + "title": "Top K", + "default": 2, + "type": "integer" + }, + "max_length": { + "title": "Max Length", + "default": 200, + "type": "integer" + }, + "min_length": { + "title": "Min Length", + "default": 2, + "type": "integer" + }, + "num_beams": { + "title": "Num Beams", + "default": 2, + "type": "integer" + }, + "embed_title": { + "title": "Embed Title", + "default": true, + "type": "boolean" + }, + "prefix": { + "title": "Prefix", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "RCIReaderComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "RCIReader" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "row_model_name_or_path": { + "title": "Row Model Name Or Path", + "default": "michaelrglass/albert-base-rci-wikisql-row", + "type": "string" + }, + "column_model_name_or_path": { + "title": "Column Model Name Or Path", + "default": "michaelrglass/albert-base-rci-wikisql-col", + "type": "string" + }, + "row_model_version": { + "title": "Row Model Version", + "type": "string" + }, + "column_model_version": { + "title": "Column Model Version", + "type": "string" + }, + "row_tokenizer": { + "title": "Row Tokenizer", + "type": "string" + }, + "column_tokenizer": { + "title": "Column Tokenizer", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 256, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "SentenceTransformersRankerComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "SentenceTransformersRanker" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "anyOf": [ + { + "type": "string" + }, + { + "type": "string", + "format": "path" + } + ] + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "devices": { + "title": "Devices", + "type": "array", + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "string" + } + ] + } + } + }, + "required": [ + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "Seq2SeqGeneratorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Seq2SeqGenerator" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "type": "string" + }, + "input_converter": { + "title": "Input Converter", + "type": "string", + "default": null + }, + "top_k": { + "title": "Top K", + "default": 1, + "type": "integer" + }, + "max_length": { + "title": "Max Length", + "default": 200, + "type": "integer" + }, + "min_length": { + "title": "Min Length", + "default": 2, + "type": "integer" + }, + "num_beams": { + "title": "Num Beams", + "default": 8, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "required": [ + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "SklearnQueryClassifierComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "SklearnQueryClassifier" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "https://ext-models-haystack.s3.eu-central-1.amazonaws.com/gradboost_query_classifier/model.pickle", + "anyOf": [ + { + "type": "string" + }, + {} + ] + }, + "vectorizer_name_or_path": { + "title": "Vectorizer Name Or Path", + "default": "https://ext-models-haystack.s3.eu-central-1.amazonaws.com/gradboost_query_classifier/vectorizer.pickle", + "anyOf": [ + { + "type": "string" + }, + {} + ] + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TableReaderComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TableReader" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "google/tapas-base-finetuned-wtq", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "tokenizer": { + "title": "Tokenizer", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 256, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TableTextRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TableTextRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "query_embedding_model": { + "title": "Query Embedding Model", + "default": "deepset/bert-small-mm_retrieval-question_encoder", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "passage_embedding_model": { + "title": "Passage Embedding Model", + "default": "deepset/bert-small-mm_retrieval-passage_encoder", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "table_embedding_model": { + "title": "Table Embedding Model", + "default": "deepset/bert-small-mm_retrieval-table_encoder", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "max_seq_len_query": { + "title": "Max Seq Len Query", + "default": 64, + "type": "integer" + }, + "max_seq_len_passage": { + "title": "Max Seq Len Passage", + "default": 256, + "type": "integer" + }, + "max_seq_len_table": { + "title": "Max Seq Len Table", + "default": 256, + "type": "integer" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "batch_size": { + "title": "Batch Size", + "default": 16, + "type": "integer" + }, + "embed_meta_fields": { + "title": "Embed Meta Fields", + "default": [ + "name", + "section_title", + "caption" + ], + "type": "array", + "items": { + "type": "string" + } + }, + "use_fast_tokenizers": { + "title": "Use Fast Tokenizers", + "default": true, + "type": "boolean" + }, + "infer_tokenizer_classes": { + "title": "Infer Tokenizer Classes", + "default": false, + "type": "boolean" + }, + "similarity_function": { + "title": "Similarity Function", + "default": "dot_product", + "type": "string" + }, + "global_loss_buffer_size": { + "title": "Global Loss Buffer Size", + "default": 150000, + "type": "integer" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "devices": { + "title": "Devices", + "type": "array", + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "string" + } + ] + } + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "string" + }, + { + "type": "boolean" + } + ] + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "Text2SparqlRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Text2SparqlRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "knowledge_graph": { + "title": "Knowledge Graph" + }, + "model_name_or_path": { + "title": "Model Name Or Path" + }, + "top_k": { + "title": "Top K", + "default": 1, + "type": "integer" + } + }, + "required": [ + "knowledge_graph", + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TextConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TextConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TfidfRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TfidfRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "auto_fit": { + "title": "Auto Fit", + "default": true + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TikaConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TikaConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "tika_url": { + "title": "Tika Url", + "default": "http://localhost:9998/tika", + "type": "string" + }, + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersDocumentClassifierComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersDocumentClassifier" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "bhadresh-savani/distilbert-base-uncased-emotion", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "tokenizer": { + "title": "Tokenizer", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "return_all_scores": { + "title": "Return All Scores", + "default": false, + "type": "boolean" + }, + "task": { + "title": "Task", + "default": "text-classification", + "type": "string" + }, + "labels": { + "title": "Labels", + "type": "array", + "items": { + "type": "string" + } + }, + "batch_size": { + "title": "Batch Size", + "default": -1, + "type": "integer" + }, + "classification_field": { + "title": "Classification Field", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersQueryClassifierComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersQueryClassifier" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "shahrukhx01/bert-mini-finetune-question-detection", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersReaderComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersReader" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "distilbert-base-uncased-distilled-squad", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "tokenizer": { + "title": "Tokenizer", + "type": "string" + }, + "context_window_size": { + "title": "Context Window Size", + "default": 70, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "top_k_per_candidate": { + "title": "Top K Per Candidate", + "default": 4, + "type": "integer" + }, + "return_no_answers": { + "title": "Return No Answers", + "default": true, + "type": "boolean" + }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 256, + "type": "integer" + }, + "doc_stride": { + "title": "Doc Stride", + "default": 128, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersSummarizerComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersSummarizer" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "google/pegasus-xsum", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "tokenizer": { + "title": "Tokenizer", + "type": "string" + }, + "max_length": { + "title": "Max Length", + "default": 200, + "type": "integer" + }, + "min_length": { + "title": "Min Length", + "default": 5, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "clean_up_tokenization_spaces": { + "title": "Clean Up Tokenization Spaces", + "default": true, + "type": "boolean" + }, + "separator_for_single_summary": { + "title": "Separator For Single Summary", + "default": " ", + "type": "string" + }, + "generate_single_summary": { + "title": "Generate Single Summary", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersTranslatorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersTranslator" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "type": "string" + }, + "tokenizer_name": { + "title": "Tokenizer Name", + "type": "string" + }, + "max_seq_len": { + "title": "Max Seq Len", + "type": "integer" + }, + "clean_up_tokenization_spaces": { + "title": "Clean Up Tokenization Spaces", + "default": true, + "type": "boolean" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "required": [ + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ElasticsearchDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ElasticsearchDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "host": { + "title": "Host", + "default": "localhost", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "port": { + "title": "Port", + "default": 9200, + "anyOf": [ + { + "type": "integer" + }, + { + "type": "array", + "items": { + "type": "integer" + } + } + ] + }, + "username": { + "title": "Username", + "default": "", + "type": "string" + }, + "password": { + "title": "Password", + "default": "", + "type": "string" + }, + "api_key_id": { + "title": "Api Key Id", + "type": "string" + }, + "api_key": { + "title": "Api Key", + "type": "string" + }, + "aws4auth": { + "title": "Aws4Auth" + }, + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "label_index": { + "title": "Label Index", + "default": "label", + "type": "string" + }, + "search_fields": { + "title": "Search Fields", + "default": "content", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": {} + } + ] + }, + "content_field": { + "title": "Content Field", + "default": "content", + "type": "string" + }, + "name_field": { + "title": "Name Field", + "default": "name", + "type": "string" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "custom_mapping": { + "title": "Custom Mapping", + "type": "object" + }, + "excluded_meta_data": { + "title": "Excluded Meta Data", + "type": "array", + "items": {} + }, + "analyzer": { + "title": "Analyzer", + "default": "standard", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "default": "http", + "type": "string" + }, + "ca_certs": { + "title": "Ca Certs", + "type": "string" + }, + "verify_certs": { + "title": "Verify Certs", + "default": true, + "type": "boolean" + }, + "create_index": { + "title": "Create Index", + "default": true, + "type": "boolean" + }, + "refresh_type": { + "title": "Refresh Type", + "default": "wait_for", + "type": "string" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product" + }, + "timeout": { + "title": "Timeout", + "default": 30 + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "index_type": { + "title": "Index Type", + "default": "flat", + "type": "string" + }, + "scroll": { + "title": "Scroll", + "default": "1d", + "type": "string" + }, + "skip_missing_embeddings": { + "title": "Skip Missing Embeddings", + "default": true, + "type": "boolean" + }, + "synonyms": { + "title": "Synonyms", + "type": "array", + "items": {} + }, + "synonym_type": { + "title": "Synonym Type", + "default": "synonym", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "FAISSDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "FAISSDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "sql_url": { + "title": "Sql Url", + "default": "sqlite:///faiss_document_store.db", + "type": "string" + }, + "vector_dim": { + "title": "Vector Dim", + "type": "integer" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "faiss_index_factory_str": { + "title": "Faiss Index Factory Str", + "default": "Flat", + "type": "string" + }, + "faiss_index": { + "title": "Faiss Index", + "type": "string", + "default": null + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product", + "type": "string" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "faiss_index_path": { + "title": "Faiss Index Path", + "anyOf": [ + { + "type": "string" + }, + { + "type": "string", + "format": "path" + } + ] + }, + "faiss_config_path": { + "title": "Faiss Config Path", + "anyOf": [ + { + "type": "string" + }, + { + "type": "string", + "format": "path" + } + ] + }, + "isolation_level": { + "title": "Isolation Level", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "GraphDBKnowledgeGraphComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "GraphDBKnowledgeGraph" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "host": { + "title": "Host", + "default": "localhost", + "type": "string" + }, + "port": { + "title": "Port", + "default": 7200, + "type": "integer" + }, + "username": { + "title": "Username", + "default": "", + "type": "string" + }, + "password": { + "title": "Password", + "default": "", + "type": "string" + }, + "index": { + "title": "Index", + "type": "string" + }, + "prefixes": { + "title": "Prefixes", + "default": "", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "InMemoryDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "InMemoryDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "label_index": { + "title": "Label Index", + "default": "label", + "type": "string" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product", + "type": "string" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "MilvusDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "MilvusDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "sql_url": { + "title": "Sql Url", + "default": "sqlite:///", + "type": "string" + }, + "milvus_url": { + "title": "Milvus Url", + "default": "tcp://localhost:19530", + "type": "string" + }, + "connection_pool": { + "title": "Connection Pool", + "default": "SingletonThread", + "type": "string" + }, + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "vector_dim": { + "title": "Vector Dim", + "type": "integer" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "index_file_size": { + "title": "Index File Size", + "default": 1024, + "type": "integer" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product", + "type": "string" + }, + "index_type": { + "default": 1, + "allOf": [ + { + "$ref": "#/definitions/IndexType" + } + ] + }, + "index_param": { + "title": "Index Param", + "type": "object" + }, + "search_param": { + "title": "Search Param", + "type": "object" + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "isolation_level": { + "title": "Isolation Level", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "OpenDistroElasticsearchDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "OpenDistroElasticsearchDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "host": { + "title": "Host", + "default": "https://admin:admin@localhost:9200/" + }, + "similarity": { + "title": "Similarity", + "default": "cosine" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "OpenSearchDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "OpenSearchDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "verify_certs": { + "title": "Verify Certs", + "default": false + }, + "scheme": { + "title": "Scheme", + "default": "https" + }, + "username": { + "title": "Username", + "default": "admin" + }, + "password": { + "title": "Password", + "default": "admin" + }, + "port": { + "title": "Port", + "default": 9200 + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "SQLDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "SQLDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "url": { + "title": "Url", + "default": "sqlite://", + "type": "string" + }, + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "label_index": { + "title": "Label Index", + "default": "label", + "type": "string" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "check_same_thread": { + "title": "Check Same Thread", + "default": false, + "type": "boolean" + }, + "isolation_level": { + "title": "Isolation Level", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "WeaviateDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "WeaviateDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "host": { + "title": "Host", + "default": "http://localhost", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "port": { + "title": "Port", + "default": 8080, + "anyOf": [ + { + "type": "integer" + }, + { + "type": "array", + "items": { + "type": "integer" + } + } + ] + }, + "timeout_config": { + "title": "Timeout Config", + "default": [ + 5, + 15 + ], + "type": "array", + "items": {} + }, + "username": { + "title": "Username", + "type": "string" + }, + "password": { + "title": "Password", + "type": "string" + }, + "index": { + "title": "Index", + "default": "Document", + "type": "string" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "content_field": { + "title": "Content Field", + "default": "content", + "type": "string" + }, + "name_field": { + "title": "Name Field", + "default": "name", + "type": "string" + }, + "similarity": { + "title": "Similarity", + "default": "cosine", + "type": "string" + }, + "index_type": { + "title": "Index Type", + "default": "hnsw", + "type": "string" + }, + "custom_schema": { + "title": "Custom Schema", + "type": "object" + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "RAGeneratorType": { + "title": "RAGeneratorType", + "description": "An enumeration.", + "enum": [ + [ + 1 + ], + 2 + ] + }, + "IndexType": { + "title": "IndexType", + "description": "Index type enum.\n ", + "enum": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 11, + 12 + ], + "type": "integer" + } + } +} \ No newline at end of file diff --git a/requirements-json-schema.txt b/requirements-json-schema.txt new file mode 100644 index 000000000..0189eb3d0 --- /dev/null +++ b/requirements-json-schema.txt @@ -0,0 +1 @@ +pygithub==1.55