| 
									
										
										
										
											2023-09-27 16:06:32 +08:00
										 |  |  | import flask_restful | 
					
						
							| 
									
										
										
										
											2024-02-06 13:21:13 +08:00
										 |  |  | from flask import current_app, request | 
					
						
							|  |  |  | from flask_login import current_user | 
					
						
							|  |  |  | from flask_restful import Resource, marshal, marshal_with, reqparse | 
					
						
							|  |  |  | from werkzeug.exceptions import Forbidden, NotFound | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | import services | 
					
						
							|  |  |  | from controllers.console import api | 
					
						
							| 
									
										
										
										
											2024-01-12 12:34:01 +08:00
										 |  |  | from controllers.console.apikey import api_key_fields, api_key_list | 
					
						
							| 
									
										
										
										
											2023-08-12 00:57:00 +08:00
										 |  |  | from controllers.console.app.error import ProviderNotInitializeError | 
					
						
							| 
									
										
										
										
											2024-06-26 12:51:50 +08:00
										 |  |  | from controllers.console.datasets.error import DatasetInUseError, DatasetNameDuplicateError, IndexingEstimateError | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | from controllers.console.setup import setup_required | 
					
						
							|  |  |  | from controllers.console.wraps import account_initialization_required | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError | 
					
						
							| 
									
										
										
										
											2024-01-12 12:34:01 +08:00
										 |  |  | from core.indexing_runner import IndexingRunner | 
					
						
							|  |  |  | from core.model_runtime.entities.model_entities import ModelType | 
					
						
							|  |  |  | from core.provider_manager import ProviderManager | 
					
						
							| 
									
										
										
										
											2024-06-08 22:29:24 +08:00
										 |  |  | from core.rag.datasource.vdb.vector_type import VectorType | 
					
						
							| 
									
										
										
										
											2024-02-22 23:31:57 +08:00
										 |  |  | from core.rag.extractor.entity.extract_setting import ExtractSetting | 
					
						
							| 
									
										
										
										
											2024-06-19 16:05:27 +08:00
										 |  |  | from core.rag.retrieval.retrival_methods import RetrievalMethod | 
					
						
							| 
									
										
										
										
											2024-01-12 12:34:01 +08:00
										 |  |  | from extensions.ext_database import db | 
					
						
							| 
									
										
										
										
											2023-09-27 16:06:32 +08:00
										 |  |  | from fields.app_fields import related_app_list | 
					
						
							|  |  |  | from fields.dataset_fields import dataset_detail_fields, dataset_query_detail_fields | 
					
						
							|  |  |  | from fields.document_fields import document_status_fields | 
					
						
							| 
									
										
										
										
											2024-01-12 12:34:01 +08:00
										 |  |  | from libs.login import login_required | 
					
						
							| 
									
										
										
										
											2024-01-19 20:12:04 +08:00
										 |  |  | from models.dataset import Dataset, Document, DocumentSegment | 
					
						
							| 
									
										
										
										
											2024-01-12 12:34:01 +08:00
										 |  |  | from models.model import ApiToken, UploadFile | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  | from services.dataset_service import DatasetService, DocumentService | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _validate_name(name): | 
					
						
							|  |  |  |     if not name or len(name) < 1 or len(name) > 40: | 
					
						
							|  |  |  |         raise ValueError('Name must be between 1 to 40 characters.') | 
					
						
							|  |  |  |     return name | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _validate_description_length(description): | 
					
						
							| 
									
										
										
										
											2023-06-02 14:03:18 +08:00
										 |  |  |     if len(description) > 400: | 
					
						
							|  |  |  |         raise ValueError('Description cannot exceed 400 characters.') | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |     return description | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class DatasetListApi(Resource): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @setup_required | 
					
						
							|  |  |  |     @login_required | 
					
						
							|  |  |  |     @account_initialization_required | 
					
						
							|  |  |  |     def get(self): | 
					
						
							|  |  |  |         page = request.args.get('page', default=1, type=int) | 
					
						
							|  |  |  |         limit = request.args.get('limit', default=20, type=int) | 
					
						
							|  |  |  |         ids = request.args.getlist('ids') | 
					
						
							|  |  |  |         provider = request.args.get('provider', default="vendor") | 
					
						
							| 
									
										
										
										
											2024-04-24 15:02:29 +08:00
										 |  |  |         search = request.args.get('keyword', default=None, type=str) | 
					
						
							|  |  |  |         tag_ids = request.args.getlist('tag_ids') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |         if ids: | 
					
						
							|  |  |  |             datasets, total = DatasetService.get_datasets_by_ids(ids, current_user.current_tenant_id) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             datasets, total = DatasetService.get_datasets(page, limit, provider, | 
					
						
							| 
									
										
										
										
											2024-04-24 15:02:29 +08:00
										 |  |  |                                                           current_user.current_tenant_id, current_user, search, tag_ids) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  |         # check embedding setting | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         provider_manager = ProviderManager() | 
					
						
							|  |  |  |         configurations = provider_manager.get_configurations( | 
					
						
							|  |  |  |             tenant_id=current_user.current_tenant_id | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         embedding_models = configurations.get_models( | 
					
						
							|  |  |  |             model_type=ModelType.TEXT_EMBEDDING, | 
					
						
							|  |  |  |             only_active=True | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-25 00:25:16 +08:00
										 |  |  |         model_names = [] | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         for embedding_model in embedding_models: | 
					
						
							|  |  |  |             model_names.append(f"{embedding_model.model}:{embedding_model.provider.provider}") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  |         data = marshal(datasets, dataset_detail_fields) | 
					
						
							|  |  |  |         for item in data: | 
					
						
							| 
									
										
										
										
											2023-08-29 03:37:45 +08:00
										 |  |  |             if item['indexing_technique'] == 'high_quality': | 
					
						
							|  |  |  |                 item_model = f"{item['embedding_model']}:{item['embedding_model_provider']}" | 
					
						
							|  |  |  |                 if item_model in model_names: | 
					
						
							|  |  |  |                     item['embedding_available'] = True | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     item['embedding_available'] = False | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  |             else: | 
					
						
							| 
									
										
										
										
											2023-08-29 03:37:45 +08:00
										 |  |  |                 item['embedding_available'] = True | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |         response = { | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  |             'data': data, | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |             'has_more': len(datasets) == limit, | 
					
						
							|  |  |  |             'limit': limit, | 
					
						
							|  |  |  |             'total': total, | 
					
						
							|  |  |  |             'page': page | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         return response, 200 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @setup_required | 
					
						
							|  |  |  |     @login_required | 
					
						
							|  |  |  |     @account_initialization_required | 
					
						
							|  |  |  |     def post(self): | 
					
						
							|  |  |  |         parser = reqparse.RequestParser() | 
					
						
							|  |  |  |         parser.add_argument('name', nullable=False, required=True, | 
					
						
							|  |  |  |                             help='type is required. Name must be between 1 to 40 characters.', | 
					
						
							|  |  |  |                             type=_validate_name) | 
					
						
							|  |  |  |         parser.add_argument('indexing_technique', type=str, location='json', | 
					
						
							| 
									
										
										
										
											2024-01-19 20:12:04 +08:00
										 |  |  |                             choices=Dataset.INDEXING_TECHNIQUE_LIST, | 
					
						
							|  |  |  |                             nullable=True, | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |                             help='Invalid indexing technique.') | 
					
						
							|  |  |  |         args = parser.parse_args() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-14 07:34:25 -05:00
										 |  |  |         # The role of the current user in the ta table must be admin, owner, or editor | 
					
						
							|  |  |  |         if not current_user.is_editor: | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |             raise Forbidden() | 
					
						
							| 
									
										
										
										
											2023-08-12 00:57:00 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |         try: | 
					
						
							|  |  |  |             dataset = DatasetService.create_empty_dataset( | 
					
						
							|  |  |  |                 tenant_id=current_user.current_tenant_id, | 
					
						
							|  |  |  |                 name=args['name'], | 
					
						
							|  |  |  |                 indexing_technique=args['indexing_technique'], | 
					
						
							|  |  |  |                 account=current_user | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |         except services.errors.dataset.DatasetNameDuplicateError: | 
					
						
							|  |  |  |             raise DatasetNameDuplicateError() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return marshal(dataset, dataset_detail_fields), 201 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
class DatasetApi(Resource):
    """Console API for a single dataset: fetch (GET), update (PATCH), delete (DELETE)."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, dataset_id):
        """Return one serialized dataset, annotated with ``embedding_available``.

        Raises NotFound when the dataset does not exist and Forbidden when the
        current user lacks permission on it.
        """
        dataset_id_str = str(dataset_id)
        dataset = DatasetService.get_dataset(dataset_id_str)
        if dataset is None:
            raise NotFound("Dataset not found.")
        try:
            DatasetService.check_dataset_permission(
                dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))
        data = marshal(dataset, dataset_detail_fields)
        # check embedding setting
        provider_manager = ProviderManager()
        configurations = provider_manager.get_configurations(
            tenant_id=current_user.current_tenant_id
        )

        embedding_models = configurations.get_models(
            model_type=ModelType.TEXT_EMBEDDING,
            only_active=True
        )

        # Build "model:provider" identifiers for every active embedding model.
        model_names = []
        for embedding_model in embedding_models:
            model_names.append(f"{embedding_model.model}:{embedding_model.provider.provider}")

        # high_quality datasets need their configured embedding model to still
        # be active; other indexing techniques are always marked available.
        if data['indexing_technique'] == 'high_quality':
            item_model = f"{data['embedding_model']}:{data['embedding_model_provider']}"
            if item_model in model_names:
                data['embedding_available'] = True
            else:
                data['embedding_available'] = False
        else:
            data['embedding_available'] = True
        return data, 200

    @setup_required
    @login_required
    @account_initialization_required
    def patch(self, dataset_id):
        """Partially update a dataset's settings.

        Validates the dataset's model setting first, then applies only the
        fields present in the JSON payload. Raises NotFound when the dataset is
        missing and Forbidden when the user's role cannot edit.
        """
        dataset_id_str = str(dataset_id)
        dataset = DatasetService.get_dataset(dataset_id_str)
        if dataset is None:
            raise NotFound("Dataset not found.")
        # check user's model setting
        DatasetService.check_dataset_model_setting(dataset)

        parser = reqparse.RequestParser()
        parser.add_argument('name', nullable=False,
                            help='type is required. Name must be between 1 to 40 characters.',
                            type=_validate_name)
        # store_missing=False keeps the description untouched when omitted.
        parser.add_argument('description',
                            location='json', store_missing=False,
                            type=_validate_description_length)
        parser.add_argument('indexing_technique', type=str, location='json',
                            choices=Dataset.INDEXING_TECHNIQUE_LIST,
                            nullable=True,
                            help='Invalid indexing technique.')
        parser.add_argument('permission', type=str, location='json', choices=(
            'only_me', 'all_team_members'), help='Invalid permission.')
        parser.add_argument('embedding_model', type=str,
                            location='json', help='Invalid embedding model.')
        parser.add_argument('embedding_model_provider', type=str,
                            location='json', help='Invalid embedding model provider.')
        parser.add_argument('retrieval_model', type=dict, location='json', help='Invalid retrieval model.')
        args = parser.parse_args()

        # The role of the current user in the ta table must be admin, owner, or editor
        if not current_user.is_editor:
            raise Forbidden()

        dataset = DatasetService.update_dataset(
            dataset_id_str, args, current_user)

        if dataset is None:
            raise NotFound("Dataset not found.")

        return marshal(dataset, dataset_detail_fields), 200

    @setup_required
    @login_required
    @account_initialization_required
    def delete(self, dataset_id):
        """Delete a dataset; 204 on success.

        Raises Forbidden for non-editor roles, NotFound when the dataset does
        not exist, and DatasetInUseError when it is still referenced.
        """
        dataset_id_str = str(dataset_id)

        # The role of the current user in the ta table must be admin, owner, or editor
        if not current_user.is_editor:
            raise Forbidden()

        try:
            if DatasetService.delete_dataset(dataset_id_str, current_user):
                return {'result': 'success'}, 204
            else:
                raise NotFound("Dataset not found.")
        except services.errors.dataset.DatasetInUseError:
            # Translate the service-layer error into the console API error type.
            raise DatasetInUseError()
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-01 17:06:51 +08:00
										 |  |  | class DatasetUseCheckApi(Resource): | 
					
						
							|  |  |  |     @setup_required | 
					
						
							|  |  |  |     @login_required | 
					
						
							|  |  |  |     @account_initialization_required | 
					
						
							|  |  |  |     def get(self, dataset_id): | 
					
						
							|  |  |  |         dataset_id_str = str(dataset_id) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         dataset_is_using = DatasetService.dataset_use_check(dataset_id_str) | 
					
						
							|  |  |  |         return {'is_using': dataset_is_using}, 200 | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | class DatasetQueryApi(Resource): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @setup_required | 
					
						
							|  |  |  |     @login_required | 
					
						
							|  |  |  |     @account_initialization_required | 
					
						
							|  |  |  |     def get(self, dataset_id): | 
					
						
							|  |  |  |         dataset_id_str = str(dataset_id) | 
					
						
							|  |  |  |         dataset = DatasetService.get_dataset(dataset_id_str) | 
					
						
							|  |  |  |         if dataset is None: | 
					
						
							|  |  |  |             raise NotFound("Dataset not found.") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             DatasetService.check_dataset_permission(dataset, current_user) | 
					
						
							|  |  |  |         except services.errors.account.NoPermissionError as e: | 
					
						
							|  |  |  |             raise Forbidden(str(e)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         page = request.args.get('page', default=1, type=int) | 
					
						
							|  |  |  |         limit = request.args.get('limit', default=20, type=int) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         dataset_queries, total = DatasetService.get_dataset_queries( | 
					
						
							|  |  |  |             dataset_id=dataset.id, | 
					
						
							|  |  |  |             page=page, | 
					
						
							|  |  |  |             per_page=limit | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         response = { | 
					
						
							|  |  |  |             'data': marshal(dataset_queries, dataset_query_detail_fields), | 
					
						
							|  |  |  |             'has_more': len(dataset_queries) == limit, | 
					
						
							|  |  |  |             'limit': limit, | 
					
						
							|  |  |  |             'total': total, | 
					
						
							|  |  |  |             'page': page | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         return response, 200 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class DatasetIndexingEstimateApi(Resource): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @setup_required | 
					
						
							|  |  |  |     @login_required | 
					
						
							|  |  |  |     @account_initialization_required | 
					
						
							|  |  |  |     def post(self): | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |         parser = reqparse.RequestParser() | 
					
						
							|  |  |  |         parser.add_argument('info_list', type=dict, required=True, nullable=True, location='json') | 
					
						
							|  |  |  |         parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json') | 
					
						
							| 
									
										
										
										
											2024-02-22 23:31:57 +08:00
										 |  |  |         parser.add_argument('indexing_technique', type=str, required=True, | 
					
						
							| 
									
										
										
										
											2024-01-19 20:12:04 +08:00
										 |  |  |                             choices=Dataset.INDEXING_TECHNIQUE_LIST, | 
					
						
							|  |  |  |                             nullable=True, location='json') | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  |         parser.add_argument('doc_form', type=str, default='text_model', required=False, nullable=False, location='json') | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  |         parser.add_argument('dataset_id', type=str, required=False, nullable=False, location='json') | 
					
						
							| 
									
										
										
										
											2023-09-27 16:06:32 +08:00
										 |  |  |         parser.add_argument('doc_language', type=str, default='English', required=False, nullable=False, | 
					
						
							|  |  |  |                             location='json') | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |         args = parser.parse_args() | 
					
						
							|  |  |  |         # validate args | 
					
						
							|  |  |  |         DocumentService.estimate_args_validate(args) | 
					
						
							| 
									
										
										
										
											2024-02-22 23:31:57 +08:00
										 |  |  |         extract_settings = [] | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |         if args['info_list']['data_source_type'] == 'upload_file': | 
					
						
							|  |  |  |             file_ids = args['info_list']['file_info_list']['file_ids'] | 
					
						
							|  |  |  |             file_details = db.session.query(UploadFile).filter( | 
					
						
							|  |  |  |                 UploadFile.tenant_id == current_user.current_tenant_id, | 
					
						
							|  |  |  |                 UploadFile.id.in_(file_ids) | 
					
						
							|  |  |  |             ).all() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             if file_details is None: | 
					
						
							|  |  |  |                 raise NotFound("File not found.") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-22 23:31:57 +08:00
										 |  |  |             if file_details: | 
					
						
							|  |  |  |                 for file_detail in file_details: | 
					
						
							|  |  |  |                     extract_setting = ExtractSetting( | 
					
						
							|  |  |  |                         datasource_type="upload_file", | 
					
						
							|  |  |  |                         upload_file=file_detail, | 
					
						
							|  |  |  |                         document_model=args['doc_form'] | 
					
						
							|  |  |  |                     ) | 
					
						
							|  |  |  |                     extract_settings.append(extract_setting) | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |         elif args['info_list']['data_source_type'] == 'notion_import': | 
					
						
							| 
									
										
										
										
											2024-02-22 23:31:57 +08:00
										 |  |  |             notion_info_list = args['info_list']['notion_info_list'] | 
					
						
							|  |  |  |             for notion_info in notion_info_list: | 
					
						
							|  |  |  |                 workspace_id = notion_info['workspace_id'] | 
					
						
							|  |  |  |                 for page in notion_info['pages']: | 
					
						
							|  |  |  |                     extract_setting = ExtractSetting( | 
					
						
							|  |  |  |                         datasource_type="notion_import", | 
					
						
							|  |  |  |                         notion_info={ | 
					
						
							|  |  |  |                             "notion_workspace_id": workspace_id, | 
					
						
							|  |  |  |                             "notion_obj_id": page['page_id'], | 
					
						
							| 
									
										
										
										
											2024-02-27 11:39:05 +08:00
										 |  |  |                             "notion_page_type": page['type'], | 
					
						
							|  |  |  |                             "tenant_id": current_user.current_tenant_id | 
					
						
							| 
									
										
										
										
											2024-02-22 23:31:57 +08:00
										 |  |  |                         }, | 
					
						
							|  |  |  |                         document_model=args['doc_form'] | 
					
						
							|  |  |  |                     ) | 
					
						
							|  |  |  |                     extract_settings.append(extract_setting) | 
					
						
							| 
									
										
										
										
											2024-06-15 02:46:02 +08:00
										 |  |  |         elif args['info_list']['data_source_type'] == 'website_crawl': | 
					
						
							|  |  |  |             website_info_list = args['info_list']['website_info_list'] | 
					
						
							|  |  |  |             for url in website_info_list['urls']: | 
					
						
							|  |  |  |                 extract_setting = ExtractSetting( | 
					
						
							|  |  |  |                     datasource_type="website_crawl", | 
					
						
							|  |  |  |                     website_info={ | 
					
						
							|  |  |  |                         "provider": website_info_list['provider'], | 
					
						
							|  |  |  |                         "job_id": website_info_list['job_id'], | 
					
						
							|  |  |  |                         "url": url, | 
					
						
							|  |  |  |                         "tenant_id": current_user.current_tenant_id, | 
					
						
							|  |  |  |                         "mode": 'crawl', | 
					
						
							|  |  |  |                         "only_main_content": website_info_list['only_main_content'] | 
					
						
							|  |  |  |                     }, | 
					
						
							|  |  |  |                     document_model=args['doc_form'] | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |                 extract_settings.append(extract_setting) | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |         else: | 
					
						
							|  |  |  |             raise ValueError('Data source type not support') | 
					
						
							| 
									
										
										
										
											2024-02-22 23:31:57 +08:00
										 |  |  |         indexing_runner = IndexingRunner() | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             response = indexing_runner.indexing_estimate(current_user.current_tenant_id, extract_settings, | 
					
						
							|  |  |  |                                                          args['process_rule'], args['doc_form'], | 
					
						
							|  |  |  |                                                          args['doc_language'], args['dataset_id'], | 
					
						
							|  |  |  |                                                          args['indexing_technique']) | 
					
						
							|  |  |  |         except LLMBadRequestError: | 
					
						
							|  |  |  |             raise ProviderNotInitializeError( | 
					
						
							|  |  |  |                 "No Embedding Model available. Please configure a valid provider " | 
					
						
							|  |  |  |                 "in the Settings -> Model Provider.") | 
					
						
							|  |  |  |         except ProviderTokenNotInitError as ex: | 
					
						
							|  |  |  |             raise ProviderNotInitializeError(ex.description) | 
					
						
							| 
									
										
										
										
											2024-06-26 12:51:50 +08:00
										 |  |  |         except Exception as e: | 
					
						
							|  |  |  |             raise IndexingEstimateError(str(e)) | 
					
						
							| 
									
										
										
										
											2024-02-22 23:31:57 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |         return response, 200 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class DatasetRelatedAppListApi(Resource): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @setup_required | 
					
						
							|  |  |  |     @login_required | 
					
						
							|  |  |  |     @account_initialization_required | 
					
						
							|  |  |  |     @marshal_with(related_app_list) | 
					
						
							|  |  |  |     def get(self, dataset_id): | 
					
						
							|  |  |  |         dataset_id_str = str(dataset_id) | 
					
						
							|  |  |  |         dataset = DatasetService.get_dataset(dataset_id_str) | 
					
						
							|  |  |  |         if dataset is None: | 
					
						
							|  |  |  |             raise NotFound("Dataset not found.") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             DatasetService.check_dataset_permission(dataset, current_user) | 
					
						
							|  |  |  |         except services.errors.account.NoPermissionError as e: | 
					
						
							|  |  |  |             raise Forbidden(str(e)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         app_dataset_joins = DatasetService.get_related_apps(dataset.id) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         related_apps = [] | 
					
						
							|  |  |  |         for app_dataset_join in app_dataset_joins: | 
					
						
							|  |  |  |             app_model = app_dataset_join.app | 
					
						
							|  |  |  |             if app_model: | 
					
						
							|  |  |  |                 related_apps.append(app_model) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             'data': related_apps, | 
					
						
							|  |  |  |             'total': len(related_apps) | 
					
						
							|  |  |  |         }, 200 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  | class DatasetIndexingStatusApi(Resource): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @setup_required | 
					
						
							|  |  |  |     @login_required | 
					
						
							|  |  |  |     @account_initialization_required | 
					
						
							|  |  |  |     def get(self, dataset_id): | 
					
						
							|  |  |  |         dataset_id = str(dataset_id) | 
					
						
							|  |  |  |         documents = db.session.query(Document).filter( | 
					
						
							|  |  |  |             Document.dataset_id == dataset_id, | 
					
						
							|  |  |  |             Document.tenant_id == current_user.current_tenant_id | 
					
						
							|  |  |  |         ).all() | 
					
						
							|  |  |  |         documents_status = [] | 
					
						
							|  |  |  |         for document in documents: | 
					
						
							|  |  |  |             completed_segments = DocumentSegment.query.filter(DocumentSegment.completed_at.isnot(None), | 
					
						
							|  |  |  |                                                               DocumentSegment.document_id == str(document.id), | 
					
						
							|  |  |  |                                                               DocumentSegment.status != 're_segment').count() | 
					
						
							|  |  |  |             total_segments = DocumentSegment.query.filter(DocumentSegment.document_id == str(document.id), | 
					
						
							|  |  |  |                                                           DocumentSegment.status != 're_segment').count() | 
					
						
							|  |  |  |             document.completed_segments = completed_segments | 
					
						
							|  |  |  |             document.total_segments = total_segments | 
					
						
							| 
									
										
										
										
											2023-09-27 16:06:32 +08:00
										 |  |  |             documents_status.append(marshal(document, document_status_fields)) | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |         data = { | 
					
						
							|  |  |  |             'data': documents_status | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         return data | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-27 16:06:32 +08:00
										 |  |  | class DatasetApiKeyApi(Resource): | 
					
						
							|  |  |  |     max_keys = 10 | 
					
						
							|  |  |  |     token_prefix = 'dataset-' | 
					
						
							|  |  |  |     resource_type = 'dataset' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @setup_required | 
					
						
							|  |  |  |     @login_required | 
					
						
							|  |  |  |     @account_initialization_required | 
					
						
							|  |  |  |     @marshal_with(api_key_list) | 
					
						
							|  |  |  |     def get(self): | 
					
						
							|  |  |  |         keys = db.session.query(ApiToken). \ | 
					
						
							|  |  |  |             filter(ApiToken.type == self.resource_type, ApiToken.tenant_id == current_user.current_tenant_id). \ | 
					
						
							|  |  |  |             all() | 
					
						
							|  |  |  |         return {"items": keys} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @setup_required | 
					
						
							|  |  |  |     @login_required | 
					
						
							|  |  |  |     @account_initialization_required | 
					
						
							|  |  |  |     @marshal_with(api_key_fields) | 
					
						
							|  |  |  |     def post(self): | 
					
						
							|  |  |  |         # The role of the current user in the ta table must be admin or owner | 
					
						
							| 
									
										
										
										
											2024-01-26 12:47:42 +08:00
										 |  |  |         if not current_user.is_admin_or_owner: | 
					
						
							| 
									
										
										
										
											2023-09-27 16:06:32 +08:00
										 |  |  |             raise Forbidden() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         current_key_count = db.session.query(ApiToken). \ | 
					
						
							|  |  |  |             filter(ApiToken.type == self.resource_type, ApiToken.tenant_id == current_user.current_tenant_id). \ | 
					
						
							|  |  |  |             count() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if current_key_count >= self.max_keys: | 
					
						
							|  |  |  |             flask_restful.abort( | 
					
						
							|  |  |  |                 400, | 
					
						
							|  |  |  |                 message=f"Cannot create more than {self.max_keys} API keys for this resource type.", | 
					
						
							|  |  |  |                 code='max_keys_exceeded' | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         key = ApiToken.generate_api_key(self.token_prefix, 24) | 
					
						
							|  |  |  |         api_token = ApiToken() | 
					
						
							|  |  |  |         api_token.tenant_id = current_user.current_tenant_id | 
					
						
							|  |  |  |         api_token.token = key | 
					
						
							|  |  |  |         api_token.type = self.resource_type | 
					
						
							|  |  |  |         db.session.add(api_token) | 
					
						
							|  |  |  |         db.session.commit() | 
					
						
							|  |  |  |         return api_token, 200 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-28 10:41:41 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | class DatasetApiDeleteApi(Resource): | 
					
						
							|  |  |  |     resource_type = 'dataset' | 
					
						
							| 
									
										
										
										
											2023-11-17 22:13:37 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-27 16:06:32 +08:00
										 |  |  |     @setup_required | 
					
						
							|  |  |  |     @login_required | 
					
						
							|  |  |  |     @account_initialization_required | 
					
						
							|  |  |  |     def delete(self, api_key_id): | 
					
						
							|  |  |  |         api_key_id = str(api_key_id) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # The role of the current user in the ta table must be admin or owner | 
					
						
							| 
									
										
										
										
											2024-01-26 12:47:42 +08:00
										 |  |  |         if not current_user.is_admin_or_owner: | 
					
						
							| 
									
										
										
										
											2023-09-27 16:06:32 +08:00
										 |  |  |             raise Forbidden() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         key = db.session.query(ApiToken). \ | 
					
						
							|  |  |  |             filter(ApiToken.tenant_id == current_user.current_tenant_id, ApiToken.type == self.resource_type, | 
					
						
							|  |  |  |                    ApiToken.id == api_key_id). \ | 
					
						
							|  |  |  |             first() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if key is None: | 
					
						
							|  |  |  |             flask_restful.abort(404, message='API key not found') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         db.session.query(ApiToken).filter(ApiToken.id == api_key_id).delete() | 
					
						
							|  |  |  |         db.session.commit() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return {'result': 'success'}, 204 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class DatasetApiBaseUrlApi(Resource): | 
					
						
							|  |  |  |     @setup_required | 
					
						
							|  |  |  |     @login_required | 
					
						
							|  |  |  |     @account_initialization_required | 
					
						
							|  |  |  |     def get(self): | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             'api_base_url': (current_app.config['SERVICE_API_URL'] if current_app.config['SERVICE_API_URL'] | 
					
						
							|  |  |  |                              else request.host_url.rstrip('/')) + '/v1' | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-17 22:13:37 +08:00
										 |  |  | class DatasetRetrievalSettingApi(Resource): | 
					
						
							|  |  |  |     @setup_required | 
					
						
							|  |  |  |     @login_required | 
					
						
							|  |  |  |     @account_initialization_required | 
					
						
							|  |  |  |     def get(self): | 
					
						
							|  |  |  |         vector_type = current_app.config['VECTOR_STORE'] | 
					
						
							| 
									
										
										
										
											2024-06-08 22:29:24 +08:00
										 |  |  |         match vector_type: | 
					
						
							| 
									
										
										
										
											2024-06-22 01:48:07 +08:00
										 |  |  |             case VectorType.MILVUS | VectorType.RELYT | VectorType.PGVECTOR | VectorType.TIDB_VECTOR | VectorType.CHROMA | VectorType.TENCENT | VectorType.ORACLE: | 
					
						
							| 
									
										
										
										
											2024-06-08 22:29:24 +08:00
										 |  |  |                 return { | 
					
						
							|  |  |  |                     'retrieval_method': [ | 
					
						
							| 
									
										
										
										
											2024-06-19 16:05:27 +08:00
										 |  |  |                         RetrievalMethod.SEMANTIC_SEARCH | 
					
						
							| 
									
										
										
										
											2024-06-08 22:29:24 +08:00
										 |  |  |                     ] | 
					
						
							|  |  |  |                 } | 
					
						
							| 
									
										
										
										
											2024-06-19 12:44:33 +08:00
										 |  |  |             case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH: | 
					
						
							| 
									
										
										
										
											2024-06-08 22:29:24 +08:00
										 |  |  |                 return { | 
					
						
							|  |  |  |                     'retrieval_method': [ | 
					
						
							| 
									
										
										
										
											2024-06-19 16:05:27 +08:00
										 |  |  |                         RetrievalMethod.SEMANTIC_SEARCH, | 
					
						
							|  |  |  |                         RetrievalMethod.FULL_TEXT_SEARCH, | 
					
						
							|  |  |  |                         RetrievalMethod.HYBRID_SEARCH, | 
					
						
							| 
									
										
										
										
											2024-06-08 22:29:24 +08:00
										 |  |  |                     ] | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             case _: | 
					
						
							|  |  |  |                 raise ValueError(f"Unsupported vector db type {vector_type}.") | 
					
						
							| 
									
										
										
										
											2023-11-17 22:13:37 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class DatasetRetrievalSettingMockApi(Resource): | 
					
						
							|  |  |  |     @setup_required | 
					
						
							|  |  |  |     @login_required | 
					
						
							|  |  |  |     @account_initialization_required | 
					
						
							|  |  |  |     def get(self, vector_type): | 
					
						
							| 
									
										
										
										
											2024-06-08 22:29:24 +08:00
										 |  |  |         match vector_type: | 
					
						
							| 
									
										
										
										
											2024-06-22 01:48:07 +08:00
										 |  |  |             case VectorType.MILVUS | VectorType.RELYT | VectorType.PGVECTOR | VectorType.TIDB_VECTOR | VectorType.CHROMA | VectorType.TENCENT | VectorType.ORACLE: | 
					
						
							| 
									
										
										
										
											2024-06-08 22:29:24 +08:00
										 |  |  |                 return { | 
					
						
							|  |  |  |                     'retrieval_method': [ | 
					
						
							| 
									
										
										
										
											2024-06-19 16:05:27 +08:00
										 |  |  |                         RetrievalMethod.SEMANTIC_SEARCH | 
					
						
							| 
									
										
										
										
											2024-06-08 22:29:24 +08:00
										 |  |  |                     ] | 
					
						
							|  |  |  |                 } | 
					
						
							| 
									
										
										
										
											2024-06-19 12:44:33 +08:00
										 |  |  |             case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH: | 
					
						
							| 
									
										
										
										
											2024-06-08 22:29:24 +08:00
										 |  |  |                 return { | 
					
						
							|  |  |  |                     'retrieval_method': [ | 
					
						
							| 
									
										
										
										
											2024-06-19 16:05:27 +08:00
										 |  |  |                         RetrievalMethod.SEMANTIC_SEARCH, | 
					
						
							|  |  |  |                         RetrievalMethod.FULL_TEXT_SEARCH, | 
					
						
							|  |  |  |                         RetrievalMethod.HYBRID_SEARCH, | 
					
						
							| 
									
										
										
										
											2024-06-08 22:29:24 +08:00
										 |  |  |                     ] | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             case _: | 
					
						
							|  |  |  |                 raise ValueError(f"Unsupported vector db type {vector_type}.") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-17 22:13:37 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-15 02:46:02 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-24 15:02:29 +08:00
										 |  |  | class DatasetErrorDocs(Resource): | 
					
						
							|  |  |  |     @setup_required | 
					
						
							|  |  |  |     @login_required | 
					
						
							|  |  |  |     @account_initialization_required | 
					
						
							|  |  |  |     def get(self, dataset_id): | 
					
						
							|  |  |  |         dataset_id_str = str(dataset_id) | 
					
						
							|  |  |  |         dataset = DatasetService.get_dataset(dataset_id_str) | 
					
						
							|  |  |  |         if dataset is None: | 
					
						
							|  |  |  |             raise NotFound("Dataset not found.") | 
					
						
							|  |  |  |         results = DocumentService.get_error_documents_by_dataset_id(dataset_id_str) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             'data': [marshal(item, document_status_fields) for item in results], | 
					
						
							|  |  |  |             'total': len(results) | 
					
						
							|  |  |  |         }, 200 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-17 22:13:37 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
# Route registration for the console dataset API.
# Dataset CRUD and queries.
api.add_resource(DatasetListApi, '/datasets')
api.add_resource(DatasetApi, '/datasets/<uuid:dataset_id>')
api.add_resource(DatasetUseCheckApi, '/datasets/<uuid:dataset_id>/use-check')
api.add_resource(DatasetQueryApi, '/datasets/<uuid:dataset_id>/queries')
api.add_resource(DatasetErrorDocs, '/datasets/<uuid:dataset_id>/error-docs')
# Indexing estimate/status.
api.add_resource(DatasetIndexingEstimateApi, '/datasets/indexing-estimate')
api.add_resource(DatasetRelatedAppListApi, '/datasets/<uuid:dataset_id>/related-apps')
api.add_resource(DatasetIndexingStatusApi, '/datasets/<uuid:dataset_id>/indexing-status')
# Service API key management and base-URL info.
api.add_resource(DatasetApiKeyApi, '/datasets/api-keys')
api.add_resource(DatasetApiDeleteApi, '/datasets/api-keys/<uuid:api_key_id>')
api.add_resource(DatasetApiBaseUrlApi, '/datasets/api-base-info')
# Retrieval-method capability lookup (configured store, or an explicit type).
api.add_resource(DatasetRetrievalSettingApi, '/datasets/retrieval-setting')
api.add_resource(DatasetRetrievalSettingMockApi, '/datasets/retrieval-setting/<string:vector_type>')