2021-10-12 10:53:54 +02:00
|
|
|
from typing import List
|
|
|
|
|
2021-10-04 11:21:00 +02:00
|
|
|
import logging
|
|
|
|
|
2022-04-12 16:41:05 +02:00
|
|
|
from fastapi import FastAPI, APIRouter
|
|
|
|
from haystack.document_stores import BaseDocumentStore
|
2021-10-04 11:21:00 +02:00
|
|
|
|
2022-04-12 16:41:05 +02:00
|
|
|
from rest_api.utils import get_app, get_pipelines
|
2021-10-04 11:21:00 +02:00
|
|
|
from rest_api.config import LOG_LEVEL
|
2021-10-18 14:38:14 +02:00
|
|
|
from rest_api.schema import FilterRequest, DocumentSerialized
|
2021-10-04 11:21:00 +02:00
|
|
|
|
|
|
|
|
|
|
|
# Module-wide logger shared under the "haystack" name; its level comes from the REST API config.
logger = logging.getLogger("haystack")
logger.setLevel(LOG_LEVEL)

# Router that collects the document endpoints declared in this module.
router = APIRouter()

app: FastAPI = get_app()

# NOTE(review): presumably None when no "document_store" entry is configured - confirm against get_pipelines().
document_store: BaseDocumentStore = get_pipelines().get("document_store", None)
|
2021-10-04 11:21:00 +02:00
|
|
|
|
|
|
|
|
2021-11-11 09:40:58 +01:00
|
|
|
@router.post("/documents/get_by_filters", response_model=List[DocumentSerialized], response_model_exclude_none=True)
def get_documents(filters: FilterRequest):
    """
    This endpoint allows you to retrieve documents contained in your document store.
    You can filter the documents to retrieve by metadata (like the document's name),
    or provide an empty JSON object to retrieve all the documents in the document store.

    Example of filters:
    `'{"filters": {"name": ["some", "more"], "category": ["only_one"]}}'`

    To get all documents you should provide an empty dict, like:
    `'{"filters": {}}'`
    """
    docs = [doc.to_dict() for doc in document_store.get_all_documents(filters=filters.filters)]
    for doc in docs:
        # Blank out the embedding; with response_model_exclude_none=True the None field is
        # omitted from the serialized response.
        doc["embedding"] = None
    return docs
|
2021-10-12 10:53:54 +02:00
|
|
|
|
|
|
|
|
2021-10-04 11:21:00 +02:00
|
|
|
@router.post("/documents/delete_by_filters", response_model=bool)
def delete_documents(filters: FilterRequest):
    """
    This endpoint allows you to delete documents contained in your document store.
    You can filter the documents to delete by metadata (like the document's name),
    or provide an empty JSON object to clear the document store.

    Example of filters:
    `'{"filters": {"name": ["some", "more"], "category": ["only_one"]}}'`

    To delete all documents you should provide an empty dict, like:
    `'{"filters": {}}'`
    """
    document_store.delete_documents(filters=filters.filters)
    # Always reports success; any failure surfaces as an exception raised by the call above.
    return True
|