mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-11-02 18:59:28 +00:00
46 lines
1.5 KiB
Python
46 lines
1.5 KiB
Python
from abc import abstractmethod
|
|
from typing import Any, Optional, Dict
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
class BaseDocumentStore:
    """
    Common interface that concrete Document Store implementations build on.

    Each method below is an overridable hook; the versions defined here do
    nothing and return ``None``.

    NOTE(review): this class has no ``ABC`` base or ``ABCMeta`` metaclass, so
    ``@abstractmethod`` only tags the methods. It does not stop this class, or
    a subclass that leaves hooks unimplemented, from being instantiated.
    """

    @abstractmethod
    def write_documents(self, documents):
        """Hook for adding ``documents`` to the store.

        The expected type of ``documents`` is not specified here (presumably
        a collection of documents; confirm against the concrete stores).
        """

    @abstractmethod
    def get_document_by_id(self, id):
        """Hook for looking up a single document by its ``id``."""

    @abstractmethod
    def get_document_ids_by_tags(self, tag):
        """Hook for finding the ids of documents that match ``tag``.

        The shape of ``tag`` is not specified here; confirm against the
        concrete stores.
        """

    @abstractmethod
    def get_document_count(self):
        """Hook for reporting how many documents the store holds."""

    @abstractmethod
    def query_by_embedding(self, query_emb, top_k=10, candidate_doc_ids=None):
        """Hook for retrieving documents by embedding similarity.

        :param query_emb: embedding of the query (type not specified here).
        :param top_k: defaults to 10; presumably the maximum number of hits.
        :param candidate_doc_ids: defaults to ``None``; presumably restricts
            the search to these document ids when given (confirm in the
            concrete stores).
        """
|
|
|
|
|
|
class Document(BaseModel):
    # Pydantic model for a single document; `id` and `text` are required,
    # every other field is optional and defaults to None.
    #
    # NOTE(review): documented with comments rather than a class docstring
    # because pydantic may surface a model docstring in the generated schema;
    # confirm before adding one.

    # Required identifier (the Elasticsearch `_id`, per the description).
    id: str = Field(default=..., description="_id field from Elasticsearch")

    # Required document body.
    text: str = Field(default=..., description="Text of the document")

    # Optional link back to the source file this document was created from.
    external_source_id: Optional[str] = Field(
        default=None,
        description=(
            "id for the source file the document was created from. "
            "In the case when a large file is divided "
            "across multiple Elasticsearch documents, "
            "this id can be used to reference original source file."
        ),
    )

    # Disabled field, kept for reference:
    # name: Optional[str] = Field(None, description="Title of the document")

    # Optional question text, used for FAQ-style documents.
    question: Optional[str] = Field(default=None, description="Question text for FAQs.")

    # Optional score assigned by the Elasticsearch query on retrieval.
    query_score: Optional[float] = Field(
        default=None,
        description="Elasticsearch query score for a retrieved document",
    )

    # Optional free-form metadata mapping (the description is intentionally
    # left empty, as in the original).
    meta: Optional[Dict[str, Any]] = Field(default=None, description="")

    # Optional tags mapping used to filter the data.
    tags: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Tags that allow filtering of the data",
    )
|