from abc import ABC, abstractmethod
from typing import Any, Optional, Dict

from pydantic import BaseModel, Field


class BaseDocumentStore(ABC):
    """
    Base class for implementing Document Stores.

    Derives from ``ABC`` so that the ``@abstractmethod`` markers below are
    actually enforced: a subclass must override every method listed here
    before it can be instantiated.
    """

    @abstractmethod
    def write_documents(self, documents):
        """
        Write ``documents`` to the store.

        The expected format of ``documents`` is defined by the concrete
        implementation.
        """

    @abstractmethod
    def get_document_by_id(self, id):
        """
        Look up a single document by its ``id``.

        The return type is defined by the concrete implementation
        (presumably a ``Document`` -- confirm against implementations).
        """

    @abstractmethod
    def get_document_ids_by_tags(self, tag):
        """
        Look up the ids of documents matching ``tag``.

        The format of ``tag`` is defined by the concrete implementation.
        """

    @abstractmethod
    def get_document_count(self):
        """
        Report how many documents the store holds.
        """

    @abstractmethod
    def query_by_embedding(self, query_emb, top_k=10, candidate_doc_ids=None):
        """
        Query the store using the embedding ``query_emb``.

        :param query_emb: Embedding of the query; its format is defined by the
                          concrete implementation.
        :param top_k: Defaults to 10. Presumably the maximum number of results
                      to return -- confirm against implementations.
        :param candidate_doc_ids: Defaults to ``None``. Presumably restricts the
                                  search to the given document ids -- confirm
                                  against implementations.
        """


class Document(BaseModel):
    """
    Pydantic model describing a single document and its optional metadata.

    Only ``id`` and ``text`` are required; every other field defaults to
    ``None``.
    """

    id: str = Field(..., description="_id field from Elasticsearch")
    text: str = Field(..., description="Text of the document")
    external_source_id: Optional[str] = Field(
        None,
        description="id for the source file the document was created from. In the case when a large file is divided "
        "across multiple Elasticsearch documents, this id can be used to reference original source file.",
    )
    # name: Optional[str] = Field(None, description="Title of the document")
    question: Optional[str] = Field(None, description="Question text for FAQs.")
    query_score: Optional[float] = Field(None, description="Elasticsearch query score for a retrieved document")
    meta: Optional[Dict[str, Any]] = Field(None, description="")
    tags: Optional[Dict[str, Any]] = Field(None, description="Tags that allow filtering of the data")