2020-05-18 14:47:41 +02:00

46 lines
1.5 KiB
Python

from abc import abstractmethod
from typing import Any, Optional, Dict
from pydantic import BaseModel, Field
class BaseDocumentStore:
    """
    Base class for implementing Document Stores.

    Declares the operations a document store exposes. Every method defined
    here is a stub that does nothing and returns ``None``; concrete stores are
    expected to override them.

    NOTE(review): the methods are decorated with ``@abstractmethod`` but this
    class neither derives from ``abc.ABC`` nor uses ``abc.ABCMeta``, so Python
    does not enforce the overrides -- this class and incomplete subclasses can
    still be instantiated. Confirm whether enforcement is wanted before
    changing that, because it would break subclasses that skip a method.
    """

    @abstractmethod
    def write_documents(self, documents):
        """
        Hook for writing ``documents`` to the store.

        :param documents: documents to write. The element type is not defined
                          in this class -- confirm against the concrete stores.
        :return: ``None`` in this base implementation.
        """

    @abstractmethod
    def get_document_by_id(self, id):
        """
        Hook for looking up a single document by its identifier.

        :param id: identifier of the requested document (presumably the value
                   held in ``Document.id`` -- confirm against implementations).
        :return: ``None`` in this base implementation.
        """

    @abstractmethod
    def get_document_ids_by_tags(self, tag):
        """
        Hook for finding the ids of documents that match ``tag``.

        :param tag: tag filter; its structure is not defined in this class --
                    confirm against the concrete stores.
        :return: ``None`` in this base implementation.
        """

    @abstractmethod
    def get_document_count(self):
        """
        Hook for reporting how many documents the store holds.

        :return: ``None`` in this base implementation.
        """

    @abstractmethod
    def query_by_embedding(self, query_emb, top_k=10, candidate_doc_ids=None):
        """
        Hook for retrieving documents by embedding similarity.

        :param query_emb: embedding of the query; shape and dtype are not
                          defined in this class -- confirm against implementations.
        :param top_k: defaults to 10; presumably the maximum number of
                      documents to return -- confirm against implementations.
        :param candidate_doc_ids: defaults to ``None``; presumably restricts the
                                  search to these document ids -- confirm
                                  against implementations.
        :return: ``None`` in this base implementation.
        """
class Document(BaseModel):
    # Pydantic model describing one document held in a document store.
    #
    # Documented with `#` comments rather than a class docstring on purpose:
    # pydantic copies a model's docstring into the generated schema, so adding
    # one would change the schema output.
    #
    # `id` and `text` are required (their default is `...`); every other field
    # is optional and defaults to None.

    id: str = Field(..., description="_id field from Elasticsearch")
    text: str = Field(..., description="Text of the document")
    external_source_id: Optional[str] = Field(
        None,
        description="id for the source file the document was created from. In the case when a large file is divided "
        "across multiple Elasticsearch documents, this id can be used to reference original source file.",
    )
    # NOTE(review): the `name` field below is disabled in the original source;
    # it is kept as-is for reference until someone confirms it can be dropped.
    # name: Optional[str] = Field(None, description="Title of the document")
    question: Optional[str] = Field(None, description="Question text for FAQs.")
    query_score: Optional[float] = Field(None, description="Elasticsearch query score for a retrieved document")
    # NOTE(review): `meta` has an empty description in the original; its
    # intended contents are not visible here, so the string is left unchanged.
    meta: Optional[Dict[str, Any]] = Field(None, description="")
    tags: Optional[Dict[str, Any]] = Field(None, description="Tags that allow filtering of the data")