Add flag for use of window queries in SQLDocumentStore (#768)

This commit is contained in:
Tanay Soni 2021-01-25 12:54:34 +01:00 committed by GitHub
parent 46307d1571
commit d9f011da9a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 23 additions and 1 deletions

View File

@ -96,6 +96,11 @@ class SQLDocumentStore(BaseDocumentStore):
self.update_existing_documents = update_existing_documents
if getattr(self, "similarity", None) is None:
self.similarity = None
# Flag consulted when fetching documents: when True, results are streamed
# through self._windowed_query; when False, the plain query is iterated.
# SQLite only gained window functions in 3.25, so older runtimes must opt out.
self.use_windowed_query = True
if "sqlite" in url:
    import sqlite3

    # Compare numeric version tuples, not strings: lexicographically
    # "3.8.0" < "3.25" is False, which would wrongly leave windowed
    # queries enabled on SQLite builds that cannot execute them.
    if sqlite3.sqlite_version_info < (3, 25):
        self.use_windowed_query = False
def get_document_by_id(self, id: str, index: Optional[str] = None) -> Optional[Document]:
"""Fetch a document by specifying its text id string"""
@ -182,7 +187,11 @@ class SQLDocumentStore(BaseDocumentStore):
)
documents_map = {}
for i, row in enumerate(self._windowed_query(documents_query, DocumentORM.id, batch_size), start=1):
if self.use_windowed_query:
documents_query = self._windowed_query(documents_query, DocumentORM.id, batch_size)
for i, row in enumerate(documents_query, start=1):
documents_map[row.id] = Document(
id=row.id,
text=row.text,

View File

@ -51,6 +51,19 @@ def test_get_all_documents_with_correct_filters(document_store_with_docs):
assert {d.meta["meta_field"] for d in documents} == {"test1", "test3"}
@pytest.mark.parametrize("document_store_with_docs", ["sql"], indirect=True)
def test_get_all_documents_with_correct_filters_legacy_sqlite(document_store_with_docs):
    """Check metadata filtering on the SQL store with windowed queries disabled."""
    store = document_store_with_docs
    # Switch the flag off by hand to exercise the non-windowed retrieval path.
    store.use_windowed_query = False

    # A single filter value should match exactly one document.
    single_match = store.get_all_documents(filters={"meta_field": ["test2"]})
    assert len(single_match) == 1
    assert single_match[0].meta["name"] == "filename2"

    # Two filter values should match exactly the two corresponding documents.
    double_match = store.get_all_documents(filters={"meta_field": ["test1", "test3"]})
    assert len(double_match) == 2
    found_names = {doc.meta["name"] for doc in double_match}
    assert found_names == {"filename1", "filename3"}
    found_fields = {doc.meta["meta_field"] for doc in double_match}
    assert found_fields == {"test1", "test3"}
@pytest.mark.elasticsearch
def test_get_all_documents_with_incorrect_filter_name(document_store_with_docs):
documents = document_store_with_docs.get_all_documents(filters={"incorrect_meta_field": ["test2"]})