mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-24 08:28:22 +00:00
Add flag for use of window queries in SQLDocumentStore (#768)
This commit is contained in:
parent
46307d1571
commit
d9f011da9a
@ -96,6 +96,11 @@ class SQLDocumentStore(BaseDocumentStore):
|
||||
self.update_existing_documents = update_existing_documents
|
||||
if getattr(self, "similarity", None) is None:
|
||||
self.similarity = None
|
||||
# Flag that gates the use of `_windowed_query` when fetching documents.
# SQLite only gained window-function support in release 3.25.0, so older
# SQLite builds must fall back to iterating the plain query.
self.use_windowed_query = True
if "sqlite" in url:
    import sqlite3

    # Compare the numeric version tuple, not the version string: string
    # comparison is lexicographic, so "3.7.17" < "3.25" evaluates to False
    # and an old SQLite would wrongly be treated as window-capable.
    if sqlite3.sqlite_version_info < (3, 25):
        self.use_windowed_query = False
|
||||
|
||||
def get_document_by_id(self, id: str, index: Optional[str] = None) -> Optional[Document]:
|
||||
"""Fetch a document by specifying its text id string"""
|
||||
@ -182,7 +187,11 @@ class SQLDocumentStore(BaseDocumentStore):
|
||||
)
|
||||
|
||||
documents_map = {}
|
||||
for i, row in enumerate(self._windowed_query(documents_query, DocumentORM.id, batch_size), start=1):
|
||||
|
||||
if self.use_windowed_query:
|
||||
documents_query = self._windowed_query(documents_query, DocumentORM.id, batch_size)
|
||||
|
||||
for i, row in enumerate(documents_query, start=1):
|
||||
documents_map[row.id] = Document(
|
||||
id=row.id,
|
||||
text=row.text,
|
||||
|
@ -51,6 +51,19 @@ def test_get_all_documents_with_correct_filters(document_store_with_docs):
|
||||
assert {d.meta["meta_field"] for d in documents} == {"test1", "test3"}
|
||||
|
||||
|
||||
@pytest.mark.parametrize("document_store_with_docs", ["sql"], indirect=True)
def test_get_all_documents_with_correct_filters_legacy_sqlite(document_store_with_docs):
    """Check filtered retrieval on the SQL store with windowed queries disabled.

    Sets ``use_windowed_query`` to False on the store, then verifies a
    single-value filter and a multi-value filter on ``meta_field``.
    """
    store = document_store_with_docs
    # Force the non-windowed code path for this test.
    store.use_windowed_query = False

    # Single-value filter: exactly one document is expected.
    single_match = store.get_all_documents(filters={"meta_field": ["test2"]})
    assert len(single_match) == 1
    assert single_match[0].meta["name"] == "filename2"

    # Multi-value filter: both matching documents are expected.
    multi_match = store.get_all_documents(filters={"meta_field": ["test1", "test3"]})
    assert len(multi_match) == 2
    found_names = {doc.meta["name"] for doc in multi_match}
    assert found_names == {"filename1", "filename3"}
    found_meta_values = {doc.meta["meta_field"] for doc in multi_match}
    assert found_meta_values == {"test1", "test3"}
|
||||
|
||||
|
||||
@pytest.mark.elasticsearch
|
||||
def test_get_all_documents_with_incorrect_filter_name(document_store_with_docs):
|
||||
documents = document_store_with_docs.get_all_documents(filters={"incorrect_meta_field": ["test2"]})
|
||||
|
Loading…
x
Reference in New Issue
Block a user