diff --git a/haystack/database/base.py b/haystack/database/base.py index 5a5ada63c..4ad69ca6b 100644 --- a/haystack/database/base.py +++ b/haystack/database/base.py @@ -18,4 +18,8 @@ class BaseDocumentStore: def get_document_ids_by_tag(self, tag): pass + @abstractmethod + def get_document_count(self): + pass + diff --git a/haystack/database/sql.py b/haystack/database/sql.py index a00538e25..4101e447f 100644 --- a/haystack/database/sql.py +++ b/haystack/database/sql.py @@ -98,3 +98,7 @@ class SQLDocumentStore(BaseDocumentStore): row = Document(name=doc["name"], text=doc["text"]) self.session.add(row) self.session.commit() + + def get_document_count(self): + return self.session.query(Document).count() + diff --git a/haystack/indexing/io.py b/haystack/indexing/io.py index e474bfad3..cae24a0c3 100644 --- a/haystack/indexing/io.py +++ b/haystack/indexing/io.py @@ -19,11 +19,10 @@ def write_documents_to_db(datastore, document_dir, clean_func=None, only_empty_d :return: None """ file_paths = Path(document_dir).glob("**/*.txt") - n_docs = 0 # check if db has already docs if only_empty_db: - n_docs = len(datastore.get_all_documents()) + n_docs = datastore.get_document_count() if n_docs > 0: logger.info(f"Skip writing documents since DB already contains {n_docs} docs ... " "(Disable `only_empty_db`, if you want to add docs anyway.)")