mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-29 07:59:27 +00:00
Removing (deprecation) warnings (#530)
1. A few warnings still need a fix in FARM. 2. The warning from the docx library can't be removed.
This commit is contained in:
parent
f5419163e7
commit
5d45992c84
@ -304,7 +304,7 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
|
||||
|
||||
def update_document_meta(self, id: str, meta: Dict[str, str]):
|
||||
body = {"doc": meta}
|
||||
self.client.update(index=self.index, doc_type="_doc", id=id, body=body, refresh=self.refresh_type)
|
||||
self.client.update(index=self.index, id=id, body=body, refresh=self.refresh_type)
|
||||
|
||||
def get_document_count(self, filters: Optional[Dict[str, List[str]]] = None, index: Optional[str] = None) -> int:
|
||||
index = index or self.index
|
||||
|
||||
@ -196,12 +196,14 @@ def tika_convert_files_to_dicts(
|
||||
last_para = ''
|
||||
for para in paras:
|
||||
para = para.strip()
|
||||
if not para: continue
|
||||
if not para:
|
||||
continue
|
||||
# merge paragraphs to improve qa
|
||||
# merge this paragraph if less than 10 characters or 2 words
|
||||
# or this paragraph starts with a lower case and last paragraph does not end with a punctuation
|
||||
if merge_short and len(para) < 10 or len(re.findall('\s+', para)) < 2 \
|
||||
or merge_lowercase and para and para[0].islower() and last_para and last_para[-1] not in '.?!"\'\]\)':
|
||||
if merge_short and len(para) < 10 or len(re.findall(r'\s+', para)) < 2 \
|
||||
or merge_lowercase and para and para[0].islower() and last_para \
|
||||
and last_para[-1] not in r'.?!"\'\]\)':
|
||||
last_para += ' ' + para
|
||||
else:
|
||||
if last_para:
|
||||
|
||||
@ -338,6 +338,7 @@ class FARMReader(BaseReader):
|
||||
inputs.append(cur)
|
||||
|
||||
# get answers from QA model
|
||||
# TODO: Needs a fix in FARM's `to_dict` function of the `QAInput` class
|
||||
predictions = self.inferencer.inference_from_objects(
|
||||
objects=inputs, return_json=False, multiprocessing_chunksize=1
|
||||
)
|
||||
|
||||
@ -5,13 +5,9 @@ import numpy as np
|
||||
from pathlib import Path
|
||||
from tqdm import tqdm
|
||||
|
||||
from farm.infer import Inferencer
|
||||
|
||||
from haystack.document_store.base import BaseDocumentStore
|
||||
from haystack import Document
|
||||
from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
|
||||
from haystack.retriever.base import BaseRetriever
|
||||
from haystack.retriever.sparse import logger
|
||||
|
||||
from farm.infer import Inferencer
|
||||
from farm.modeling.tokenization import Tokenizer
|
||||
@ -374,6 +370,8 @@ class EmbeddingRetriever(BaseRetriever):
|
||||
assert type(texts) == list, "Expecting a list of texts, i.e. create_embeddings(texts=['text1',...])"
|
||||
|
||||
if self.model_format == "farm" or self.model_format == "transformers":
|
||||
# TODO: FARM's `sample_to_features_text` needs a fix for the following warning:
|
||||
# tokenization_utils.py:460: FutureWarning: `is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.
|
||||
emb = self.embedding_model.inference_from_dicts(dicts=[{"text": t} for t in texts]) # type: ignore
|
||||
emb = [(r["vec"]) for r in emb]
|
||||
elif self.model_format == "sentence_transformers":
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user