mirror of https://github.com/deepset-ai/haystack.git
fix: Run update_embeddings in examples (#6008)
* added hybrid search example: an example of hybrid search for an FAQ pipeline on a COVID dataset
* formatted with black formatter
* renamed document
* fixed typos
* added test for hybrid search
* fixed whitespaces
* removed test for hybrid search
* fixed pylint
* commented logging
* updated hybrid search example
* release notes
* Update hybrid_search_faq_pipeline.py-815df846dca7e872.yaml
* Update hybrid_search_faq_pipeline.py
* mention hybrid search example in release notes
* reduce installed dependencies in examples test workflow
* do not install cuda dependencies
* skip models if API key not set; delete document indices
* keep roberta-base model and inference extra
* disable pylint no-logging-basicconfig rule

Co-authored-by: Julian Risch <julian.risch@deepset.ai>
parent c05f564359
commit c102b152dc
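To make the fix easier to follow outside the diff, here is a minimal, hypothetical sketch of the pattern the examples now use: write documents, call update_embeddings so the EmbeddingRetriever has vectors to search, and delete the temporary index afterwards. The model name, sample data, and most parameter choices below are illustrative assumptions, not code from this commit; only the write_documents, update_embeddings, and delete_index calls mirror the diff, and they assume a local Elasticsearch as the examples do.

import pandas as pd

from haystack.document_stores import ElasticsearchDocumentStore
from haystack.nodes import EmbeddingRetriever

# Assumed setup: local Elasticsearch, small sentence-transformers model (384-dim).
document_store = ElasticsearchDocumentStore(
    host="localhost",
    username="",
    password="",
    index="example-document",
    embedding_field="question_emb",
    embedding_dim=384,
)
retriever = EmbeddingRetriever(
    document_store=document_store,
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",  # placeholder model choice
    use_gpu=False,
)

# Placeholder rows; the real example loads a COVID FAQ dataset into a DataFrame.
df = pd.DataFrame([{"content": "How is the virus spreading?"}])
document_store.write_documents(df.to_dict(orient="records"))

# Without this call the newly written documents have no embeddings,
# so the embedding retriever would return nothing.
document_store.update_embeddings(retriever)

# Clean up the example index afterwards, as the updated examples do.
document_store.delete_index(index="example-document")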
.github/workflows/examples_tests.yml (vendored): 4 changes
@@ -42,7 +42,9 @@ jobs:
           python-version: ${{ env.PYTHON_VERSION }}
 
       - name: Install Haystack
-        run: pip install .[all,dev]
+        run: |
+          pip install --upgrade pip
+          pip install .[inference,dev,elasticsearch,preprocessing,file-conversion]
 
       - name: Run
         run: pytest examples/
@@ -1,23 +1,25 @@
+# Disable pylint errors for logging basicConfig
+# pylint: disable=no-logging-basicconfig
 import logging
 
+import pandas as pd
+
+from haystack.document_stores import ElasticsearchDocumentStore
+from haystack.nodes import EmbeddingRetriever
+from haystack.nodes.other.docs2answers import Docs2Answers
+from haystack.pipelines import Pipeline
+from haystack.utils import fetch_archive_from_http, launch_es, print_answers
+
 logging.basicConfig(format="%(levelname)s - %(name)s - %(message)s", level=logging.WARNING)
 logging.getLogger("haystack").setLevel(logging.INFO)
 
-from haystack.document_stores import ElasticsearchDocumentStore
-
-from haystack.nodes import EmbeddingRetriever
-from haystack.nodes.other.docs2answers import Docs2Answers
-from haystack.utils import launch_es, print_answers, fetch_archive_from_http
-import pandas as pd
-from haystack.pipelines import Pipeline
-
 
 def basic_faq_pipeline():
     document_store = ElasticsearchDocumentStore(
         host="localhost",
         username="",
         password="",
-        index="document",
+        index="example-document",
         embedding_field="question_emb",
         embedding_dim=384,
         excluded_meta_data=["question_emb"],
@@ -52,6 +54,7 @@ def basic_faq_pipeline():
     # Convert Dataframe to list of dicts and index them in our DocumentStore
     docs_to_index = df.to_dict(orient="records")
     document_store.write_documents(docs_to_index)
+    document_store.update_embeddings(retriever)
 
     # Initialize a Pipeline (this time without a reader) and ask questions
     pipeline = Pipeline()
@@ -62,6 +65,9 @@ def basic_faq_pipeline():
     prediction = pipeline.run(query="How is the virus spreading?", params={"Retriever": {"top_k": 10}})
 
     print_answers(prediction, details="medium")
+
+    # Remove the index once we're done to save space
+    document_store.delete_index(index="example-document")
     return prediction
 
 
@@ -1,21 +1,23 @@
+# Disable pylint errors for logging basicConfig
+# pylint: disable=no-logging-basicconfig
 import logging
 from pathlib import Path
 
+from haystack.document_stores import ElasticsearchDocumentStore
+from haystack.nodes import BM25Retriever, FARMReader
+from haystack.nodes.file_classifier import FileTypeClassifier
+from haystack.nodes.file_converter import TextConverter
+from haystack.nodes.preprocessor import PreProcessor
+from haystack.pipelines import Pipeline
+from haystack.utils import fetch_archive_from_http, launch_es, print_answers
+
 logging.basicConfig(format="%(levelname)s - %(name)s - %(message)s", level=logging.WARNING)
 logging.getLogger("haystack").setLevel(logging.INFO)
 
-from haystack.document_stores import ElasticsearchDocumentStore
-from haystack.utils import fetch_archive_from_http, print_answers, launch_es
-from haystack.nodes import FARMReader, BM25Retriever
-from haystack.nodes.file_classifier import FileTypeClassifier
-from haystack.nodes.preprocessor import PreProcessor
-from haystack.nodes.file_converter import TextConverter
-from haystack.pipelines import Pipeline
-
 
 def basic_qa_pipeline():
     # Initialize a DocumentStore
-    document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="document")
+    document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="example-document")
 
     # fetch, pre-process and write documents
     doc_dir = "data/basic_qa_pipeline"
@@ -66,6 +68,9 @@ def basic_qa_pipeline():
     )
 
     print_answers(prediction, details="minimum")
+
+    # Remove the index once we're done to save space
+    document_store.delete_index(index="example-document")
     return prediction
 
 
@@ -55,6 +55,7 @@ def hybrid_search_faq_pipeline():
     # Convert Dataframe to list of dicts and index them in our DocumentStore
     docs_to_index = df.to_dict(orient="records")
     document_store.write_documents(docs_to_index)
+    document_store.update_embeddings(retriever=dense_retriever)
 
     # Initialize a Pipeline (this time without a reader) and ask questions
     pipeline = Pipeline()
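In the hybrid example, only the dense retriever is passed to update_embeddings: the sparse BM25 side searches the raw text and needs no vectors. A minimal sketch of that split follows; the variable name dense_retriever comes from the diff above, while the BM25 retriever, the join node, and the model choice are assumptions for illustration, not code from this commit. It reuses document_store and docs_to_index from the first sketch.

from haystack.nodes import BM25Retriever, EmbeddingRetriever, JoinDocuments
from haystack.pipelines import Pipeline

# document_store and docs_to_index are assumed to be set up as in the first sketch.
sparse_retriever = BM25Retriever(document_store=document_store)
dense_retriever = EmbeddingRetriever(
    document_store=document_store,
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",  # placeholder model choice
)

document_store.write_documents(docs_to_index)
# Only the dense retriever needs precomputed embeddings; BM25 works on the stored text.
document_store.update_embeddings(retriever=dense_retriever)

# A typical hybrid pipeline runs both retrievers and joins the result lists,
# for example with reciprocal rank fusion.
hybrid_pipeline = Pipeline()
hybrid_pipeline.add_node(component=sparse_retriever, name="SparseRetriever", inputs=["Query"])
hybrid_pipeline.add_node(component=dense_retriever, name="DenseRetriever", inputs=["Query"])
hybrid_pipeline.add_node(
    component=JoinDocuments(join_mode="reciprocal_rank_fusion"),
    name="JoinDocuments",
    inputs=["SparseRetriever", "DenseRetriever"],
)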
@@ -1,6 +1,6 @@
 from examples.basic_faq_pipeline import basic_faq_pipeline
 
-from haystack.schema import Answer, Document
+from haystack.schema import Answer
 
 
 def test_basic_faq_pipeline():
@@ -6,8 +6,7 @@ from examples.getting_started import getting_started
 from haystack.schema import Answer, Document
 
 
-@pytest.mark.integration
-@pytest.mark.parametrize("provider", ["cohere", "huggingface", "openai"])
+@pytest.mark.parametrize("provider", ["anthropic", "cohere", "huggingface", "openai"])
 def test_getting_started(provider):
     if provider == "anthropic":
         api_key = os.environ.get("ANTHROPIC_API_KEY", "")
@@ -17,6 +16,8 @@ def test_getting_started(provider):
         api_key = os.environ.get("HUGGINGFACE_API_KEY", "")
     elif provider == "openai":
         api_key = os.environ.get("OPENAI_API_KEY", "")
-    result = getting_started(provider=provider, API_KEY=api_key)
+
+    if api_key:
+        result = getting_started(provider=provider, API_KEY=api_key)
 
-    # Testing only for functionality. Since model predictions from APIs might change, we cannot test those directly.
+        # Testing only for functionality. Since model predictions from APIs might change, we cannot test those directly.
@@ -0,0 +1,4 @@
+---
+fixes:
+  - |
+    Added documents_store.update_embeddings call to pipeline examples so that embeddings are calculated for newly added documents.
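For completeness, the querying side that depends on those embeddings, again as a rough sketch rather than the exact example code: the node names, query string, top_k value, and print_answers call follow the diffs above, while the wiring assumes the document_store and retriever from the first sketch.

from haystack.nodes.other.docs2answers import Docs2Answers
from haystack.pipelines import Pipeline
from haystack.utils import print_answers

# Retriever finds the most similar entries; Docs2Answers reshapes them as answers.
pipeline = Pipeline()
pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
pipeline.add_node(component=Docs2Answers(), name="Docs2Answers", inputs=["Retriever"])

prediction = pipeline.run(query="How is the virus spreading?", params={"Retriever": {"top_k": 10}})
print_answers(prediction, details="medium")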