Small fixes to the public demo (#1781)

* Make Streamlit tolerant to Haystack not knowing its version, and add a special error for docstore issues * Add workaround for a Streamlit bug * Make default filters value an empty dict * Return more context for each answer in the REST API * Make the hs_version call non-blocking by adding a very quick timeout * Add disclaimer on low-confidence answers * Use the no-answer feature of the reader to highlight questions with no good answer
2025-12-28 07:29:06 +00:00 · 2021-11-22 19:06:08 +01:00 · 2021-11-22 19:06:08 +01:00 · 7167a26483
commit 7167a26483
parent 9211c4c64d
3 changed files with 58 additions and 33 deletions
--- a/rest_api/pipeline/pipelines.yaml
+++ b/rest_api/pipeline/pipelines.yaml
@ -14,6 +14,8 @@ components:    # define all the building-blocks for Pipeline
    type: FARMReader    # Haystack Class name for the component
    params:
      model_name_or_path: deepset/roberta-base-squad2
+      context_window_size: 500
+      return_no_answer: true
  - name: TextFileConverter
    type: TextConverter
  - name: PDFFileConverter
--- a/ui/utils.py
+++ b/ui/utils.py
@ -24,10 +24,9 @@ def haystack_is_ready():
@st.cache
 def haystack_version():
    url = f"{API_ENDPOINT}/{HS_VERSION}"
-    return requests.get(url).json()["hs_version"]
+    return requests.get(url, timeout=0.1).json()["hs_version"]

-
-def retrieve_doc(query, filters=None, top_k_reader=5, top_k_retriever=5):
+def retrieve_doc(query, filters={}, top_k_reader=5, top_k_retriever=5):
    # Query Haystack API
    url = f"{API_ENDPOINT}/{DOC_REQUEST}"
    params = {"filters": filters, "Retriever": {"top_k": top_k_retriever}, "Reader": {"top_k": top_k_reader}}
@ -43,7 +42,7 @@ def retrieve_doc(query, filters=None, top_k_reader=5, top_k_retriever=5):
    answers = response_raw["answers"]
    for i in range(len(answers)):
        answer = answers[i]
-        answer_text = answer["answer"]
+        answer_text = answer.get("answer", None)
        if answer_text:
            result.append(
                {
@ -55,24 +54,35 @@ def retrieve_doc(query, filters=None, top_k_reader=5, top_k_retriever=5):
                    "offset_start_in_doc": answer["offsets_in_document"][0]["start"],
                }
            )
+        else:
+            result.append(
+                {
+                    "context": None,
+                    "answer": None,
+                    "relevance": round(answer["score"] * 100, 2),
+                }
+            )
    return result, response_raw


 def feedback_doc(question, is_correct_answer, document_id, model_id, is_correct_document, answer, offset_start_in_doc):
    # Feedback Haystack API
-    url = f"{API_ENDPOINT}/{DOC_FEEDBACK}"
-    #TODO adjust after Label refactoring
-    req = {
-        "question": question,
-        "is_correct_answer": is_correct_answer,
-        "document_id": document_id,
-        "model_id": model_id,
-        "is_correct_document": is_correct_document,
-        "answer": answer,
-        "offset_start_in_doc": offset_start_in_doc,
-    }
-    response_raw = requests.post(url, json=req).json()
-    return response_raw
+    try:
+        url = f"{API_ENDPOINT}/{DOC_FEEDBACK}"
+        #TODO adjust after Label refactoring
+        req = {
+            "question": question,
+            "is_correct_answer": is_correct_answer,
+            "document_id": document_id,
+            "model_id": model_id,
+            "is_correct_document": is_correct_document,
+            "answer": answer,
+            "offset_start_in_doc": offset_start_in_doc,
+        }
+        response_raw = requests.post(url, json=req).json()
+        return response_raw
+    except Exception as e:
+        logging.exception(e)


 def upload_doc(file):
--- a/ui/webapp.py
+++ b/ui/webapp.py
@ -1,11 +1,15 @@
 import os
 import sys

+import html
 import logging
 import pandas as pd
+from json import JSONDecodeError
 from pathlib import Path
 import streamlit as st
-from annotated_text import annotated_text
+from annotated_text import annotation
+from markdown import markdown
+from htbuilder import H

 # streamlit does not support any states out of the box. On every button click, streamlit reload the whole page
 # and every value gets lost. To keep track of our feedback state we use the official streamlit gist mentioned
@ -23,9 +27,6 @@ EVAL_LABELS = os.getenv("EVAL_FILE", Path(__file__).parent / "eval_labels_exampl
 # Whether the file upload should be enabled or not
 DISABLE_FILE_UPLOAD = os.getenv("HAYSTACK_UI_DISABLE_FILE_UPLOAD")

-# Retrieve Haystack version from the REST API
-HS_VERSION = haystack_version()
-

 def main():

@ -66,6 +67,12 @@ def main():
                    st.subheader("REST API JSON response")
                    st.sidebar.write(raw_json)

+    hs_version = None
+    try:
+        hs_version = f" <small>(v{haystack_version()})</small>"
+    except Exception:
+        pass
+
    st.sidebar.markdown(f"""
    <style>
        a {{
@ -84,7 +91,7 @@ def main():
    </style>
    <div class="haystack-footer">
        <hr />
-        <h4>Built with <a href="https://www.deepset.ai/haystack">Haystack</a> <small>(v{HS_VERSION})</small></h4>
+        <h4>Built with <a href="https://www.deepset.ai/haystack">Haystack</a>{hs_version}</h4>
        <p>Get it on <a href="https://github.com/deepset-ai/haystack/">GitHub</a> &nbsp;&nbsp; - &nbsp;&nbsp; Read the <a href="https://haystack.deepset.ai/overview/intro">Docs</a></p>
        <small>Data crawled from <a href="https://en.wikipedia.org/wiki/Category:Lists_of_countries_by_continent">Wikipedia</a> in November 2021.<br />See the <a href="https://creativecommons.org/licenses/by-sa/3.0/">License</a> (CC BY-SA 3.0).</small>
    </div>
@ -134,6 +141,9 @@ def main():
        ):
            try:
                state.results, state.raw_json = retrieve_doc(question, top_k_reader=top_k_reader, top_k_retriever=top_k_retriever)
+            except JSONDecodeError as je:
+                st.error("👓 &nbsp;&nbsp; An error occurred reading the results. Is the document store working?")
+                return
            except Exception as e:
                logging.exception(e)
                if "The server is busy processing requests" in str(e):
@ -157,11 +167,14 @@ def main():
                answer, context = result["answer"], result["context"]
                start_idx = context.find(answer)
                end_idx = start_idx + len(answer)
-                annotated_text(context[:start_idx], (answer, "ANSWER", "#8ef"), context[end_idx:])
-            else:
-                st.markdown(result["context"])
+                # Hack due to this bug: https://github.com/streamlit/streamlit/issues/3190 
+                st.write(markdown(context[:start_idx] + str(annotation(answer, "ANSWER", "#8ef")) + context[end_idx:]), unsafe_allow_html=True)
+                st.write("**Relevance:** ", result["relevance"], "**Source:** ", result["source"])

-            st.write("**Relevance:** ", result["relevance"], "**Source:** ", result["source"])
+            else:
+                st.warning("🤔 &nbsp;&nbsp; Haystack found no good answer to your question. Try to formulate it differently!")
+                st.write("**Relevance:** ", result["relevance"])
+                
            if eval_mode:
                # Define columns for buttons
                button_col1, button_col2, button_col3, _ = st.columns([1, 1, 1, 6])
@ -169,11 +182,11 @@ def main():
                    feedback_doc(
                        question=question, 
                        is_correct_answer="true", 
-                        document_id=result["document_id"], 
+                        document_id=result.get("document_id", None), 
                        model_id=1, 
                        is_correct_document="true",
-                        answer=result["answer"], 
-                        offset_start_in_doc=result["offset_start_in_doc"]
+                        answer=result["answer"],
+                        offset_start_in_doc=result.get("offset_start_in_doc", None)
                    )
                    st.success("✨ &nbsp;&nbsp; Thanks for your feedback! &nbsp;&nbsp; ✨")

@ -181,11 +194,11 @@ def main():
                    feedback_doc(
                        question=question, 
                        is_correct_answer="false", 
-                        document_id=result["document_id"], 
+                        document_id=result.get("document_id", None), 
                        model_id=1, 
                        is_correct_document="false",
                        answer=result["answer"], 
-                        offset_start_in_doc=result["offset_start_in_doc"]
+                        offset_start_in_doc=result.get("offset_start_in_doc", None)
                    )
                    st.success("✨ &nbsp;&nbsp; Thanks for your feedback! &nbsp;&nbsp; ✨")

@ -193,11 +206,11 @@ def main():
                    feedback_doc(
                        question=question, 
                        is_correct_answer="false", 
-                        document_id=result["document_id"], 
+                        document_id=result.get("document_id", None), 
                        model_id=1, 
                        is_correct_document="true",
                        answer=result["answer"], 
-                        offset_start_in_doc=result["offset_start_in_doc"]
+                        offset_start_in_doc=result.get("offset_start_in_doc", None)
                    )
                    st.success("✨ &nbsp;&nbsp; Thanks for your feedback! &nbsp;&nbsp; ✨")
                count += 1