From d81897535e8306493eabb924e1ec4d8ffc4e4e46 Mon Sep 17 00:00:00 2001
From: Sara Zan
Date: Fri, 19 Nov 2021 11:34:32 +0100
Subject: [PATCH] Public demo (#1747)

* Queries now run only when pressing RUN. File upload hidden. Question is not sent if the textbox is empty.

* Add latest docstring and tutorial changes

* Tidy up: remove needless state, add comments, fix minor bugs

* Had to add results to the status to avoid some bugs in eval mode

* Added 'credits'

* Add footers, update requirements, some random questions for the evaluation

* Add requested changes

* Temporary rollback the UI to the old GoT dataset

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 rest_api/controller/search.py |   6 +
 ui/requirements.txt           |   4 +-
 ui/utils.py                   |  11 +-
 ui/webapp.py                  | 255 +++++++++++++++++++---------------
 4 files changed, 158 insertions(+), 118 deletions(-)

diff --git a/rest_api/controller/search.py b/rest_api/controller/search.py
index b9010bed3..e45557bfe 100644
--- a/rest_api/controller/search.py
+++ b/rest_api/controller/search.py
@@ -4,6 +4,7 @@
 from pathlib import Path
 
 from fastapi import APIRouter
+import haystack
 from haystack.pipelines.base import Pipeline
 from rest_api.config import PIPELINE_YAML_PATH, QUERY_PIPELINE_NAME
 from rest_api.config import LOG_LEVEL, CONCURRENT_REQUEST_PER_WORKER
@@ -42,6 +43,11 @@ def check_status():
     return True
 
 
+@router.get("/hs_version")
+def haystack_version():
+    return {"hs_version": haystack.__version__}
+
+
 @router.post("/query", response_model=QueryResponse, response_model_exclude_none=True)
 def query(request: QueryRequest):
     with concurrency_limiter.run():
diff --git a/ui/requirements.txt b/ui/requirements.txt
index 14d9d9fa3..76c407b5c 100644
--- a/ui/requirements.txt
+++ b/ui/requirements.txt
@@ -1,2 +1,2 @@
-streamlit>=0.84.0
-st-annotated-text==1.1.0
+streamlit>=1.2.0
+st-annotated-text==2.0.0
diff --git a/ui/utils.py b/ui/utils.py
index 7cc18ad47..d2577cc6b 100644
--- a/ui/utils.py
+++ b/ui/utils.py
@@ -6,6 +6,7 @@ import streamlit as st
 
 API_ENDPOINT = os.getenv("API_ENDPOINT", "http://localhost:8000")
 STATUS = "initialized"
+HS_VERSION = "hs_version"
 DOC_REQUEST = "query"
 DOC_FEEDBACK = "feedback"
 DOC_UPLOAD = "file-upload"
@@ -20,8 +21,12 @@ def haystack_is_ready():
         logging.exception(e)
     return False
 
+@st.cache
+def haystack_version():
+    url = f"{API_ENDPOINT}/{HS_VERSION}"
+    return requests.get(url).json()["hs_version"]
+
 
-@st.cache(show_spinner=False)
 def retrieve_doc(query, filters=None, top_k_reader=5, top_k_retriever=5):
     # Query Haystack API
     url = f"{API_ENDPOINT}/{DOC_REQUEST}"
@@ -31,6 +36,10 @@ def retrieve_doc(query, filters=None, top_k_reader=5, top_k_retriever=5):
 
     # Format response
     result = []
+
+    if "errors" in response_raw:
+        raise Exception(", ".join(response_raw["errors"]))
+
     answers = response_raw["answers"]
     for i in range(len(answers)):
         answer = answers[i]
diff --git a/ui/webapp.py b/ui/webapp.py
index 68e16f3f7..5d84e196c 100644
--- a/ui/webapp.py
+++ b/ui/webapp.py
@@ -3,6 +3,7 @@
 import sys
 import logging
 
 import pandas as pd
+from pathlib import Path
 import streamlit as st
 from annotated_text import annotated_text
@@ -10,177 +11,201 @@ from annotated_text import annotated_text
 # and every value gets lost. 
To keep track of our feedback state we use the official streamlit gist mentioned # here https://gist.github.com/tvst/036da038ab3e999a64497f42de966a92 import SessionState -from utils import feedback_doc, haystack_is_ready, retrieve_doc, upload_doc +from utils import HS_VERSION, feedback_doc, haystack_is_ready, retrieve_doc, upload_doc, haystack_version + # Adjust to a question that you would like users to see in the search bar when they load the UI: -DEFAULT_QUESTION_AT_STARTUP = "Who is the father of Arya Stark?" +DEFAULT_QUESTION_AT_STARTUP = "Who's the father of Arya Stark?" +# Labels for the evaluation +EVAL_LABELS = os.getenv("EVAL_FILE", Path(__file__).parent / "eval_labels_example.csv") -def annotate_answer(answer, context): - """ If we are using an extractive QA pipeline, we'll get answers - from the API that we highlight in the given context""" - start_idx = context.find(answer) - end_idx = start_idx + len(answer) - # calculate dynamic height depending on context length - height = int(len(context) * 0.50) + 5 - annotated_text(context[:start_idx], (answer, "ANSWER", "#8ef"), context[end_idx:], height=height) +# Whether the file upload should be enabled or not +DISABLE_FILE_UPLOAD = os.getenv("HAYSTACK_UI_DISABLE_FILE_UPLOAD") - -def show_plain_documents(text): - """ If we are using a plain document search pipeline, i.e. only retriever, we'll get plain documents - from the API that we just show without any highlighting""" - st.markdown(text) - - -def random_questions(df): - """ - Helper to get one random question + gold random_answer from the user's CSV 'eval_labels_example'. - This can then be shown in the UI when the evaluation mode is selected. Users can easily give feedback on the - model's results and "enrich" the eval dataset with more acceptable labels - """ - random_row = df.sample(1) - random_question = random_row["Question Text"].values[0] - random_answer = random_row["Answer"].values[0] - return random_question, random_answer +# Retrieve Haystack version from the REST API +HS_VERSION = haystack_version() def main(): - # Define state - state_question = SessionState.get( - random_question=DEFAULT_QUESTION_AT_STARTUP, random_answer="", next_question="false", run_query="false" + + # Persistent state + state = SessionState.get( + random_question=DEFAULT_QUESTION_AT_STARTUP, + random_answer="", + results=None, + raw_json=None, + get_next_question=True ) - # Initialize variables - eval_mode = False - random_question = DEFAULT_QUESTION_AT_STARTUP - eval_labels = os.getenv("EVAL_FILE", "eval_labels_example.csv") + # Small callback to reset the interface in case the text of the question changes + def reset_results(*args): + state.results = None + state.raw_json = None - # UI search bar and sidebar + # Title st.write("# Haystack Demo") + + # Sidebar st.sidebar.header("Options") top_k_reader = st.sidebar.slider("Max. number of answers", min_value=1, max_value=10, value=3, step=1) - top_k_retriever = st.sidebar.slider( - "Max. number of documents from retriever", min_value=1, max_value=10, value=3, step=1 - ) + top_k_retriever = st.sidebar.slider("Max. 
number of documents from retriever", min_value=1, max_value=10, value=3, step=1) eval_mode = st.sidebar.checkbox("Evaluation mode") debug = st.sidebar.checkbox("Show debug info") - st.sidebar.write("## File Upload:") - data_files = st.sidebar.file_uploader("", type=["pdf", "txt", "docx"], accept_multiple_files=True) - for data_file in data_files: - # Upload file - if data_file: - raw_json = upload_doc(data_file) - st.sidebar.write(raw_json) - if debug: - st.subheader("REST API JSON response") - st.sidebar.write(raw_json) + # File upload block + if not DISABLE_FILE_UPLOAD: + st.sidebar.write("## File Upload:") + data_files = st.sidebar.file_uploader("", type=["pdf", "txt", "docx"], accept_multiple_files=True) + for data_file in data_files: + # Upload file + if data_file: + raw_json = upload_doc(data_file) + st.sidebar.write(str(data_file.name) + "    ✅ ") + if debug: + st.subheader("REST API JSON response") + st.sidebar.write(raw_json) - # load csv into pandas dataframe + st.sidebar.markdown(f""" + + + """, unsafe_allow_html=True) + + # Load csv into pandas dataframe if eval_mode: try: - df = pd.read_csv(eval_labels, sep=";") + df = pd.read_csv(EVAL_LABELS, sep=";") except Exception: - sys.exit("The eval file was not found. Please check the README for more information.") - if ( - state_question - and hasattr(state_question, "next_question") - and hasattr(state_question, "random_question") - and state_question.next_question - ): - random_question = state_question.random_question - random_answer = state_question.random_answer - else: - random_question, random_answer = random_questions(df) - state_question.random_question = random_question - state_question.random_answer = random_answer + st.error(f"The eval file was not found. Please check the demo's [README](https://github.com/deepset-ai/haystack/tree/master/ui/README.md) for more information.") + sys.exit(f"The eval file was not found under `{EVAL_LABELS}`. Please check the README (https://github.com/deepset-ai/haystack/tree/master/ui/README.md) for more information.") - # Get next random question from the CSV - if eval_mode: - next_question = st.button("Load new question") - if next_question: - random_question, random_answer = random_questions(df) - state_question.random_question = random_question - state_question.random_answer = random_answer - state_question.next_question = True - state_question.run_query = False - else: - state_question.next_question = False + # Get next random question from the CSV + state.get_next_question = st.button("Load new question") + if state.get_next_question: + reset_results() + new_row = df.sample(1) + while new_row["Question Text"].values[0] == state.random_question: # Avoid picking the same question twice (the change is not visible on the UI) + new_row = df.sample(1) + state.random_question = new_row["Question Text"].values[0] + state.random_answer = new_row["Answer"].values[0] # Search bar - question = st.text_input("Please provide your query:", value=random_question) - if state_question and state_question.run_query: - run_query = state_question.run_query - st.button("Run") - else: - run_query = st.button("Run") - state_question.run_query = run_query - - raw_json_feedback = "" + question = st.text_input( + "Please provide your query:", + value=state.random_question, + max_chars=100, + on_change=reset_results + ) + run_query = st.button("Run") + # Check the connection with st.spinner("⌛️    Haystack is starting..."): if not haystack_is_ready(): st.error("🚫    Connection Error. 
Is Haystack running?") run_query = False + reset_results() # Get results for query - if run_query: + if run_query and question: + reset_results() with st.spinner( "🧠    Performing neural search on documents... \n " "Do you want to optimize speed or accuracy? \n" "Check out the docs: https://haystack.deepset.ai/usage/optimization " ): try: - results, raw_json = retrieve_doc(question, top_k_reader=top_k_reader, top_k_retriever=top_k_retriever) + state.results, state.raw_json = retrieve_doc(question, top_k_reader=top_k_reader, top_k_retriever=top_k_retriever) except Exception as e: logging.exception(e) - st.error("🐞    An error occurred during the request. Check the logs in the console to know more.") + if "The server is busy processing requests" in str(e): + st.error("🧑‍🌾    All our workers are busy! Try again later.") + else: + st.error("🐞    An error occurred during the request. Check the logs in the console to know more.") return - # Show if we use a question of the given set - if question == random_question and eval_mode: + if state.results: + + # Show the gold answer if we use a question of the given set + if question == state.random_question and eval_mode: st.write("## Correct answers:") - random_answer + st.write(state.random_answer) st.write("## Results:") + count = 0 # Make every button key unique - # Make every button key unique - count = 0 - - for result in results: + for result in state.results: if result["answer"]: - annotate_answer(result["answer"], result["context"]) + answer, context = result["answer"], result["context"] + start_idx = context.find(answer) + end_idx = start_idx + len(answer) + annotated_text(context[:start_idx], (answer, "ANSWER", "#8ef"), context[end_idx:]) else: - show_plain_documents(result["context"]) + st.markdown(result["context"]) + st.write("**Relevance:** ", result["relevance"], "**Source:** ", result["source"]) if eval_mode: # Define columns for buttons - button_col1, button_col2, button_col3, button_col4 = st.columns([1, 1, 1, 6]) - if button_col1.button("👍", key=(result["context"] + str(count) + "1"), help="Correct answer"): - raw_json_feedback = feedback_doc( - question, "true", result["document_id"], 1, "true", result["answer"], result["offset_start_in_doc"] + button_col1, button_col2, button_col3, _ = st.columns([1, 1, 1, 6]) + if button_col1.button("👍", key=f"{result['context']}{count}1", help="Correct answer"): + feedback_doc( + question=question, + is_correct_answer="true", + document_id=result["document_id"], + model_id=1, + is_correct_document="true", + answer=result["answer"], + offset_start_in_doc=result["offset_start_in_doc"] ) - st.success("Thanks for your feedback") - if button_col2.button("👎", key=(result["context"] + str(count) + "2"), help="Wrong answer and wrong passage"): - raw_json_feedback = feedback_doc( - question, - "false", - result["document_id"], - 1, - "false", - result["answer"], - result["offset_start_in_doc"], + st.success("✨    Thanks for your feedback!    
✨") + + if button_col2.button("👎", key=f"{result['context']}{count}2", help="Wrong answer and wrong passage"): + feedback_doc( + question=question, + is_correct_answer="false", + document_id=result["document_id"], + model_id=1, + is_correct_document="false", + answer=result["answer"], + offset_start_in_doc=result["offset_start_in_doc"] ) - st.success("Thanks for your feedback!") - if button_col3.button("👎👍", key=(result["context"] + str(count) + "3"), help="Wrong answer, but correct passage"): - raw_json_feedback = feedback_doc( - question, "false", result["document_id"], 1, "true", result["answer"], result["offset_start_in_doc"] + st.success("✨    Thanks for your feedback!    ✨") + + if button_col3.button("👎👍", key=f"{result['context']}{count}3", help="Wrong answer, but correct passage"): + feedback_doc( + question=question, + is_correct_answer="false", + document_id=result["document_id"], + model_id=1, + is_correct_document="true", + answer=result["answer"], + offset_start_in_doc=result["offset_start_in_doc"] ) - st.success("Thanks for your feedback!") + st.success("✨    Thanks for your feedback!    ✨") count += 1 st.write("___") + if debug: st.subheader("REST API JSON response") - st.write(raw_json) + st.write(state.raw_json) + main()