Small fixes to the public demo (#1781)

* Make Streamlit tolerant of Haystack not knowing its version, and add a special error for docstore issues

* Add workaround for a Streamlit bug

* Make default filters value an empty dict

* Return more context for each answer in the rest api

* Make the hs_version call non-blocking by adding a very short timeout

* Add disclaimer on low-confidence answers

* Use the no-answer feature of the reader to highlight questions with no good answer
This commit is contained in:
Sara Zan 2021-11-22 19:06:08 +01:00 committed by GitHub
parent 9211c4c64d
commit 7167a26483
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 58 additions and 33 deletions

View File

@ -14,6 +14,8 @@ components: # define all the building-blocks for Pipeline
type: FARMReader # Haystack Class name for the component
params:
model_name_or_path: deepset/roberta-base-squad2
context_window_size: 500
return_no_answer: true
- name: TextFileConverter
type: TextConverter
- name: PDFFileConverter

View File

@ -24,10 +24,9 @@ def haystack_is_ready():
@st.cache
def haystack_version():
url = f"{API_ENDPOINT}/{HS_VERSION}"
return requests.get(url).json()["hs_version"]
return requests.get(url, timeout=0.1).json()["hs_version"]
def retrieve_doc(query, filters=None, top_k_reader=5, top_k_retriever=5):
def retrieve_doc(query, filters={}, top_k_reader=5, top_k_retriever=5):
# Query Haystack API
url = f"{API_ENDPOINT}/{DOC_REQUEST}"
params = {"filters": filters, "Retriever": {"top_k": top_k_retriever}, "Reader": {"top_k": top_k_reader}}
@ -43,7 +42,7 @@ def retrieve_doc(query, filters=None, top_k_reader=5, top_k_retriever=5):
answers = response_raw["answers"]
for i in range(len(answers)):
answer = answers[i]
answer_text = answer["answer"]
answer_text = answer.get("answer", None)
if answer_text:
result.append(
{
@ -55,24 +54,35 @@ def retrieve_doc(query, filters=None, top_k_reader=5, top_k_retriever=5):
"offset_start_in_doc": answer["offsets_in_document"][0]["start"],
}
)
else:
result.append(
{
"context": None,
"answer": None,
"relevance": round(answer["score"] * 100, 2),
}
)
return result, response_raw
def feedback_doc(question, is_correct_answer, document_id, model_id, is_correct_document, answer, offset_start_in_doc):
# Feedback Haystack API
url = f"{API_ENDPOINT}/{DOC_FEEDBACK}"
#TODO adjust after Label refactoring
req = {
"question": question,
"is_correct_answer": is_correct_answer,
"document_id": document_id,
"model_id": model_id,
"is_correct_document": is_correct_document,
"answer": answer,
"offset_start_in_doc": offset_start_in_doc,
}
response_raw = requests.post(url, json=req).json()
return response_raw
try:
url = f"{API_ENDPOINT}/{DOC_FEEDBACK}"
#TODO adjust after Label refactoring
req = {
"question": question,
"is_correct_answer": is_correct_answer,
"document_id": document_id,
"model_id": model_id,
"is_correct_document": is_correct_document,
"answer": answer,
"offset_start_in_doc": offset_start_in_doc,
}
response_raw = requests.post(url, json=req).json()
return response_raw
except Exception as e:
logging.exception(e)
def upload_doc(file):

View File

@ -1,11 +1,15 @@
import os
import sys
import html
import logging
import pandas as pd
from json import JSONDecodeError
from pathlib import Path
import streamlit as st
from annotated_text import annotated_text
from annotated_text import annotation
from markdown import markdown
from htbuilder import H
# streamlit does not support any states out of the box. On every button click, streamlit reload the whole page
# and every value gets lost. To keep track of our feedback state we use the official streamlit gist mentioned
@ -23,9 +27,6 @@ EVAL_LABELS = os.getenv("EVAL_FILE", Path(__file__).parent / "eval_labels_exampl
# Whether the file upload should be enabled or not
DISABLE_FILE_UPLOAD = os.getenv("HAYSTACK_UI_DISABLE_FILE_UPLOAD")
# Retrieve Haystack version from the REST API
HS_VERSION = haystack_version()
def main():
@ -66,6 +67,12 @@ def main():
st.subheader("REST API JSON response")
st.sidebar.write(raw_json)
hs_version = None
try:
hs_version = f" <small>(v{haystack_version()})</small>"
except Exception:
pass
st.sidebar.markdown(f"""
<style>
a {{
@ -84,7 +91,7 @@ def main():
</style>
<div class="haystack-footer">
<hr />
<h4>Built with <a href="https://www.deepset.ai/haystack">Haystack</a> <small>(v{HS_VERSION})</small></h4>
<h4>Built with <a href="https://www.deepset.ai/haystack">Haystack</a>{hs_version}</h4>
<p>Get it on <a href="https://github.com/deepset-ai/haystack/">GitHub</a> &nbsp;&nbsp; - &nbsp;&nbsp; Read the <a href="https://haystack.deepset.ai/overview/intro">Docs</a></p>
<small>Data crawled from <a href="https://en.wikipedia.org/wiki/Category:Lists_of_countries_by_continent">Wikipedia</a> in November 2021.<br />See the <a href="https://creativecommons.org/licenses/by-sa/3.0/">License</a> (CC BY-SA 3.0).</small>
</div>
@ -134,6 +141,9 @@ def main():
):
try:
state.results, state.raw_json = retrieve_doc(question, top_k_reader=top_k_reader, top_k_retriever=top_k_retriever)
except JSONDecodeError as je:
st.error("👓 &nbsp;&nbsp; An error occurred reading the results. Is the document store working?")
return
except Exception as e:
logging.exception(e)
if "The server is busy processing requests" in str(e):
@ -157,11 +167,14 @@ def main():
answer, context = result["answer"], result["context"]
start_idx = context.find(answer)
end_idx = start_idx + len(answer)
annotated_text(context[:start_idx], (answer, "ANSWER", "#8ef"), context[end_idx:])
else:
st.markdown(result["context"])
# Hack due to this bug: https://github.com/streamlit/streamlit/issues/3190
st.write(markdown(context[:start_idx] + str(annotation(answer, "ANSWER", "#8ef")) + context[end_idx:]), unsafe_allow_html=True)
st.write("**Relevance:** ", result["relevance"], "**Source:** ", result["source"])
st.write("**Relevance:** ", result["relevance"], "**Source:** ", result["source"])
else:
st.warning("🤔 &nbsp;&nbsp; Haystack found no good answer to your question. Try to formulate it differently!")
st.write("**Relevance:** ", result["relevance"])
if eval_mode:
# Define columns for buttons
button_col1, button_col2, button_col3, _ = st.columns([1, 1, 1, 6])
@ -169,11 +182,11 @@ def main():
feedback_doc(
question=question,
is_correct_answer="true",
document_id=result["document_id"],
document_id=result.get("document_id", None),
model_id=1,
is_correct_document="true",
answer=result["answer"],
offset_start_in_doc=result["offset_start_in_doc"]
answer=result["answer"],
offset_start_in_doc=result.get("offset_start_in_doc", None)
)
st.success("✨ &nbsp;&nbsp; Thanks for your feedback! &nbsp;&nbsp; ✨")
@ -181,11 +194,11 @@ def main():
feedback_doc(
question=question,
is_correct_answer="false",
document_id=result["document_id"],
document_id=result.get("document_id", None),
model_id=1,
is_correct_document="false",
answer=result["answer"],
offset_start_in_doc=result["offset_start_in_doc"]
offset_start_in_doc=result.get("offset_start_in_doc", None)
)
st.success("✨ &nbsp;&nbsp; Thanks for your feedback! &nbsp;&nbsp; ✨")
@ -193,11 +206,11 @@ def main():
feedback_doc(
question=question,
is_correct_answer="false",
document_id=result["document_id"],
document_id=result.get("document_id", None),
model_id=1,
is_correct_document="true",
answer=result["answer"],
offset_start_in_doc=result["offset_start_in_doc"]
offset_start_in_doc=result.get("offset_start_in_doc", None)
)
st.success("✨ &nbsp;&nbsp; Thanks for your feedback! &nbsp;&nbsp; ✨")
count += 1