Public demo (#1747)

* Queries now run only when the RUN button is pressed. The file upload widget can be hidden. The question is not sent if the textbox is empty.

* Add latest docstring and tutorial changes

* Tidy up: remove needless state, add comments, fix minor bugs

* Had to add the results to the session state to avoid some bugs in eval mode

* Added 'credits'

* Add footers, update requirements, some random questions for the evaluation

* Add requested changes

* Temporarily roll back the UI to the old GoT dataset

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Sara Zan 2021-11-19 11:34:32 +01:00 committed by GitHub
parent c0892717a0
commit d81897535e
4 changed files with 158 additions and 118 deletions

View File

@@ -4,6 +4,7 @@ from pathlib import Path
 from fastapi import APIRouter
+import haystack
 from haystack.pipelines.base import Pipeline
 from rest_api.config import PIPELINE_YAML_PATH, QUERY_PIPELINE_NAME
 from rest_api.config import LOG_LEVEL, CONCURRENT_REQUEST_PER_WORKER
@@ -42,6 +43,11 @@ def check_status():
     return True

+
+@router.get("/hs_version")
+def haystack_version():
+    return {"hs_version": haystack.__version__}
+
 @router.post("/query", response_model=QueryResponse, response_model_exclude_none=True)
 def query(request: QueryRequest):
     with concurrency_limiter.run():
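
The new /hs_version endpoint exposes the backend's Haystack version to clients; the UI reads it to render the footer further down. As a quick sanity check from Python, assuming the API runs at the demo's default http://localhost:8000:

    import requests

    # Ask the REST API which Haystack version it is running.
    # http://localhost:8000 is the default API_ENDPOINT used by the demo UI.
    version = requests.get("http://localhost:8000/hs_version").json()["hs_version"]
    print(version)  # e.g. "1.0.0"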

View File

@@ -1,2 +1,2 @@
-streamlit>=0.84.0
-st-annotated-text==1.1.0
+streamlit>=1.2.0
+st-annotated-text==2.0.0

View File

@@ -6,6 +6,7 @@ import streamlit as st
 API_ENDPOINT = os.getenv("API_ENDPOINT", "http://localhost:8000")
 STATUS = "initialized"
+HS_VERSION = "hs_version"
 DOC_REQUEST = "query"
 DOC_FEEDBACK = "feedback"
 DOC_UPLOAD = "file-upload"
@@ -20,8 +21,12 @@ def haystack_is_ready():
         logging.exception(e)
     return False

+
+@st.cache
+def haystack_version():
+    url = f"{API_ENDPOINT}/{HS_VERSION}"
+    return requests.get(url).json()["hs_version"]
+
 @st.cache(show_spinner=False)
 def retrieve_doc(query, filters=None, top_k_reader=5, top_k_retriever=5):
     # Query Haystack API
     url = f"{API_ENDPOINT}/{DOC_REQUEST}"
@@ -31,6 +36,10 @@ def retrieve_doc(query, filters=None, top_k_reader=5, top_k_retriever=5):

     # Format response
     result = []
+
+    if "errors" in response_raw:
+        raise Exception(", ".join(response_raw["errors"]))
+
     answers = response_raw["answers"]
     for i in range(len(answers)):
         answer = answers[i]
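
For context, the new `errors` branch assumes the REST API reports failures as a JSON body containing an `errors` list; the busy-server message raised here is exactly the substring the UI matches on in webapp.py below. A minimal, self-contained sketch of that interplay (the payload is illustrative, not a captured API response):

    # Illustrative payload: what the API is assumed to return when the
    # concurrency limit (CONCURRENT_REQUEST_PER_WORKER) is exceeded.
    response_raw = {"errors": ["The server is busy processing requests."]}

    try:
        if "errors" in response_raw:
            raise Exception(", ".join(response_raw["errors"]))
    except Exception as e:
        if "The server is busy processing requests" in str(e):
            print("All our workers are busy! Try again later.")  # shown via st.error in the UI
        else:
            print("An error occurred during the request.")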

View File

@@ -3,6 +3,7 @@ import sys
 import logging
 import pandas as pd
+from pathlib import Path
 import streamlit as st
 from annotated_text import annotated_text

@@ -10,177 +11,201 @@ from annotated_text import annotated_text
 # and every value gets lost. To keep track of our feedback state we use the official streamlit gist mentioned
 # here https://gist.github.com/tvst/036da038ab3e999a64497f42de966a92
 import SessionState
-from utils import feedback_doc, haystack_is_ready, retrieve_doc, upload_doc
+from utils import HS_VERSION, feedback_doc, haystack_is_ready, retrieve_doc, upload_doc, haystack_version

 # Adjust to a question that you would like users to see in the search bar when they load the UI:
-DEFAULT_QUESTION_AT_STARTUP = "Who is the father of Arya Stark?"
+DEFAULT_QUESTION_AT_STARTUP = "Who's the father of Arya Stark?"
+
+# Labels for the evaluation
+EVAL_LABELS = os.getenv("EVAL_FILE", Path(__file__).parent / "eval_labels_example.csv")
+
+# Whether the file upload should be enabled or not
+DISABLE_FILE_UPLOAD = os.getenv("HAYSTACK_UI_DISABLE_FILE_UPLOAD")
+
+# Retrieve Haystack version from the REST API
+HS_VERSION = haystack_version()

-def annotate_answer(answer, context):
-    """ If we are using an extractive QA pipeline, we'll get answers
-    from the API that we highlight in the given context"""
-    start_idx = context.find(answer)
-    end_idx = start_idx + len(answer)
-    # calculate dynamic height depending on context length
-    height = int(len(context) * 0.50) + 5
-    annotated_text(context[:start_idx], (answer, "ANSWER", "#8ef"), context[end_idx:], height=height)
-
-def show_plain_documents(text):
-    """ If we are using a plain document search pipeline, i.e. only retriever, we'll get plain documents
-    from the API that we just show without any highlighting"""
-    st.markdown(text)
-
-def random_questions(df):
-    """
-    Helper to get one random question + gold random_answer from the user's CSV 'eval_labels_example'.
-    This can then be shown in the UI when the evaluation mode is selected. Users can easily give feedback on the
-    model's results and "enrich" the eval dataset with more acceptable labels
-    """
-    random_row = df.sample(1)
-    random_question = random_row["Question Text"].values[0]
-    random_answer = random_row["Answer"].values[0]
-    return random_question, random_answer

 def main():
-    # Define state
-    state_question = SessionState.get(
-        random_question=DEFAULT_QUESTION_AT_STARTUP, random_answer="", next_question="false", run_query="false"
-    )
-
-    # Initialize variables
-    eval_mode = False
-    random_question = DEFAULT_QUESTION_AT_STARTUP
-    eval_labels = os.getenv("EVAL_FILE", "eval_labels_example.csv")
-
-    # UI search bar and sidebar
+    # Persistent state
+    state = SessionState.get(
+        random_question=DEFAULT_QUESTION_AT_STARTUP,
+        random_answer="",
+        results=None,
+        raw_json=None,
+        get_next_question=True
+    )
+
+    # Small callback to reset the interface in case the text of the question changes
+    def reset_results(*args):
+        state.results = None
+        state.raw_json = None
+
+    # Title
     st.write("# Haystack Demo")
+
+    # Sidebar
     st.sidebar.header("Options")
     top_k_reader = st.sidebar.slider("Max. number of answers", min_value=1, max_value=10, value=3, step=1)
-    top_k_retriever = st.sidebar.slider(
-        "Max. number of documents from retriever", min_value=1, max_value=10, value=3, step=1
-    )
+    top_k_retriever = st.sidebar.slider("Max. number of documents from retriever", min_value=1, max_value=10, value=3, step=1)
     eval_mode = st.sidebar.checkbox("Evaluation mode")
     debug = st.sidebar.checkbox("Show debug info")

-    st.sidebar.write("## File Upload:")
-    data_files = st.sidebar.file_uploader("", type=["pdf", "txt", "docx"], accept_multiple_files=True)
-    for data_file in data_files:
-        # Upload file
-        if data_file:
-            raw_json = upload_doc(data_file)
-            st.sidebar.write(raw_json)
-            if debug:
-                st.subheader("REST API JSON response")
-                st.sidebar.write(raw_json)
+    # File upload block
+    if not DISABLE_FILE_UPLOAD:
+        st.sidebar.write("## File Upload:")
+        data_files = st.sidebar.file_uploader("", type=["pdf", "txt", "docx"], accept_multiple_files=True)
+        for data_file in data_files:
+            # Upload file
+            if data_file:
+                raw_json = upload_doc(data_file)
+                st.sidebar.write(str(data_file.name) + " &nbsp;&nbsp; ✅ ")
+                if debug:
+                    st.subheader("REST API JSON response")
+                    st.sidebar.write(raw_json)
+
+    st.sidebar.markdown(f"""
+    <style>
+        a {{
+            text-decoration: none;
+        }}
+        .haystack-footer {{
+            text-align: center;
+        }}
+        .haystack-footer h4 {{
+            margin: 0.1rem;
+            padding:0;
+        }}
+        footer {{
+            opacity: 0;
+        }}
+    </style>
+    <div class="haystack-footer">
+        <hr />
+        <h4>Built with <a href="https://www.deepset.ai/haystack">Haystack</a> <small>(v{HS_VERSION})</small></h4>
+        <p>Get it on <a href="https://github.com/deepset-ai/haystack/">GitHub</a> &nbsp;&nbsp; - &nbsp;&nbsp; Read the <a href="https://haystack.deepset.ai/overview/intro">Docs</a></p>
+        <small>Data crawled from <a href="https://en.wikipedia.org/wiki/Category:Lists_of_countries_by_continent">Wikipedia</a> in November 2021.<br />See the <a href="https://creativecommons.org/licenses/by-sa/3.0/">License</a> (CC BY-SA 3.0).</small>
+    </div>
+    """, unsafe_allow_html=True)

-    # load csv into pandas dataframe
+    # Load csv into pandas dataframe
     if eval_mode:
         try:
-            df = pd.read_csv(eval_labels, sep=";")
+            df = pd.read_csv(EVAL_LABELS, sep=";")
         except Exception:
-            sys.exit("The eval file was not found. Please check the README for more information.")
-        if (
-            state_question
-            and hasattr(state_question, "next_question")
-            and hasattr(state_question, "random_question")
-            and state_question.next_question
-        ):
-            random_question = state_question.random_question
-            random_answer = state_question.random_answer
-        else:
-            random_question, random_answer = random_questions(df)
-            state_question.random_question = random_question
-            state_question.random_answer = random_answer
-
-    # Get next random question from the CSV
-    if eval_mode:
-        next_question = st.button("Load new question")
-        if next_question:
-            random_question, random_answer = random_questions(df)
-            state_question.random_question = random_question
-            state_question.random_answer = random_answer
-            state_question.next_question = True
-            state_question.run_query = False
-        else:
-            state_question.next_question = False
+            st.error(f"The eval file was not found. Please check the demo's [README](https://github.com/deepset-ai/haystack/tree/master/ui/README.md) for more information.")
+            sys.exit(f"The eval file was not found under `{EVAL_LABELS}`. Please check the README (https://github.com/deepset-ai/haystack/tree/master/ui/README.md) for more information.")
+
+        # Get next random question from the CSV
+        state.get_next_question = st.button("Load new question")
+        if state.get_next_question:
+            reset_results()
+            new_row = df.sample(1)
+            while new_row["Question Text"].values[0] == state.random_question:  # Avoid picking the same question twice (the change is not visible on the UI)
+                new_row = df.sample(1)
+            state.random_question = new_row["Question Text"].values[0]
+            state.random_answer = new_row["Answer"].values[0]

     # Search bar
-    question = st.text_input("Please provide your query:", value=random_question)
-    if state_question and state_question.run_query:
-        run_query = state_question.run_query
-        st.button("Run")
-    else:
-        run_query = st.button("Run")
-        state_question.run_query = run_query
-
-    raw_json_feedback = ""
+    question = st.text_input(
+        "Please provide your query:",
+        value=state.random_question,
+        max_chars=100,
+        on_change=reset_results
+    )
+    run_query = st.button("Run")

+    # Check the connection
     with st.spinner("⌛️ &nbsp;&nbsp; Haystack is starting..."):
         if not haystack_is_ready():
             st.error("🚫 &nbsp;&nbsp; Connection Error. Is Haystack running?")
             run_query = False
+            reset_results()

     # Get results for query
-    if run_query:
+    if run_query and question:
+        reset_results()
         with st.spinner(
             "🧠 &nbsp;&nbsp; Performing neural search on documents... \n "
             "Do you want to optimize speed or accuracy? \n"
             "Check out the docs: https://haystack.deepset.ai/usage/optimization "
         ):
             try:
-                results, raw_json = retrieve_doc(question, top_k_reader=top_k_reader, top_k_retriever=top_k_retriever)
+                state.results, state.raw_json = retrieve_doc(question, top_k_reader=top_k_reader, top_k_retriever=top_k_retriever)
             except Exception as e:
                 logging.exception(e)
-                st.error("🐞 &nbsp;&nbsp; An error occurred during the request. Check the logs in the console to know more.")
+                if "The server is busy processing requests" in str(e):
+                    st.error("🧑‍🌾 &nbsp;&nbsp; All our workers are busy! Try again later.")
+                else:
+                    st.error("🐞 &nbsp;&nbsp; An error occurred during the request. Check the logs in the console to know more.")
                 return

-        # Show if we use a question of the given set
-        if question == random_question and eval_mode:
-            st.write("## Correct answers:")
-            random_answer
-
-        st.write("## Results:")
-        count = 0
-        # Make every button key unique
-        for result in results:
+    if state.results:
+
+        # Show the gold answer if we use a question of the given set
+        if question == state.random_question and eval_mode:
+            st.write("## Correct answers:")
+            st.write(state.random_answer)
+
+        st.write("## Results:")
+
+        # Make every button key unique
+        count = 0
+
+        for result in state.results:
             if result["answer"]:
-                annotate_answer(result["answer"], result["context"])
+                answer, context = result["answer"], result["context"]
+                start_idx = context.find(answer)
+                end_idx = start_idx + len(answer)
+                annotated_text(context[:start_idx], (answer, "ANSWER", "#8ef"), context[end_idx:])
             else:
-                show_plain_documents(result["context"])
+                st.markdown(result["context"])
             st.write("**Relevance:** ", result["relevance"], "**Source:** ", result["source"])
             if eval_mode:
                 # Define columns for buttons
-                button_col1, button_col2, button_col3, button_col4 = st.columns([1, 1, 1, 6])
-                if button_col1.button("👍", key=(result["context"] + str(count) + "1"), help="Correct answer"):
-                    raw_json_feedback = feedback_doc(
-                        question, "true", result["document_id"], 1, "true", result["answer"], result["offset_start_in_doc"]
-                    )
-                    st.success("Thanks for your feedback")
-                if button_col2.button("👎", key=(result["context"] + str(count) + "2"), help="Wrong answer and wrong passage"):
-                    raw_json_feedback = feedback_doc(
-                        question,
-                        "false",
-                        result["document_id"],
-                        1,
-                        "false",
-                        result["answer"],
-                        result["offset_start_in_doc"],
-                    )
-                    st.success("Thanks for your feedback!")
-                if button_col3.button("👎👍", key=(result["context"] + str(count) + "3"), help="Wrong answer, but correct passage"):
-                    raw_json_feedback = feedback_doc(
-                        question, "false", result["document_id"], 1, "true", result["answer"], result["offset_start_in_doc"]
-                    )
-                    st.success("Thanks for your feedback!")
+                button_col1, button_col2, button_col3, _ = st.columns([1, 1, 1, 6])
+                if button_col1.button("👍", key=f"{result['context']}{count}1", help="Correct answer"):
+                    feedback_doc(
+                        question=question,
+                        is_correct_answer="true",
+                        document_id=result["document_id"],
+                        model_id=1,
+                        is_correct_document="true",
+                        answer=result["answer"],
+                        offset_start_in_doc=result["offset_start_in_doc"]
+                    )
+                    st.success("✨ &nbsp;&nbsp; Thanks for your feedback! &nbsp;&nbsp; ✨")
+
+                if button_col2.button("👎", key=f"{result['context']}{count}2", help="Wrong answer and wrong passage"):
+                    feedback_doc(
+                        question=question,
+                        is_correct_answer="false",
+                        document_id=result["document_id"],
+                        model_id=1,
+                        is_correct_document="false",
+                        answer=result["answer"],
+                        offset_start_in_doc=result["offset_start_in_doc"]
+                    )
+                    st.success("✨ &nbsp;&nbsp; Thanks for your feedback! &nbsp;&nbsp; ✨")
+
+                if button_col3.button("👎👍", key=f"{result['context']}{count}3", help="Wrong answer, but correct passage"):
+                    feedback_doc(
+                        question=question,
+                        is_correct_answer="false",
+                        document_id=result["document_id"],
+                        model_id=1,
+                        is_correct_document="true",
+                        answer=result["answer"],
+                        offset_start_in_doc=result["offset_start_in_doc"]
+                    )
+                    st.success("✨ &nbsp;&nbsp; Thanks for your feedback! &nbsp;&nbsp; ✨")
+
             count += 1
             st.write("___")
+
     if debug:
         st.subheader("REST API JSON response")
-        st.write(raw_json)
+        st.write(state.raw_json)


 main()
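
A note on the `SessionState` import at the top of ui/webapp.py: it is the community gist linked in the file's comments, written before Streamlit shipped `st.session_state`. Conceptually it is just a per-session bag of attributes; a simplified sketch of the idea (not the gist's actual implementation, which hooks into Streamlit's server internals to find the current session):

    class SessionState:
        """A bag of attributes that persists across Streamlit reruns."""

        def __init__(self, **kwargs):
            for key, value in kwargs.items():
                setattr(self, key, value)

    _STATE = None

    def get(**defaults):
        # The real gist attaches one SessionState to each browser session;
        # this sketch keeps a single module-level instance instead.
        global _STATE
        if _STATE is None:
            _STATE = SessionState(**defaults)
        return _STATE

    # Usage mirrors webapp.py: set defaults once, then mutate attributes freely.
    state = get(results=None, raw_json=None)
    state.results = ["some answer"]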