2021-04-22 17:30:17 +02:00
import os
import sys
2021-04-30 14:16:30 +05:30
2021-09-27 16:40:25 +02:00
import logging
2021-04-30 14:16:30 +05:30
import pandas as pd
2021-11-22 19:06:08 +01:00
from json import JSONDecodeError
2021-11-19 11:34:32 +01:00
from pathlib import Path
2020-12-27 18:06:09 +05:30
import streamlit as st
2021-11-22 19:06:08 +01:00
from annotated_text import annotation
from markdown import markdown
2021-04-30 14:16:30 +05:30
# Streamlit does not keep any state out of the box: on every button click it reloads the whole page
# and every value gets lost. To keep track of our feedback state we use the official Streamlit gist mentioned
2021-04-22 17:30:17 +02:00
# here https://gist.github.com/tvst/036da038ab3e999a64497f42de966a92
import SessionState
2021-12-02 13:37:23 +01:00
from utils import HS_VERSION , haystack_is_ready , query , send_feedback , upload_doc , haystack_version , get_backlink
2021-06-24 09:53:08 +02:00
2021-04-07 17:53:32 +02:00
2021-11-19 11:34:32 +01:00
def env_flag(name, default=False):
    """Read the environment variable `name` as a boolean switch.

    Returns `default` when the variable is unset. An empty value and the
    case-insensitive spellings "0", "false", "no" and "off" count as False;
    any other value counts as True.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() not in {"", "0", "false", "no", "off"}


# Adjust to a question that you would like users to see in the search bar when they load the UI:
DEFAULT_QUESTION_AT_STARTUP = os.getenv("DEFAULT_QUESTION_AT_STARTUP", "What's the capital of France?")

# Sliders
DEFAULT_DOCS_FROM_RETRIEVER = int(os.getenv("DEFAULT_DOCS_FROM_RETRIEVER", 3))
DEFAULT_NUMBER_OF_ANSWERS = int(os.getenv("DEFAULT_NUMBER_OF_ANSWERS", 3))

# Labels for the evaluation (path taken from EVAL_FILE, defaulting to the example CSV next to this file)
EVAL_LABELS = os.getenv("EVAL_FILE", Path(__file__).parent / "eval_labels_example.csv")

# Whether the file upload should be enabled or not.
# A plain bool() on the raw string would treat "0" or "false" as True, so the value is parsed explicitly.
DISABLE_FILE_UPLOAD = env_flag("DISABLE_FILE_UPLOAD")
2021-06-24 09:53:08 +02:00
2021-04-07 17:53:32 +02:00
2021-09-27 16:40:25 +02:00
def _highlight_answer(context, answer):
    """Return `context` rendered to HTML with `answer` marked as an "ANSWER" annotation.

    When `answer` is not a literal substring of `context`, the annotated
    answer is shown on its own, followed by the untouched context.
    """
    highlighted = str(annotation(answer, "ANSWER", "#8ef"))
    start_idx = context.find(answer)
    if start_idx < 0:
        # str.find() returns -1 when there is no match; slicing with that value
        # would cut the last character and duplicate the tail of the context.
        return markdown(highlighted + "\n\n" + context)
    end_idx = start_idx + len(answer)
    return markdown(context[:start_idx] + highlighted + context[end_idx:])


def main():
    """Render the Haystack demo page for one Streamlit script run.

    Builds the page (title, sidebar options, optional file upload, footer and
    search bar), sends the question to Haystack through `query` when "Run" is
    pressed or the question differs from the last one asked, and lists the
    returned answers. In evaluation mode each answer gets feedback buttons
    whose verdict is submitted with `send_feedback`.

    Exits the script through `sys.exit` if the evaluation CSV cannot be read.
    """
    st.set_page_config(page_title="Haystack Demo", page_icon="https://haystack.deepset.ai/img/HaystackIcon.png")

    # Persistent state
    state = SessionState.get(
        random_question=DEFAULT_QUESTION_AT_STARTUP,
        random_answer="",
        last_question=DEFAULT_QUESTION_AT_STARTUP,
        results=None,
        raw_json=None,
    )

    # Small callback to reset the interface in case the text of the question changes
    def reset_results(*args):
        state.results = None
        state.raw_json = None

    # Title
    st.write("# Haystack Demo - Explore the world")
    st.markdown("""
This demo takes its data from a selection of Wikipedia pages crawled in November 2021 on the topic of

<h3 style='text-align:center;padding: 0 0 1rem;'>Countries and capital cities</h3>

Ask any question on this topic and see if Haystack can find the correct answer to your query!

*Note: do not use keywords, but full-fledged questions.* The demo is not optimized to deal with keyword queries and might misunderstand you.
""", unsafe_allow_html=True)

    # Sidebar
    st.sidebar.header("Options")
    top_k_reader = st.sidebar.slider(
        "Max. number of answers",
        min_value=1,
        max_value=10,
        value=DEFAULT_NUMBER_OF_ANSWERS,
        step=1,
        on_change=reset_results,
    )
    top_k_retriever = st.sidebar.slider(
        "Max. number of documents from retriever",
        min_value=1,
        max_value=10,
        value=DEFAULT_DOCS_FROM_RETRIEVER,
        step=1,
        on_change=reset_results,
    )
    eval_mode = st.sidebar.checkbox("Evaluation mode")
    debug = st.sidebar.checkbox("Show debug info")

    # File upload block
    if not DISABLE_FILE_UPLOAD:
        st.sidebar.write("## File Upload:")
        data_files = st.sidebar.file_uploader("", type=["pdf", "txt", "docx"], accept_multiple_files=True)
        for data_file in data_files:
            # Upload file
            if data_file:
                raw_json = upload_doc(data_file)
                st.sidebar.write(str(data_file.name) + " ✅ ")
                if debug:
                    # Keep the header next to the JSON it describes (both in the sidebar)
                    st.sidebar.subheader("REST API JSON response")
                    st.sidebar.write(raw_json)

    # The version tag in the footer is best-effort: the footer is still shown without it
    hs_version = ""
    try:
        hs_version = f" <small>(v{haystack_version()})</small>"
    except Exception:
        logging.debug("Could not retrieve the Haystack version", exc_info=True)

    # Doubled braces are literal braces inside the f-string
    st.sidebar.markdown(f"""
<style>
    a {{
        text-decoration: none;
    }}
    .haystack-footer {{
        text-align: center;
    }}
    .haystack-footer h4 {{
        margin: 0.1rem;
        padding: 0;
    }}
    footer {{
        opacity: 0;
    }}
</style>
<div class="haystack-footer">
    <hr />
    <h4>Built with <a href="https://www.deepset.ai/haystack">Haystack</a>{hs_version}</h4>
    <p>Get it on <a href="https://github.com/deepset-ai/haystack/">GitHub</a> &nbsp;&nbsp; - &nbsp;&nbsp; Read the <a href="https://haystack.deepset.ai/overview/intro">Docs</a></p>
    <small>Data crawled from <a href="https://en.wikipedia.org/wiki/Category:Lists_of_countries_by_continent">Wikipedia</a> in November 2021.<br />See the <a href="https://creativecommons.org/licenses/by-sa/3.0/">License</a> (CC BY-SA 3.0).</small>
</div>
""", unsafe_allow_html=True)

    # Load csv into pandas dataframe
    try:
        df = pd.read_csv(EVAL_LABELS, sep=";")
    except Exception:
        st.error("The eval file was not found. Please check the demo's [README](https://github.com/deepset-ai/haystack/tree/master/ui/README.md) for more information.")
        sys.exit(f"The eval file was not found under `{EVAL_LABELS}`. Please check the README (https://github.com/deepset-ai/haystack/tree/master/ui/README.md) for more information.")

    # Search bar
    question = st.text_input(
        "",
        value=state.random_question,
        max_chars=100,
        on_change=reset_results,
    )
    col1, col2 = st.columns(2)
    col1.markdown("<style>.stButton button {width:100%;}</style>", unsafe_allow_html=True)
    col2.markdown("<style>.stButton button {width:100%;}</style>", unsafe_allow_html=True)

    # Run button
    run_pressed = col1.button("Run")
    run_query = run_pressed or question != state.last_question

    # Get next random question from the CSV
    if col2.button("Random question"):
        reset_results()
        # Avoid picking the same question twice (the change is not visible on the UI).
        # Filtering first, instead of re-sampling until the question differs, cannot
        # loop forever when the CSV holds no other question.
        candidates = df[df["Question Text"] != state.random_question]
        if candidates.empty:
            candidates = df
        if not candidates.empty:
            new_row = candidates.sample(1)
            state.random_question = new_row["Question Text"].values[0]
            state.random_answer = new_row["Answer"].values[0]
        # Re-runs the script setting the random question as the textbox value
        # Unfortunately necessary as the Random Question button is _below_ the textbox
        raise st.script_runner.RerunException(st.script_request_queue.RerunData(None))

    # Check the connection
    with st.spinner("⌛️ Haystack is starting..."):
        if not haystack_is_ready():
            st.error("🚫 Connection Error. Is Haystack running?")
            run_query = False
            reset_results()

    # Get results for query
    if run_query and question:
        reset_results()
        state.last_question = question
        with st.spinner(
            "🧠 Performing neural search on documents... \n "
            "Do you want to optimize speed or accuracy? \n"
            "Check out the docs: https://haystack.deepset.ai/usage/optimization "
        ):
            try:
                state.results, state.raw_json = query(question, top_k_reader=top_k_reader, top_k_retriever=top_k_retriever)
            except JSONDecodeError:
                st.error("👓 An error occurred reading the results. Is the document store working?")
                return
            except Exception as e:
                logging.exception(e)
                if "The server is busy processing requests" in str(e) or "503" in str(e):
                    st.error("🧑🌾 All our workers are busy! Try again later.")
                else:
                    st.error("🐞 An error occurred during the request.")
                return

    if state.results:
        # Show the gold answer if we use a question of the given set
        if question == state.random_question and eval_mode and state.random_answer:
            st.write("## Correct answers:")
            st.write(state.random_answer)

        st.write("## Results:")

        for count, result in enumerate(state.results):
            if result["answer"]:
                answer, context = result["answer"], result["context"]
                # Hack due to this bug: https://github.com/streamlit/streamlit/issues/3190
                st.write(_highlight_answer(context, answer), unsafe_allow_html=True)
                # Link to the source page when get_backlink provides both parts,
                # otherwise fall back to the plain source field of the result.
                url, title = get_backlink(result)
                if url and title:
                    source = f"[{title}]({url})"
                else:
                    source = f"{result['source']}"
                st.markdown(f"**Relevance:** {result['relevance']} - **Source:** {source}")
            else:
                st.info("🤔 Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!")
                st.write("**Relevance:** ", result["relevance"])

            if eval_mode and result["answer"]:
                # Define columns for buttons
                is_correct_answer = None
                is_correct_document = None

                button_col1, button_col2, button_col3, _ = st.columns([1, 1, 1, 6])
                # The result index is part of every key so that each button on the page has its own key
                if button_col1.button("👍", key=f"{result['context']}{count}1", help="Correct answer"):
                    is_correct_answer = True
                    is_correct_document = True

                if button_col2.button("👎", key=f"{result['context']}{count}2", help="Wrong answer and wrong passage"):
                    is_correct_answer = False
                    is_correct_document = False

                if button_col3.button("👎👍", key=f"{result['context']}{count}3", help="Wrong answer, but correct passage"):
                    is_correct_answer = False
                    is_correct_document = True

                # Both flags are only set once one of the three buttons was clicked
                if is_correct_answer is not None and is_correct_document is not None:
                    try:
                        send_feedback(
                            query=question,
                            answer_obj=result["_raw"],
                            is_correct_answer=is_correct_answer,
                            is_correct_document=is_correct_document,
                            document=result["document"],
                        )
                        st.success("✨ Thanks for your feedback! ✨")
                    except Exception as e:
                        logging.exception(e)
                        st.error("🐞 An error occurred while submitting your feedback!")

            st.write("___")

        if debug:
            st.subheader("REST API JSON response")
            st.write(state.raw_json)


main()