2021-04-22 17:30:17 +02:00
import os
import sys
2021-09-27 16:40:25 +02:00
import logging
2021-04-30 14:16:30 +05:30
import pandas as pd
2021-11-22 19:06:08 +01:00
from json import JSONDecodeError
2021-11-19 11:34:32 +01:00
from pathlib import Path
2022-02-09 17:35:18 +01:00
import streamlit as st
from annotated_text import annotation
from markdown import markdown
2021-04-30 14:16:30 +05:30
2022-02-09 17:35:18 +01:00
from ui . utils import haystack_is_ready , query , send_feedback , upload_doc , haystack_version , get_backlink
2021-06-24 09:53:08 +02:00
2021-04-07 17:53:32 +02:00
2021-11-19 11:34:32 +01:00
# Adjust to a question that you would like users to see in the search bar when they load the UI:
DEFAULT_QUESTION_AT_STARTUP = os.getenv("DEFAULT_QUESTION_AT_STARTUP", "What's the capital of France?")
DEFAULT_ANSWER_AT_STARTUP = os.getenv("DEFAULT_ANSWER_AT_STARTUP", "Paris")

# Sliders
DEFAULT_DOCS_FROM_RETRIEVER = int(os.getenv("DEFAULT_DOCS_FROM_RETRIEVER", "3"))
DEFAULT_NUMBER_OF_ANSWERS = int(os.getenv("DEFAULT_NUMBER_OF_ANSWERS", "3"))

# Labels for the evaluation
EVAL_LABELS = os.getenv("EVAL_FILE", str(Path(__file__).parent / "eval_labels_example.csv"))

# Values of a boolean environment variable that mean "disabled" (compared case-insensitively).
_FALSY_ENV_VALUES = frozenset({"", "0", "false", "no", "off"})


def _env_flag(name: str) -> bool:
    """Return True when the environment variable ``name`` is set to a truthy value.

    An unset variable, an empty/blank value, and the spellings ``0``, ``false``,
    ``no`` and ``off`` (any letter case, surrounding whitespace ignored) count as
    False. Every other value counts as True.

    A plain ``bool(os.getenv(name))`` is not enough here: it is True for *any*
    non-empty string, so ``DISABLE_FILE_UPLOAD=0`` would still disable uploads.
    """
    return os.getenv(name, "").strip().lower() not in _FALSY_ENV_VALUES


# Whether the file upload should be enabled or not
DISABLE_FILE_UPLOAD = _env_flag("DISABLE_FILE_UPLOAD")
2021-06-24 09:53:08 +02:00
2021-04-07 17:53:32 +02:00
2022-01-18 14:59:42 +01:00
def set_state_if_absent(key, value):
    """Store ``value`` under ``key`` in ``st.session_state`` unless the key already exists.

    An existing entry is left untouched, whatever its current value is.
    """
    if key in st.session_state:
        return
    st.session_state[key] = value
2022-02-03 13:43:18 +01:00
2021-09-27 16:40:25 +02:00
def main() -> None:
    """Render the Haystack demo page: title, sidebar, search bar, results and feedback buttons.

    The function is written to be executed top to bottom on every script run. State that
    must survive between runs (current question, gold answer, last results, last raw JSON
    response) is kept in ``st.session_state``.

    Returns early, after showing an error box, when the query request fails. Calls
    ``sys.exit`` when the evaluation CSV (``EVAL_LABELS``) cannot be loaded. Raises a
    Streamlit ``RerunException`` after a random question has been picked so the text box
    is redrawn with the new question.
    """
    st.set_page_config(page_title="Haystack Demo", page_icon="https://haystack.deepset.ai/img/HaystackIcon.png")

    # Persistent state
    set_state_if_absent("question", DEFAULT_QUESTION_AT_STARTUP)
    set_state_if_absent("answer", DEFAULT_ANSWER_AT_STARTUP)
    set_state_if_absent("results", None)
    set_state_if_absent("raw_json", None)
    set_state_if_absent("random_question_requested", False)

    # Small callback to reset the interface in case the text of the question changes
    def reset_results(*args):
        st.session_state.answer = None
        st.session_state.results = None
        st.session_state.raw_json = None

    # Title
    st.write("# Haystack Demo - Explore the world")
    st.markdown(
        """
This demo takes its data from a selection of Wikipedia pages crawled in November 2021 on the topic of

<h3 style='text-align:center;padding: 0 0 1rem;'>Countries and capital cities</h3>

Ask any question on this topic and see if Haystack can find the correct answer to your query!

*Note: do not use keywords, but full-fledged questions.* The demo is not optimized to deal with keyword queries and might misunderstand you.
""",
        unsafe_allow_html=True,
    )

    # Sidebar
    st.sidebar.header("Options")
    top_k_reader = st.sidebar.slider(
        "Max. number of answers",
        min_value=1,
        max_value=10,
        value=DEFAULT_NUMBER_OF_ANSWERS,
        step=1,
        on_change=reset_results,
    )
    top_k_retriever = st.sidebar.slider(
        "Max. number of documents from retriever",
        min_value=1,
        max_value=10,
        value=DEFAULT_DOCS_FROM_RETRIEVER,
        step=1,
        on_change=reset_results,
    )
    eval_mode = st.sidebar.checkbox("Evaluation mode")
    debug = st.sidebar.checkbox("Show debug info")

    # File upload block
    if not DISABLE_FILE_UPLOAD:
        st.sidebar.write("## File Upload:")
        data_files = st.sidebar.file_uploader("", type=["pdf", "txt", "docx"], accept_multiple_files=True)
        # `or []` keeps the loop safe should the uploader hand back None instead of a list.
        for data_file in data_files or []:
            # Upload file
            if data_file:
                raw_json = upload_doc(data_file)
                st.sidebar.write(str(data_file.name) + " ✅ ")
                if debug:
                    st.subheader("REST API JSON response")
                    st.sidebar.write(raw_json)

    hs_version = ""
    try:
        hs_version = f" <small>(v{haystack_version()})</small>"
    except Exception:
        # Best effort: the footer is simply rendered without a version number.
        logging.debug("Could not retrieve the Haystack version", exc_info=True)

    st.sidebar.markdown(
        f"""
<style>
    a {{
        text-decoration: none;
    }}
    .haystack-footer {{
        text-align: center;
    }}
    .haystack-footer h4 {{
        margin: 0.1rem;
        padding: 0;
    }}
    footer {{
        opacity: 0;
    }}
</style>
<div class="haystack-footer">
    <hr />
    <h4>Built with <a href="https://www.deepset.ai/haystack">Haystack</a>{hs_version}</h4>
    <p>Get it on <a href="https://github.com/deepset-ai/haystack/">GitHub</a> &nbsp;&nbsp; - &nbsp;&nbsp; Read the <a href="https://haystack.deepset.ai/overview/intro">Docs</a></p>
    <small>Data crawled from <a href="https://en.wikipedia.org/wiki/Category:Lists_of_countries_by_continent">Wikipedia</a> in November 2021.<br />See the <a href="https://creativecommons.org/licenses/by-sa/3.0/">License</a> (CC BY-SA 3.0).</small>
</div>
""",
        unsafe_allow_html=True,
    )

    # Load csv into pandas dataframe
    try:
        df = pd.read_csv(EVAL_LABELS, sep=";")
    except Exception:
        st.error(
            "The eval file was not found. Please check the demo's [README](https://github.com/deepset-ai/haystack/tree/master/ui/README.md) for more information."
        )
        sys.exit(
            f"The eval file was not found under `{EVAL_LABELS}`. Please check the README (https://github.com/deepset-ai/haystack/tree/master/ui/README.md) for more information."
        )

    # Search bar
    question = st.text_input("", value=st.session_state.question, max_chars=100, on_change=reset_results)
    col1, col2 = st.columns(2)
    col1.markdown("<style>.stButton button {width:100%;}</style>", unsafe_allow_html=True)
    col2.markdown("<style>.stButton button {width:100%;}</style>", unsafe_allow_html=True)

    # Run button
    run_pressed = col1.button("Run")

    # Get next random question from the CSV
    if col2.button("Random question"):
        reset_results()
        # Avoid picking the same question twice (the change is not visible on the UI).
        # The current question is filtered out *before* sampling: re-sampling until a
        # different row shows up never terminates when every row holds that question
        # (e.g. a one-row CSV).
        candidates = df[df["Question Text"] != st.session_state.question]
        if candidates.empty:
            candidates = df
        new_row = candidates.sample(1)
        st.session_state.question = new_row["Question Text"].values[0]
        st.session_state.answer = new_row["Answer"].values[0]
        st.session_state.random_question_requested = True
        # Re-runs the script setting the random question as the textbox value
        # Unfortunately necessary as the Random Question button is _below_ the textbox
        raise st.script_runner.RerunException(st.script_request_queue.RerunData(None))

    # NOTE: the flag is cleared right before it is read, so the `not ...` clause below is
    # always true on this path; both statements are kept as they were.
    st.session_state.random_question_requested = False

    run_query = (
        run_pressed or question != st.session_state.question
    ) and not st.session_state.random_question_requested

    # Check the connection
    with st.spinner("⌛️ Haystack is starting..."):
        if not haystack_is_ready():
            st.error("🚫 Connection Error. Is Haystack running?")
            run_query = False
            reset_results()

    # Get results for query
    if run_query and question:
        reset_results()
        st.session_state.question = question

        with st.spinner(
            "🧠 Performing neural search on documents... \n "
            "Do you want to optimize speed or accuracy? \n"
            "Check out the docs: https://haystack.deepset.ai/usage/optimization "
        ):
            try:
                st.session_state.results, st.session_state.raw_json = query(
                    question, top_k_reader=top_k_reader, top_k_retriever=top_k_retriever
                )
            except JSONDecodeError:
                st.error("👓 An error occurred reading the results. Is the document store working?")
                return
            except Exception as e:
                logging.exception(e)
                if "The server is busy processing requests" in str(e) or "503" in str(e):
                    st.error("🧑🌾 All our workers are busy! Try again later.")
                else:
                    st.error("🐞 An error occurred during the request.")
                return

    if st.session_state.results:

        # Show the gold answer if we use a question of the given set
        if eval_mode and st.session_state.answer:
            st.write("## Correct answer:")
            st.write(st.session_state.answer)

        st.write("## Results:")

        for count, result in enumerate(st.session_state.results):
            if result["answer"]:
                answer, context = result["answer"], result["context"]
                highlight = str(annotation(answer, "ANSWER", "#8ef"))
                start_idx = context.find(answer)
                if start_idx < 0:
                    # `str.find` returns -1 when the answer is not a verbatim substring of
                    # the context. Slicing with -1 would drop the last character and insert
                    # the highlight in the wrong place, so show the answer ahead of the
                    # untouched context instead.
                    rendered = highlight + " " + context
                else:
                    end_idx = start_idx + len(answer)
                    rendered = context[:start_idx] + highlight + context[end_idx:]
                # Hack due to this bug: https://github.com/streamlit/streamlit/issues/3190
                st.write(markdown(rendered), unsafe_allow_html=True)

                url, title = get_backlink(result)
                if url and title:
                    source = f"[{result['document']['meta']['title']}]({result['document']['meta']['url']})"
                else:
                    source = f"{result['source']}"
                st.markdown(f"**Relevance:** {result['relevance']} - **Source:** {source}")

            else:
                st.info(
                    "🤔 Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!"
                )
                st.write("**Relevance:** ", result["relevance"])

            if eval_mode and result["answer"]:
                # Define columns for buttons
                is_correct_answer = None
                is_correct_document = None

                button_col1, button_col2, button_col3, _ = st.columns([1, 1, 1, 6])
                if button_col1.button("👍", key=f"{result['context']}{count}1", help="Correct answer"):
                    is_correct_answer = True
                    is_correct_document = True

                if button_col2.button("👎", key=f"{result['context']}{count}2", help="Wrong answer and wrong passage"):
                    is_correct_answer = False
                    is_correct_document = False

                if button_col3.button(
                    "👎👍", key=f"{result['context']}{count}3", help="Wrong answer, but correct passage"
                ):
                    is_correct_answer = False
                    is_correct_document = True

                # Both flags stay None unless one of the three buttons was pressed in this run.
                if is_correct_answer is not None and is_correct_document is not None:
                    try:
                        send_feedback(
                            query=question,
                            answer_obj=result["_raw"],
                            is_correct_answer=is_correct_answer,
                            is_correct_document=is_correct_document,
                            document=result["document"],
                        )
                        st.success("✨ Thanks for your feedback! ✨")
                    except Exception as e:
                        logging.exception(e)
                        st.error("🐞 An error occurred while submitting your feedback!")

            st.write("___")

        if debug:
            st.subheader("REST API JSON response")
            st.write(st.session_state.raw_json)
2022-02-03 13:43:18 +01:00
2021-09-27 16:40:25 +02:00
# Entry point: called unconditionally at module level (there is no `__main__` guard),
# so the page is rendered whenever this file is executed or imported.
main()