# parquet files generated from indexing pipeline
INPUT_DIR = "./inputs/operation dulce"
COMMUNITY_REPORT_TABLE = "create_final_community_reports"
ENTITY_TABLE = "create_final_nodes"

# community level in the Leiden community hierarchy from which we will load the community reports
# higher value means we use reports on smaller communities (and thus will have more reports to query against)
COMMUNITY_LEVEL = 2
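
These tables are parquet outputs of the indexing pipeline. A minimal loading sketch, assuming pandas, tiktoken, and graphrag's indexer adapters (read_indexer_reports / read_indexer_entities); the entity-embedding table name below is an assumption, not part of this excerpt:

import pandas as pd
import tiktoken

from graphrag.query.indexer_adapters import read_indexer_entities, read_indexer_reports

# table name assumed here; adjust to match your pipeline output
ENTITY_EMBEDDING_TABLE = "create_final_entities"

entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")

# keep only reports/entities at or below the chosen community level
reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)
entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)

# token encoder used by the context builders below
token_encoder = tiktoken.get_encoding("cl100k_base")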

context_builder_params = {
    "use_community_summary": False,  # False means using full community reports. True means using community short summaries.
    "shuffle_data": True,
    "include_community_rank": True,
    "min_community_rank": 0,
    "max_tokens": 12_000,  # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)
    "context_name": "Reports",
}

map_llm_params = {
    "max_tokens": 1000,
    "temperature": 0.0,
    "response_format": {"type": "json_object"},
}

reduce_llm_params = {
    "max_tokens": 2000,  # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 1000-1500)
    "temperature": 0.0,
}

search_engine = GlobalSearch(
    llm=llm,
    context_builder=context_builder,
    token_encoder=token_encoder,
    max_data_tokens=16_000,  # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)
    map_llm_params=map_llm_params,
    reduce_llm_params=reduce_llm_params,
    context_builder_params=context_builder_params,
    concurrent_coroutines=32,
    response_type="multiple paragraphs",  # free form text describing the response type and format, can be anything, e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report
)

result = await search_engine.asearch(
    "What is the major conflict in this story and who are the protagonist and antagonist?"
)

print(result.response)
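
The top-level await above assumes a notebook-style event loop. In a plain Python script, a minimal wrapper with asyncio would look like this:

import asyncio

async def main() -> None:
    # asearch is a coroutine, so it must be awaited inside an event loop
    result = await search_engine.asearch(
        "What is the major conflict in this story and who are the protagonist and antagonist?"
    )
    print(result.response)

asyncio.run(main())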

# inspect the data used to build the context for the LLM responses
result.context_data["reports"]

# inspect number of LLM calls and tokens
print(f"LLM calls: {result.llm_calls}. LLM tokens: {result.prompt_tokens}")

LLM calls: 13. LLM tokens: 184660

context_builder = LocalSearchMixedContext(
    community_reports=reports,
    text_units=text_units,
    entities=entities,
    relationships=relationships,
    covariates=covariates,
    entity_text_embeddings=description_embedding_store,
    embedding_vectorstore_key=EntityVectorStoreKey.ID,  # if the vectorstore uses entity title as ids, set this to EntityVectorStoreKey.TITLE
    text_embedder=text_embedder,
    token_encoder=token_encoder,
)

# text_unit_prop: proportion of context window dedicated to related text units
# community_prop: proportion of context window dedicated to community reports.
# The remaining proportion is dedicated to entities and relationships. Sum of text_unit_prop and community_prop should be <= 1
# conversation_history_max_turns: maximum number of turns to include in the conversation history.
local_context_params = {
    "text_unit_prop": 0.5,
    "community_prop": 0.1,
    "conversation_history_max_turns": 5,
    "include_relationship_weight": True,
    "include_community_rank": False,
    "return_candidate_context": False,
    "embedding_vectorstore_key": EntityVectorStoreKey.ID,  # set this to EntityVectorStoreKey.TITLE if the vectorstore uses entity title as ids
    "max_tokens": 12_000,  # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)
}

llm_params = {
    "max_tokens": 2_000,  # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 1000-1500)
    "temperature": 0.0,
}

search_engine = LocalSearch(
    llm=llm,
    context_builder=context_builder,
    token_encoder=token_encoder,
    llm_params=llm_params,
    context_builder_params=local_context_params,
    response_type="multiple paragraphs",  # free form text describing the response type and format, can be anything, e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report
)

Run local search on sample queries

result = await search_engine.asearch("Tell me about Agent Mercer")
print(result.response)

question = "Tell me about Dr. Jordan Hayes"
result = await search_engine.asearch(question)
print(result.response)

Inspecting the context data used to generate the response

result.context_data["entities"].head()

result.context_data["relationships"].head()

result.context_data["reports"].head()

result.context_data["sources"].head()

question_history = [
    "Tell me about Agent Mercer",
    "What happens in Dulce military base?",
]
candidate_questions = await question_generator.agenerate(
    question_history=question_history, context_data=None, question_count=5
)
print(candidate_questions.response)
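
Assuming the response is a list of question strings, the generated candidates can be fed straight back into the local search engine:

# run the first generated candidate as a follow-up query (illustrative)
for question in candidate_questions.response[:1]:
    followup = await search_engine.asearch(question)
    print(followup.response)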