diff --git a/data/operation_dulce/dataset.zip b/data/operation_dulce/dataset.zip index 6c0a3104..40088518 100644 Binary files a/data/operation_dulce/dataset.zip and b/data/operation_dulce/dataset.zip differ diff --git a/posts/config/env_vars/index.html b/posts/config/env_vars/index.html index 1d16ac43..5eacc5c9 100644 --- a/posts/config/env_vars/index.html +++ b/posts/config/env_vars/index.html @@ -852,7 +852,7 @@ a { The chunk size in tokens for text-chunk analysis windows. str optional -300 +1200 GRAPHRAG_CHUNK_OVERLAP @@ -894,7 +894,7 @@ a { The maximum number of redrives (gleanings) to invoke when extracting entities in a loop. int optional -0 +1 GRAPHRAG_ENTITY_EXTRACTION_ENTITY_TYPES @@ -943,7 +943,7 @@ a { The maximum number of redrives (gleanings) to invoke when extracting claims in a loop. int optional -0 +1 GRAPHRAG_COMMUNITY_REPORTS_PROMPT_FILE diff --git a/posts/config/template/index.html b/posts/config/template/index.html index d1ae172a..83f4a460 100644 --- a/posts/config/template/index.html +++ b/posts/config/template/index.html @@ -399,19 +399,19 @@ the --root parameter on your Indexing Pipeline execution.

# GRAPHRAG_INPUT_ENCODING=utf-8 # Data Chunking -# GRAPHRAG_CHUNK_SIZE=300 +# GRAPHRAG_CHUNK_SIZE=1200 # GRAPHRAG_CHUNK_OVERLAP=100 # GRAPHRAG_CHUNK_BY_COLUMNS=id # Prompting Overrides # GRAPHRAG_ENTITY_EXTRACTION_PROMPT_FILE=None -# GRAPHRAG_ENTITY_EXTRACTION_MAX_GLEANINGS=0 +# GRAPHRAG_ENTITY_EXTRACTION_MAX_GLEANINGS=1 # GRAPHRAG_ENTITY_EXTRACTION_ENTITY_TYPES=organization,person,event,geo # GRAPHRAG_SUMMARIZE_DESCRIPTIONS_PROMPT_FILE=None # GRAPHRAG_SUMMARIZE_DESCRIPTIONS_MAX_LENGTH=500 # GRAPHRAG_CLAIM_EXTRACTION_DESCRIPTION="Any claims or facts that could be relevant to threat analysis." # GRAPHRAG_CLAIM_EXTRACTION_PROMPT_FILE=None -# GRAPHRAG_CLAIM_EXTRACTION_MAX_GLEANINGS=0 +# GRAPHRAG_CLAIM_EXTRACTION_MAX_GLEANINGS=1 # GRAPHRAG_COMMUNITY_REPORT_PROMPT_FILE=None # GRAPHRAG_COMMUNITY_REPORT_MAX_LENGTH=1500