2024-07-01 15:25:30 -06:00
|
|
|
---
|
|
|
|
title: Configuration Template
|
|
|
|
navtitle: Configuration Template
|
|
|
|
layout: page
|
|
|
|
tags: [post]
|
|
|
|
date: 2024-04-04
|
|
|
|
---
|
|
|
|
|
|
|
|
The following template can be used and stored as a `.env` in the directory where you're pointing
|
|
|
|
the `--root` parameter on your Indexing Pipeline execution.
|
|
|
|
|
|
|
|
For details about how to run the Indexing Pipeline, refer to the [Index CLI](../../index/2-cli) documentation.
|
|
|
|
|
|
|
|
## .env File Template
|
|
|
|
|
|
|
|
Required variables are uncommented. All the optional configuration can be turned on or off as needed.
|
|
|
|
|
|
|
|
### Minimal Configuration
|
|
|
|
|
|
|
|
```bash
|
|
|
|
# Base LLM Settings
|
|
|
|
GRAPHRAG_API_KEY="your_api_key"
|
|
|
|
GRAPHRAG_API_BASE="https://<domain>.openai.azure.com" # For Azure OpenAI Users
|
|
|
|
GRAPHRAG_API_VERSION="api_version" # For Azure OpenAI Users
|
|
|
|
|
|
|
|
# Text Generation Settings
|
|
|
|
GRAPHRAG_LLM_TYPE="azure_openai_chat" # or openai_chat
|
|
|
|
GRAPHRAG_LLM_DEPLOYMENT_NAME="gpt-4-turbo-preview"
|
|
|
|
GRAPHRAG_LLM_MODEL_SUPPORTS_JSON=True
|
|
|
|
|
|
|
|
# Text Embedding Settings
|
|
|
|
GRAPHRAG_EMBEDDING_TYPE="azure_openai_embedding" # or openai_embedding
|
|
|
|
GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME="text-embedding-3-small"
|
|
|
|
|
|
|
|
# Data Mapping Settings
|
|
|
|
GRAPHRAG_INPUT_FILE_TYPE="text"
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
### Full Configuration
|
|
|
|
|
|
|
|
```bash
|
|
|
|
|
|
|
|
# Required LLM Config
|
|
|
|
|
|
|
|
# Input Data Configuration
|
|
|
|
GRAPHRAG_INPUT_TYPE="file"
|
|
|
|
|
|
|
|
# Plaintext Input Data Configuration
|
|
|
|
# GRAPHRAG_INPUT_FILE_PATTERN=.*\.txt
|
|
|
|
|
|
|
|
# Text Input Data Configuration
|
|
|
|
GRAPHRAG_INPUT_FILE_TYPE="text"
|
|
|
|
GRAPHRAG_INPUT_FILE_PATTERN=".*\.txt$"
|
|
|
|
GRAPHRAG_INPUT_SOURCE_COLUMN=source
|
|
|
|
# GRAPHRAG_INPUT_TIMESTAMP_COLUMN=None
|
|
|
|
# GRAPHRAG_INPUT_TIMESTAMP_FORMAT=None
|
|
|
|
# GRAPHRAG_INPUT_TEXT_COLUMN="text"
|
|
|
|
# GRAPHRAG_INPUT_ATTRIBUTE_COLUMNS=id
|
|
|
|
# GRAPHRAG_INPUT_TITLE_COLUMN="title"
|
|
|
|
# GRAPHRAG_INPUT_TYPE="file"
|
|
|
|
# GRAPHRAG_INPUT_CONNECTION_STRING=None
|
|
|
|
# GRAPHRAG_INPUT_CONTAINER_NAME=None
|
|
|
|
# GRAPHRAG_INPUT_BASE_DIR=None
|
|
|
|
|
|
|
|
# Base LLM Settings
|
|
|
|
GRAPHRAG_API_KEY="your_api_key"
|
|
|
|
GRAPHRAG_API_BASE="https://<domain>.openai.azure.com" # For Azure OpenAI Users
|
|
|
|
GRAPHRAG_API_VERSION="api_version" # For Azure OpenAI Users
|
|
|
|
# GRAPHRAG_API_ORGANIZATION=None
|
|
|
|
# GRAPHRAG_API_PROXY=None
|
|
|
|
|
|
|
|
# Text Generation Settings
|
|
|
|
# GRAPHRAG_LLM_TYPE=openai_chat
|
|
|
|
GRAPHRAG_LLM_API_KEY="your_api_key" # If GRAPHRAG_API_KEY is not set
|
|
|
|
GRAPHRAG_LLM_API_BASE="https://<domain>.openai.azure.com" # For Azure OpenAI Users and if GRAPHRAG_API_BASE is not set
|
|
|
|
GRAPHRAG_LLM_API_VERSION="api_version" # For Azure OpenAI Users and if GRAPHRAG_API_VERSION is not set
|
|
|
|
GRAPHRAG_LLM_MODEL_SUPPORTS_JSON=True # Suggested by default
|
|
|
|
# GRAPHRAG_LLM_API_ORGANIZATION=None
|
|
|
|
# GRAPHRAG_LLM_API_PROXY=None
|
|
|
|
# GRAPHRAG_LLM_DEPLOYMENT_NAME=None
|
|
|
|
# GRAPHRAG_LLM_MODEL=gpt-4-turbo-preview
|
|
|
|
# GRAPHRAG_LLM_MAX_TOKENS=4000
|
|
|
|
# GRAPHRAG_LLM_REQUEST_TIMEOUT=180
|
|
|
|
# GRAPHRAG_LLM_THREAD_COUNT=50
|
|
|
|
# GRAPHRAG_LLM_THREAD_STAGGER=0.3
|
|
|
|
# GRAPHRAG_LLM_CONCURRENT_REQUESTS=25
|
|
|
|
# GRAPHRAG_LLM_TPM=0
|
|
|
|
# GRAPHRAG_LLM_RPM=0
|
|
|
|
# GRAPHRAG_LLM_MAX_RETRIES=10
|
|
|
|
# GRAPHRAG_LLM_MAX_RETRY_WAIT=10
|
|
|
|
# GRAPHRAG_LLM_SLEEP_ON_RATE_LIMIT_RECOMMENDATION=True
|
|
|
|
|
|
|
|
# Text Embedding Settings
|
|
|
|
# GRAPHRAG_EMBEDDING_TYPE=openai_embedding
|
|
|
|
GRAPHRAG_EMBEDDING_API_KEY="your_api_key" # If GRAPHRAG_API_KEY is not set
|
|
|
|
GRAPHRAG_EMBEDDING_API_BASE="https://<domain>.openai.azure.com" # For Azure OpenAI Users and if GRAPHRAG_API_BASE is not set
|
|
|
|
GRAPHRAG_EMBEDDING_API_VERSION="api_version" # For Azure OpenAI Users and if GRAPHRAG_API_VERSION is not set
|
|
|
|
# GRAPHRAG_EMBEDDING_API_ORGANIZATION=None
|
|
|
|
# GRAPHRAG_EMBEDDING_API_PROXY=None
|
|
|
|
# GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME=None
|
|
|
|
# GRAPHRAG_EMBEDDING_MODEL=text-embedding-3-small
|
|
|
|
# GRAPHRAG_EMBEDDING_BATCH_SIZE=16
|
|
|
|
# GRAPHRAG_EMBEDDING_BATCH_MAX_TOKENS=8191
|
|
|
|
# GRAPHRAG_EMBEDDING_TARGET=required
|
|
|
|
# GRAPHRAG_EMBEDDING_SKIP=None
|
|
|
|
# GRAPHRAG_EMBEDDING_THREAD_COUNT=None
|
|
|
|
# GRAPHRAG_EMBEDDING_THREAD_STAGGER=50
|
|
|
|
# GRAPHRAG_EMBEDDING_CONCURRENT_REQUESTS=25
|
|
|
|
# GRAPHRAG_EMBEDDING_TPM=0
|
|
|
|
# GRAPHRAG_EMBEDDING_RPM=0
|
|
|
|
# GRAPHRAG_EMBEDDING_MAX_RETRIES=10
|
|
|
|
# GRAPHRAG_EMBEDDING_MAX_RETRY_WAIT=10
|
|
|
|
# GRAPHRAG_EMBEDDING_SLEEP_ON_RATE_LIMIT_RECOMMENDATION=True
|
|
|
|
|
|
|
|
# Data Mapping Settings
|
|
|
|
# GRAPHRAG_INPUT_ENCODING=utf-8
|
|
|
|
|
|
|
|
# Data Chunking
|
2024-07-11 10:22:27 -06:00
|
|
|
# GRAPHRAG_CHUNK_SIZE=1200
|
2024-07-01 15:25:30 -06:00
|
|
|
# GRAPHRAG_CHUNK_OVERLAP=100
|
|
|
|
# GRAPHRAG_CHUNK_BY_COLUMNS=id
|
|
|
|
|
|
|
|
# Prompting Overrides
|
|
|
|
# GRAPHRAG_ENTITY_EXTRACTION_PROMPT_FILE=None
|
2024-07-11 10:22:27 -06:00
|
|
|
# GRAPHRAG_ENTITY_EXTRACTION_MAX_GLEANINGS=1
|
2024-07-01 15:25:30 -06:00
|
|
|
# GRAPHRAG_ENTITY_EXTRACTION_ENTITY_TYPES=organization,person,event,geo
|
|
|
|
# GRAPHRAG_SUMMARIZE_DESCRIPTIONS_PROMPT_FILE=None
|
|
|
|
# GRAPHRAG_SUMMARIZE_DESCRIPTIONS_MAX_LENGTH=500
|
|
|
|
# GRAPHRAG_CLAIM_EXTRACTION_DESCRIPTION="Any claims or facts that could be relevant to threat analysis."
|
|
|
|
# GRAPHRAG_CLAIM_EXTRACTION_PROMPT_FILE=None
|
2024-07-11 10:22:27 -06:00
|
|
|
# GRAPHRAG_CLAIM_EXTRACTION_MAX_GLEANINGS=1
|
2024-07-01 15:25:30 -06:00
|
|
|
# GRAPHRAG_COMMUNITY_REPORT_PROMPT_FILE=None
|
|
|
|
# GRAPHRAG_COMMUNITY_REPORT_MAX_LENGTH=1500
|
|
|
|
|
|
|
|
# Storage
|
|
|
|
# GRAPHRAG_STORAGE_TYPE=file
|
|
|
|
# GRAPHRAG_STORAGE_CONNECTION_STRING=None
|
|
|
|
# GRAPHRAG_STORAGE_CONTAINER_NAME=None
|
|
|
|
# GRAPHRAG_STORAGE_BASE_DIR=None
|
|
|
|
|
|
|
|
# Cache
|
|
|
|
# GRAPHRAG_CACHE_TYPE=file
|
|
|
|
# GRAPHRAG_CACHE_CONNECTION_STRING=None
|
|
|
|
# GRAPHRAG_CACHE_CONTAINER_NAME=None
|
|
|
|
# GRAPHRAG_CACHE_BASE_DIR=None
|
|
|
|
|
|
|
|
# Reporting
|
|
|
|
# GRAPHRAG_REPORTING_TYPE=file
|
|
|
|
# GRAPHRAG_REPORTING_CONNECTION_STRING=None
|
|
|
|
# GRAPHRAG_REPORTING_CONTAINER_NAME=None
|
|
|
|
# GRAPHRAG_REPORTING_BASE_DIR=None
|
|
|
|
|
|
|
|
# Node2Vec Parameters
|
|
|
|
# GRAPHRAG_NODE2VEC_ENABLED=False
|
|
|
|
# GRAPHRAG_NODE2VEC_NUM_WALKS=10
|
|
|
|
# GRAPHRAG_NODE2VEC_WALK_LENGTH=40
|
|
|
|
# GRAPHRAG_NODE2VEC_WINDOW_SIZE=2
|
|
|
|
# GRAPHRAG_NODE2VEC_ITERATIONS=3
|
|
|
|
# GRAPHRAG_NODE2VEC_RANDOM_SEED=597832
|
|
|
|
|
|
|
|
# Data Snapshotting
|
|
|
|
# GRAPHRAG_SNAPSHOT_GRAPHML=False
|
|
|
|
# GRAPHRAG_SNAPSHOT_RAW_ENTITIES=False
|
|
|
|
# GRAPHRAG_SNAPSHOT_TOP_LEVEL_NODES=False
|
|
|
|
|
|
|
|
# Miscellaneous Settings
|
|
|
|
# GRAPHRAG_ASYNC_MODE=asyncio
|
|
|
|
# GRAPHRAG_ENCODING_MODEL=cl100k_base
|
|
|
|
# GRAPHRAG_MAX_CLUSTER_SIZE=10
|
|
|
|
# GRAPHRAG_ENTITY_RESOLUTION_ENABLED=False
|
|
|
|
# GRAPHRAG_SKIP_WORKFLOWS=None
|
|
|
|
# GRAPHRAG_UMAP_ENABLED=False
|
|
|
|
```
|