461 lines
20 KiB
HTML

<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Configuration Template</title>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
<link href="https://unpkg.com/prismjs@1.20.0/themes/prism-okaidia.css" rel="stylesheet">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/Primer/19.1.1/tooltips.min.css" crossorigin="anonymous" referrerpolicy="no-referrer">
<style>
/* Page reset: drop any margin/padding on the root elements. */
html,
body {
  margin: 0;
  padding: 0;
}
body {
  font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
}
/* Footer bar: one horizontally and vertically centered row of entries. */
footer {
  display: flex;
  flex-direction: row;
  justify-content: center;
  align-items: center;
  gap: 18px;
  width: 100%;
  height: 32px;
  font-size: 12px;
  color: #5d5d5d;
  background: #e9eaeb;
  border-top: 1px solid #c4c5c6;
}
/* The "Cookies" footer entry is clickable, so give it the same color as links. */
#cookiesManager {
  color: #485fc7;
  cursor: pointer;
}
/* Row container that lays out the sidebar next to the main content. */
.page-content {
  display: flex;
  flex-direction: row;
  /* margin and padding were each declared twice with the same value; once is enough. */
  margin: 0;
  padding: 0;
  overflow: scroll;
}
/* Page regions */
header {
  padding: 10px;
  height: 2%;
  background-color: lightgrey;
}
nav {
  min-width: 200px;
  padding: 1em;
}
main {
  flex: 1;
  padding: 0 5em;
}
.logotitle {
  margin: 5px;
  font-size: 1.5em;
  font-weight: bold;
}
/* Reset these two class combinations completely; presumably they collide with
   framework classes of the same name inside highlighted code (TODO confirm). */
.number,
.tag.token {
  all: unset;
}
/* Content typography */
main ul {
  margin-top: 10px;
  padding-left: 30px;
  list-style-type: disc;
}
h1,
h2,
h3,
p {
  margin-top: 10px;
}
h1 {
  font-size: 2rem;
}
h2,
h3 {
  font-weight: 500;
}
h2 {
  font-size: 1.5rem;
}
h3 {
  font-size: 1rem;
}
/* Accessibility styling */
a {
  color: #485fc7;
  text-decoration: underline;
}
/* Sidebar menu links are not underlined. */
.menu-list a {
  text-decoration: none;
}
/* Syntax-highlighting token colors. */
.token.comment,
.token.prolog,
.token.doctype,
.token.cdata {
  color: #8093a5;
}
.token.property,
.token.tag,
.token.constant,
.token.symbol,
.token.deleted {
  color: #ff36ab;
}
</style>
<script type="module" async>
// Load Mermaid from the CDN and apply the site configuration.
// initialize() is called directly: the previous code passed the *result* of
// mermaid.initialize(...) to addEventListener, so the call already ran right away
// and no DOMContentLoaded listener was ever registered.
// NOTE(review): "loadOnSave" is kept as-is, but it does not look like a documented
// Mermaid option — "startOnLoad" may have been intended; confirm.
import mermaid from "https://unpkg.com/mermaid@10/dist/mermaid.esm.min.mjs";
mermaid.initialize({"loadOnSave":true});
</script>
<script>
/**
 * Show a tooltip on the first child of the element that triggered a clipboard event.
 * Does nothing when the trigger's own class list already contains "tooltipped".
 *
 * @param event   ClipboardJS event object; only its `trigger` element is used.
 * @param message Text stored as the child's aria-label.
 */
function showTooltip(event, message) {
  const trigger = event.trigger;
  if (trigger.className.includes("tooltipped")) {
    return;
  }
  const icon = trigger.children[0];
  icon.className = "tooltipped tooltipped-s";
  icon.ariaLabel = message;
}

// Wire every ".code-copy" button to ClipboardJS once the page has fully loaded.
window.addEventListener("load", function () {
  const clipboard = new ClipboardJS(".code-copy");
  clipboard.on("success", function (event) {
    showTooltip(event, "Copied!");
  });
  clipboard.on("error", function (event) {
    showTooltip(event, "Failed...");
  });
});
</script>
<script async="" src="https://cdn.jsdelivr.net/npm/clipboard@2.0.11/dist/clipboard.min.js"></script>
<script src="https://wcpstatic.microsoft.com/mscc/lib/v2/wcp-consent.js" type="text/javascript"></script>
<script>
/**
 * Consent-change callback handed to WcpConsent.init; it only logs the
 * preferences object it receives.
 */
function onConsentChanged(categoryPreferences) {
  console.log("onConsentChanged", categoryPreferences);
}
// Consent handle delivered by the WcpConsent.init callback. It stays undefined
// when the WcpConsent library is not available or init reports an error.
var siteConsent
/**
 * Fill in the footer copyright text and start the cookie-consent library.
 */
function initialize(){
  var copyright = document.getElementById("copyright");
  // Guard the lookup so a missing footer element cannot prevent the consent
  // setup below from running.
  if (copyright) {
    var currentYear = new Date().getFullYear()
    // Plain text only, so textContent is sufficient (no HTML parsing needed).
    copyright.textContent = `©️ ${currentYear} Microsoft`;
  }
  // Skip the consent setup entirely when the WcpConsent global is absent.
  window.WcpConsent && WcpConsent.init("en-US", "cookie-banner", function (err, _siteConsent) {
    if (!err) {
      siteConsent = _siteConsent; //siteConsent is used to get the current consent
    } else {
      console.log("Error initializing WcpConsent: "+ err);
    }
  }, onConsentChanged, WcpConsent.themes.light);
}
// Run both setup steps once the document has been parsed; listeners fire in
// registration order, so initialize runs before checkCookieManager.
window.addEventListener("DOMContentLoaded", initialize);
window.addEventListener("DOMContentLoaded", checkCookieManager);
/**
 * Show the footer "Cookies" entry and its divider only when consent is required.
 *
 * siteConsent is undefined when the consent library did not load or its init
 * callback reported an error; that case is treated as "not required" instead of
 * throwing a TypeError.
 */
function checkCookieManager(){
  var consentRequired = Boolean(siteConsent && siteConsent.isConsentRequired);
  var display = consentRequired ? 'block' : 'none';
  document.getElementById("cookiesManager").style.display = display;
  document.getElementById("divider").style.display = display;
}
/**
 * Click handler for the footer "Cookies" entry: opens the consent manager
 * when consent is required.
 *
 * Does nothing when siteConsent was never set (consent library missing or
 * init failed) rather than throwing on the property access.
 */
function manageConsent() {
  if (siteConsent && siteConsent.isConsentRequired) {
    siteConsent.manageConsent();
  }
}
</script>
</head>
<body>
<header>
<!-- Mount point for the consent banner: this id is the one passed to WcpConsent.init in initialize(). -->
<div id="cookie-banner"></div>
<!-- Site title, linking to the same URL as the sidebar's "Welcome" entry. -->
<a href="/graphrag/"><span class="logotitle">GraphRAG</span></a>
</header>
<div class="page-content">
<!-- Sidebar: site navigation. Marked up as a labelled <nav> landmark so assistive
     technology can find it; note the stylesheet's `nav` rule (padding, min-width)
     now applies to it. -->
<nav class="menu" aria-label="Documentation">
  <ul class="menu-list">
    <li>
      <a href="/graphrag/">Welcome</a>
    </li>
    <!-- Get Started Links -->
    <li>
      <a href="/graphrag/posts/get_started/">Get Started</a>
      <a href="/graphrag/posts/developing/">Developing</a>
    </li>
    <!-- Indexing Links -->
    <li>
      <a href="/graphrag/posts/index/overview/">Indexing</a>
      <ul>
        <li>
          <a href="/graphrag/posts/index/0-architecture/">Architecture</a>
        </li>
        <li>
          <a href="/graphrag/posts/index/1-default_dataflow/">Dataflow</a>
        </li>
        <li>
          <a href="/graphrag/posts/index/2-cli/">CLI</a>
        </li>
        <li>
          <a href="/graphrag/posts/index/3-prompt_tuning/">Prompt Tuning</a>
        </li>
        <li>
          <a href="/graphrag/posts/config/overview/">Configuration</a>
          <ul>
            <li>
              <a href="/graphrag/posts/config/env_vars">Using Env Vars</a>
            </li>
            <li>
              <a href="/graphrag/posts/config/json_yaml">Using JSON or YAML</a>
            </li>
            <li>
              <a href="/graphrag/posts/config/custom">Fully Custom</a>
            </li>
            <li>
              <!-- This page: flagged as the current location for assistive technology. -->
              <a href="/graphrag/posts/config/template" aria-current="page">Template</a>
            </li>
          </ul>
        </li>
      </ul>
    </li>
    <!-- Query Links -->
    <li>
      <a href="/graphrag/posts/query/overview/">Query</a>
      <ul>
        <li>
          <a href="/graphrag/posts/query/0-global_search/">Global Search</a>
        </li>
        <li>
          <a href="/graphrag/posts/query/1-local_search/">Local Search</a>
        </li>
        <li>
          <a href="/graphrag/posts/query/2-question_generation/">Question Generation</a>
        </li>
        <li>
          <a href="/graphrag/posts/query/3-cli/">CLI</a>
        </li>
        <li>
          <a href="/graphrag/posts/query/notebooks/overview/">Notebooks</a>
          <ul>
            <li>
              <a href="/graphrag/posts/query/notebooks/global_search_nb">Global Search</a>
            </li>
            <li>
              <a href="/graphrag/posts/query/notebooks/local_search_nb">Local Search</a>
            </li>
          </ul>
        </li>
      </ul>
    </li>
  </ul>
</nav>
<!-- Main Content -->
<main>
<h1>Configuration Template</h1>
<p>The following template can be used and stored as a <code>.env</code> file in the directory where you are pointing
the <code>--root</code> parameter on your Indexing Pipeline execution.</p>
<p>For details about how to run the Indexing Pipeline, refer to the <a href="/graphrag/posts/index/2-cli/">Index CLI</a> documentation.</p>
<h2>.env File Template</h2>
<p>Required variables are uncommented. All the optional configuration can be turned on or off as needed.</p>
<h3>Minimal Configuration</h3>
<div style="position: relative">
<pre class="language-bash"><code id="code-15" class="language-bash"><span class="token comment"># Base LLM Settings</span>
<span class="token assign-left variable">GRAPHRAG_API_KEY</span><span class="token operator">=</span><span class="token string">"your_api_key"</span>
<span class="token assign-left variable">GRAPHRAG_API_BASE</span><span class="token operator">=</span><span class="token string">"http://&lt;domain>.openai.azure.com"</span> <span class="token comment"># For Azure OpenAI Users</span>
<span class="token assign-left variable">GRAPHRAG_API_VERSION</span><span class="token operator">=</span><span class="token string">"api_version"</span> <span class="token comment"># For Azure OpenAI Users</span>
<span class="token comment"># Text Generation Settings</span>
<span class="token assign-left variable">GRAPHRAG_LLM_TYPE</span><span class="token operator">=</span><span class="token string">"azure_openai_chat"</span> <span class="token comment"># or openai_chat</span>
<span class="token assign-left variable">GRAPHRAG_LLM_DEPLOYMENT_NAME</span><span class="token operator">=</span><span class="token string">"gpt-4-turbo-preview"</span>
<span class="token assign-left variable">GRAPHRAG_LLM_MODEL_SUPPORTS_JSON</span><span class="token operator">=</span>True
<span class="token comment"># Text Embedding Settings</span>
<span class="token assign-left variable">GRAPHRAG_EMBEDDING_TYPE</span><span class="token operator">=</span><span class="token string">"azure_openai_embedding"</span> <span class="token comment"># or openai_embedding</span>
<span class="token assign-left variable">GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME</span><span class="token operator">=</span><span class="token string">"text-embedding-3-small"</span>
<span class="token comment"># Data Mapping Settings</span>
<span class="token assign-left variable">GRAPHRAG_INPUT_TYPE</span><span class="token operator">=</span><span class="token string">"text"</span>
</code></pre>
<!-- Copy-to-clipboard control for the snippet above (target: #code-15).
     type="button" keeps it from acting as a submit button, aria-label names the
     icon-only control, and the default focus outline is no longer suppressed. -->
<button class="code-copy" type="button" data-clipboard-target="#code-15" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy code to clipboard">
  <span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
</button>
</div>
<h3>Full Configuration</h3>
<div style="position: relative">
<pre class="language-bash"><code id="code-19" class="language-bash">
<span class="token comment"># Required LLM Config</span>
<span class="token comment"># Input Data Configuration</span>
<span class="token assign-left variable">GRAPHRAG_INPUT_TYPE</span><span class="token operator">=</span>text
<span class="token comment"># Plaintext Input Data Configuration</span>
<span class="token comment"># GRAPHRAG_INPUT_FILE_PATTERN=.*\.txt</span>
<span class="token comment"># CSV Input Data Configuration</span>
<span class="token assign-left variable">GRAPHRAG_INPUT_TYPE</span><span class="token operator">=</span><span class="token string">"csv"</span>
<span class="token assign-left variable">GRAPHRAG_INPUT_FILE_PATTERN</span><span class="token operator">=</span><span class="token string">".*\.csv$"</span>
<span class="token assign-left variable">GRAPHRAG_INPUT_SOURCE_COLUMN</span><span class="token operator">=</span>source
<span class="token comment"># GRAPHRAG_INPUT_TIMESTAMP_COLUMN=None</span>
<span class="token comment"># GRAPHRAG_INPUT_TIMESTAMP_FORMAT=None</span>
<span class="token comment"># GRAPHRAG_INPUT_TEXT_COLUMN="text"</span>
<span class="token comment"># GRAPHRAG_INPUT_ATTRIBUTE_COLUMNS=id</span>
<span class="token comment"># GRAPHRAG_INPUT_TITLE_COLUMN="title"</span>
<span class="token comment"># GRAPHRAG_INPUT_STORAGE_TYPE="file"</span>
<span class="token comment"># GRAPHRAG_INPUT_CONNECTION_STRING=None</span>
<span class="token comment"># GRAPHRAG_INPUT_CONTAINER_NAME=None</span>
<span class="token comment"># GRAPHRAG_INPUT_BASE_DIR=None</span>
<span class="token comment"># Base LLM Settings</span>
<span class="token assign-left variable">GRAPHRAG_API_KEY</span><span class="token operator">=</span><span class="token string">"your_api_key"</span>
<span class="token assign-left variable">GRAPHRAG_API_BASE</span><span class="token operator">=</span><span class="token string">"http://&lt;domain>.openai.azure.com"</span> <span class="token comment"># For Azure OpenAI Users</span>
<span class="token assign-left variable">GRAPHRAG_API_VERSION</span><span class="token operator">=</span><span class="token string">"api_version"</span> <span class="token comment"># For Azure OpenAI Users</span>
<span class="token comment"># GRAPHRAG_API_ORGANIZATION=None</span>
<span class="token comment"># GRAPHRAG_API_PROXY=None</span>
<span class="token comment"># Text Generation Settings</span>
<span class="token comment"># GRAPHRAG_LLM_TYPE=openai_chat</span>
<span class="token assign-left variable">GRAPHRAG_LLM_API_KEY</span><span class="token operator">=</span><span class="token string">"your_api_key"</span> <span class="token comment"># If GRAPHRAG_API_KEY is not set</span>
<span class="token assign-left variable">GRAPHRAG_LLM_API_BASE</span><span class="token operator">=</span><span class="token string">"http://&lt;domain>.openai.azure.com"</span> <span class="token comment"># For Azure OpenAI Users and if GRAPHRAG_API_BASE is not set</span>
<span class="token assign-left variable">GRAPHRAG_LLM_API_VERSION</span><span class="token operator">=</span><span class="token string">"api_version"</span> <span class="token comment"># For Azure OpenAI Users and if GRAPHRAG_API_VERSION is not set</span>
<span class="token assign-left variable">GRAPHRAG_LLM_MODEL_SUPPORTS_JSON</span><span class="token operator">=</span>True <span class="token comment"># Suggested by default</span>
<span class="token comment"># GRAPHRAG_LLM_API_ORGANIZATION=None</span>
<span class="token comment"># GRAPHRAG_LLM_API_PROXY=None</span>
<span class="token comment"># GRAPHRAG_LLM_DEPLOYMENT_NAME=None</span>
<span class="token comment"># GRAPHRAG_LLM_MODEL=gpt-4-turbo-preview</span>
<span class="token comment"># GRAPHRAG_LLM_MAX_TOKENS=4000</span>
<span class="token comment"># GRAPHRAG_LLM_REQUEST_TIMEOUT=180</span>
<span class="token comment"># GRAPHRAG_LLM_THREAD_COUNT=50</span>
<span class="token comment"># GRAPHRAG_LLM_THREAD_STAGGER=0.3</span>
<span class="token comment"># GRAPHRAG_LLM_CONCURRENT_REQUESTS=25</span>
<span class="token comment"># GRAPHRAG_LLM_TPM=0</span>
<span class="token comment"># GRAPHRAG_LLM_RPM=0</span>
<span class="token comment"># GRAPHRAG_LLM_MAX_RETRIES=10</span>
<span class="token comment"># GRAPHRAG_LLM_MAX_RETRY_WAIT=10</span>
<span class="token comment"># GRAPHRAG_LLM_SLEEP_ON_RATE_LIMIT_RECOMMENDATION=True</span>
<span class="token comment"># Text Embedding Settings</span>
<span class="token comment"># GRAPHRAG_EMBEDDING_TYPE=openai_embedding</span>
<span class="token assign-left variable">GRAPHRAG_EMBEDDING_API_KEY</span><span class="token operator">=</span><span class="token string">"your_api_key"</span> <span class="token comment"># If GRAPHRAG_API_KEY is not set</span>
<span class="token assign-left variable">GRAPHRAG_EMBEDDING_API_BASE</span><span class="token operator">=</span><span class="token string">"http://&lt;domain>.openai.azure.com"</span> <span class="token comment"># For Azure OpenAI Users and if GRAPHRAG_API_BASE is not set</span>
<span class="token assign-left variable">GRAPHRAG_EMBEDDING_API_VERSION</span><span class="token operator">=</span><span class="token string">"api_version"</span> <span class="token comment"># For Azure OpenAI Users and if GRAPHRAG_API_VERSION is not set</span>
<span class="token comment"># GRAPHRAG_EMBEDDING_API_ORGANIZATION=None</span>
<span class="token comment"># GRAPHRAG_EMBEDDING_API_PROXY=None</span>
<span class="token comment"># GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME=None</span>
<span class="token comment"># GRAPHRAG_EMBEDDING_MODEL=text-embedding-3-small</span>
<span class="token comment"># GRAPHRAG_EMBEDDING_BATCH_SIZE=16</span>
<span class="token comment"># GRAPHRAG_EMBEDDING_BATCH_MAX_TOKENS=8191</span>
<span class="token comment"># GRAPHRAG_EMBEDDING_TARGET=required</span>
<span class="token comment"># GRAPHRAG_EMBEDDING_SKIP=None</span>
<span class="token comment"># GRAPHRAG_EMBEDDING_THREAD_COUNT=None</span>
<span class="token comment"># GRAPHRAG_EMBEDDING_THREAD_STAGGER=50</span>
<span class="token comment"># GRAPHRAG_EMBEDDING_CONCURRENT_REQUESTS=25</span>
<span class="token comment"># GRAPHRAG_EMBEDDING_TPM=0</span>
<span class="token comment"># GRAPHRAG_EMBEDDING_RPM=0</span>
<span class="token comment"># GRAPHRAG_EMBEDDING_MAX_RETRIES=10</span>
<span class="token comment"># GRAPHRAG_EMBEDDING_MAX_RETRY_WAIT=10</span>
<span class="token comment"># GRAPHRAG_EMBEDDING_SLEEP_ON_RATE_LIMIT_RECOMMENDATION=True</span>
<span class="token comment"># Data Mapping Settings</span>
<span class="token comment"># GRAPHRAG_INPUT_ENCODING=utf-8</span>
<span class="token comment"># Data Chunking</span>
<span class="token comment"># GRAPHRAG_CHUNK_SIZE=300</span>
<span class="token comment"># GRAPHRAG_CHUNK_OVERLAP=100</span>
<span class="token comment"># GRAPHRAG_CHUNK_BY_COLUMNS=id</span>
<span class="token comment"># Prompting Overrides</span>
<span class="token comment"># GRAPHRAG_ENTITY_EXTRACTION_PROMPT_FILE=None</span>
<span class="token comment"># GRAPHRAG_ENTITY_EXTRACTION_MAX_GLEANINGS=0</span>
<span class="token comment"># GRAPHRAG_ENTITY_EXTRACTION_ENTITY_TYPES=organization,person,event,geo</span>
<span class="token comment"># GRAPHRAG_SUMMARIZE_DESCRIPTIONS_PROMPT_FILE=None</span>
<span class="token comment"># GRAPHRAG_SUMMARIZE_DESCRIPTIONS_MAX_LENGTH=500</span>
<span class="token comment"># GRAPHRAG_CLAIM_EXTRACTION_DESCRIPTION="Any claims or facts that could be relevant to threat analysis."</span>
<span class="token comment"># GRAPHRAG_CLAIM_EXTRACTION_PROMPT_FILE=None</span>
<span class="token comment"># GRAPHRAG_CLAIM_EXTRACTION_MAX_GLEANINGS=0</span>
<span class="token comment"># GRAPHRAG_COMMUNITY_REPORT_PROMPT_FILE=None</span>
<span class="token comment"># GRAPHRAG_COMMUNITY_REPORT_MAX_LENGTH=1500</span>
<span class="token comment"># Storage</span>
<span class="token comment"># GRAPHRAG_STORAGE_TYPE=file</span>
<span class="token comment"># GRAPHRAG_STORAGE_CONNECTION_STRING=None</span>
<span class="token comment"># GRAPHRAG_STORAGE_CONTAINER_NAME=None</span>
<span class="token comment"># GRAPHRAG_STORAGE_BASE_DIR=None</span>
<span class="token comment"># Cache</span>
<span class="token comment"># GRAPHRAG_CACHE_TYPE=file</span>
<span class="token comment"># GRAPHRAG_CACHE_CONNECTION_STRING=None</span>
<span class="token comment"># GRAPHRAG_CACHE_CONTAINER_NAME=None</span>
<span class="token comment"># GRAPHRAG_CACHE_BASE_DIR=None</span>
<span class="token comment"># Reporting</span>
<span class="token comment"># GRAPHRAG_REPORTING_TYPE=file</span>
<span class="token comment"># GRAPHRAG_REPORTING_CONNECTION_STRING=None</span>
<span class="token comment"># GRAPHRAG_REPORTING_CONTAINER_NAME=None</span>
<span class="token comment"># GRAPHRAG_REPORTING_BASE_DIR=None</span>
<span class="token comment"># Node2Vec Parameters</span>
<span class="token comment"># GRAPHRAG_NODE2VEC_ENABLED=False</span>
<span class="token comment"># GRAPHRAG_NODE2VEC_NUM_WALKS=10</span>
<span class="token comment"># GRAPHRAG_NODE2VEC_WALK_LENGTH=40</span>
<span class="token comment"># GRAPHRAG_NODE2VEC_WINDOW_SIZE=2</span>
<span class="token comment"># GRAPHRAG_NODE2VEC_ITERATIONS=3</span>
<span class="token comment"># GRAPHRAG_NODE2VEC_RANDOM_SEED=597832</span>
<span class="token comment"># Data Snapshotting</span>
<span class="token comment"># GRAPHRAG_SNAPSHOT_GRAPHML=False</span>
<span class="token comment"># GRAPHRAG_SNAPSHOT_RAW_ENTITIES=False</span>
<span class="token comment"># GRAPHRAG_SNAPSHOT_TOP_LEVEL_NODES=False</span>
<span class="token comment"># Miscellaneous Settings</span>
<span class="token comment"># GRAPHRAG_ASYNC_MODE=asyncio</span>
<span class="token comment"># GRAPHRAG_ENCODING_MODEL=cl100k_base</span>
<span class="token comment"># GRAPHRAG_MAX_CLUSTER_SIZE=10</span>
<span class="token comment"># GRAPHRAG_ENTITY_RESOLUTION_ENABLED=False</span>
<span class="token comment"># GRAPHRAG_SKIP_WORKFLOWS=None</span>
<span class="token comment"># GRAPHRAG_UMAP_ENABLED=False</span></code></pre>
<!-- Copy-to-clipboard control for the snippet above (target: #code-19).
     type="button" keeps it from acting as a submit button, aria-label names the
     icon-only control, and the default focus outline is no longer suppressed. -->
<button class="code-copy" type="button" data-clipboard-target="#code-19" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy code to clipboard">
  <span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
</button>
</div>
</main>
</div>
<footer>
  <a href="https://go.microsoft.com/fwlink/?LinkId=521839">Privacy</a>
  |
  <a href="https://go.microsoft.com/fwlink/?LinkId=2259814">Consumer Health Privacy</a>
  |
  <!-- "Cookies" triggers an in-page action, so it is a native button: focusable and
       operable from the keyboard. The inline style strips the default button chrome
       so it keeps the look of the surrounding footer text; color and cursor still
       come from the #cookiesManager rule. Visibility is toggled by checkCookieManager(). -->
  <button type="button" id="cookiesManager" onclick="manageConsent();" style="background: none; border: 0; padding: 0; font: inherit;">Cookies</button>
  <span id="divider">|</span>
  <a href="https://go.microsoft.com/fwlink/?LinkID=206977">Terms of Use</a>
  |
  <a href="https://www.microsoft.com/trademarks">Trademarks</a>
  |
  <!-- Link text is filled in by initialize() with the current year. -->
  <a href="https://www.microsoft.com" id="copyright"></a>
  |
  <a href="https://github.com/microsoft/graphrag">GitHub</a>
</footer>
</body>
</html>