445 lines
21 KiB
HTML

<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Get Started</title>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
<link href="https://unpkg.com/prismjs@1.20.0/themes/prism-okaidia.css" rel="stylesheet">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/Primer/19.1.1/tooltips.min.css" crossorigin="anonymous" referrerpolicy="no-referrer">
<style>
  /* Page-level layout and typography overrides, loaded after the Bulma stylesheet. */
  html {
    padding: 0;
    margin: 0;
  }

  body {
    font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
    padding: 0;
    margin: 0;
  }

  /* Thin legal/links bar at the bottom of the page. */
  footer {
    width: 100%;
    height: 32px;
    font-size: 12px;
    display: flex;
    flex-direction: row;
    justify-content: center;
    gap: 18px;
    align-items: center;
    color: #5d5d5d;
    background: #e9eaeb;
    border-top: 1px solid #c4c5c6;
  }

  /* The "Cookies" footer control is not an anchor, so give it link-like affordances. */
  #cookiesManager {
    cursor: pointer;
    color: #485fc7;
  }

  /* Two-column row: sidebar menu + main content. */
  .page-content {
    display: flex;
    flex-direction: row;
    margin: 0;
    padding: 0;
    /* `auto` only shows scrollbars when content actually overflows;
       `scroll` reserved/painted them unconditionally. */
    overflow: auto;
  }

  header {
    background-color: lightgrey;
    height: 2%;
    padding: 10px;
  }

  /* NOTE(review): no <nav> element appears in this page's markup (the sidebar is
     <aside class="menu">), so this rule currently matches nothing here. */
  nav {
    padding: 1em;
    min-width: 200px;
  }

  main {
    flex: 1;
    padding: 0 5em 0 5em;
  }

  .logotitle {
    font-size: 1.5em;
    font-weight: bold;
    margin: 5px;
  }

  /* Presumably neutralises CSS-framework styles for `.number` / `.tag`, which
     collide with the syntax highlighter's token class names - confirm. */
  .number {
    all: unset;
  }

  .tag.token {
    all: unset;
  }

  main ul {
    list-style-type: disc;
    padding-left: 30px;
    margin-top: 10px;
  }

  h1 {
    font-size: 2rem;
    margin-top: 10px;
  }

  h2 {
    font-size: 1.5rem;
    margin-top: 10px;
    font-weight: 500;
  }

  h3 {
    font-size: 1rem;
    margin-top: 10px;
    font-weight: 500;
  }

  p {
    margin-top: 10px;
  }
</style>
<script type="module" async>
  // Load Mermaid as an ES module and configure it once at module evaluation.
  //
  // The previous code passed the *result* of mermaid.initialize(...) to
  // addEventListener (i.e. it called initialize immediately and registered
  // `undefined` as the DOMContentLoaded listener). Calling initialize directly
  // keeps the same timing without the bogus listener registration.
  //
  // `startOnLoad` is Mermaid's documented option for rendering diagrams
  // automatically once the page has loaded; the former `loadOnSave` key is not
  // a Mermaid configuration option.
  import mermaid from "https://unpkg.com/mermaid@10/dist/mermaid.esm.min.mjs";

  mermaid.initialize({ startOnLoad: true });
</script>
<script>
  /**
   * Show a short status tooltip on a copy button after a clipboard action.
   *
   * @param {object} event   ClipboardJS event; `event.trigger` is the clicked element.
   * @param {string} message Text to expose through the tooltip (e.g. "Copied!").
   */
  function showTooltip(event, message) {
    var trigger = event.trigger;
    var icon = trigger.children[0];
    // Nothing to do if the trigger itself is already a tooltip host, or if the
    // trigger has no child element to carry the tooltip.
    if (trigger.className.includes("tooltipped") || !icon) {
      return;
    }
    icon.className = "tooltipped tooltipped-s";
    // Set the attribute (not the `ariaLabel` property) so the value is present
    // in the DOM even in browsers that do not reflect ARIA properties.
    icon.setAttribute("aria-label", message);
  }

  // Wire up every ".code-copy" button once all resources (including the
  // asynchronously loaded ClipboardJS script) have finished loading.
  window.addEventListener("load", () => {
    // If the CDN script failed to load, skip setup instead of throwing.
    if (typeof ClipboardJS === "undefined") {
      return;
    }
    var clipboard = new ClipboardJS(".code-copy");
    clipboard.on("success", (event) => showTooltip(event, "Copied!"));
    clipboard.on("error", (event) => showTooltip(event, "Failed..."));
  });
</script>
<script async="" src="https://cdn.jsdelivr.net/npm/clipboard@2.0.11/dist/clipboard.min.js"></script>
<script src="https://wcpstatic.microsoft.com/mscc/lib/v2/wcp-consent.js" type="text/javascript"></script>
<script>
  /**
   * Callback handed to WcpConsent.init as its consent-change handler.
   * Currently it only logs the preferences object it receives.
   */
  function onConsentChanged(categoryPreferences) {
    console.log("onConsentChanged", categoryPreferences);
  }

  // Consent handle delivered by WcpConsent.init; stays undefined when the
  // consent library is unavailable or its initialisation reports an error.
  var siteConsent;

  /**
   * DOMContentLoaded handler: fills in the footer copyright text and, when the
   * WcpConsent library is present, initialises the cookie banner inside the
   * element with id "cookie-banner".
   */
  function initialize() {
    var currentYear = new Date().getFullYear();
    // Plain text only, so textContent is sufficient and avoids HTML parsing.
    document.getElementById("copyright").textContent = `©️ ${currentYear} Microsoft`;
    window.WcpConsent && WcpConsent.init("en-US", "cookie-banner", function (err, _siteConsent) {
      if (!err) {
        siteConsent = _siteConsent; // siteConsent is used to get the current consent
      } else {
        console.log("Error initializing WcpConsent: " + err);
      }
    }, onConsentChanged, WcpConsent.themes.light);
  }
  addEventListener("DOMContentLoaded", initialize);

  /**
   * Open the consent preferences UI when consent is required.
   * Safe to call before (or without) successful initialisation: in that case
   * `siteConsent` is undefined and the call is a no-op instead of a TypeError.
   */
  function manageConsent() {
    if (siteConsent && siteConsent.isConsentRequired) {
      siteConsent.manageConsent();
    }
  }
</script>
</head>
<body>
<header>
  <!-- Mount point for the cookie banner; its id is passed to WcpConsent.init -->
  <div id="cookie-banner"></div>
  <!-- Site title linking to the documentation root -->
  <a href="/graphrag/"><span class="logotitle">GraphRAG</span></a>
</header>
<div class="page-content">
<!-- Sidebar -->
<!-- Sidebar menu. Each top-level entry is its own list item; nested lists
     marked `hidden` are kept in the markup but not displayed. -->
<aside class="menu" aria-label="Documentation">
  <ul class="menu-list">
    <li>
      <a href="/graphrag/">Welcome</a>
    </li>
    <!-- Get Started Links -->
    <li>
      <a href="/graphrag/posts/get_started/" class="is-active" aria-current="page">Get Started</a>
    </li>
    <li>
      <a href="/graphrag/posts/developing/">Developing</a>
    </li>
    <!-- Indexing Links -->
    <li>
      <a href="/graphrag/posts/index/overview/">Indexing</a>
      <ul>
        <li><a href="/graphrag/posts/index/0-architecture/">Architecture</a></li>
        <li><a href="/graphrag/posts/index/1-default_dataflow/">Dataflow</a></li>
        <li><a href="/graphrag/posts/index/2-cli/">CLI</a></li>
        <li>
          <a href="/graphrag/posts/index/workflows/overview/">Workflows</a>
          <ul hidden>
            <li><a href="/graphrag/posts/index/workflows/create_base_documents/">create_base_documents</a></li>
            <li><a href="/graphrag/posts/index/workflows/create_base_entity_graph/">create_base_entity_graph</a></li>
            <li><a href="/graphrag/posts/index/workflows/create_final_covariates/">create_final_covariates</a></li>
            <li><a href="/graphrag/posts/index/workflows/create_final_entities/">create_final_entities</a></li>
            <li><a href="/graphrag/posts/index/workflows/create_final_nodes/">create_final_nodes</a></li>
            <li><a href="/graphrag/posts/index/workflows/create_final_text_units/">create_final_text_units</a></li>
            <li><a href="/graphrag/posts/index/workflows/create_base_extracted_entities/">create_base_extracted_entities</a></li>
            <li><a href="/graphrag/posts/index/workflows/create_base_text_units/">create_base_text_units</a></li>
            <li><a href="/graphrag/posts/index/workflows/create_final_communities/">create_final_communities</a></li>
            <li><a href="/graphrag/posts/index/workflows/create_final_community_reports/">create_final_community_reports</a></li>
            <li><a href="/graphrag/posts/index/workflows/create_final_documents/">create_final_documents</a></li>
            <li><a href="/graphrag/posts/index/workflows/create_final_relationships/">create_final_relationships</a></li>
            <li><a href="/graphrag/posts/index/workflows/create_summarized_entities/">create_summarized_entities</a></li>
          </ul>
        </li>
        <li>
          <a href="/graphrag/posts/index/verbs/overview/">Verbs</a>
          <ul hidden>
            <li><a href="/graphrag/posts/index/verbs/genid/">genid</a></li>
            <li><a href="/graphrag/posts/index/verbs/spread_json/">spread_json</a></li>
            <li><a href="/graphrag/posts/index/verbs/aggregate/">aggregate</a></li>
            <li><a href="/graphrag/posts/index/verbs/concat/">concat</a></li>
            <li><a href="/graphrag/posts/index/verbs/merge/">merge</a></li>
            <li><a href="/graphrag/posts/index/verbs/text_split/">text_split</a></li>
            <li><a href="/graphrag/posts/index/verbs/unzip/">unzip</a></li>
            <li><a href="/graphrag/posts/index/verbs/zip/">zip</a></li>
            <li><a href="/graphrag/posts/index/verbs/chunk/">chunk</a></li>
            <li><a href="/graphrag/posts/index/verbs/text_replace/">text_replace</a></li>
            <li><a href="/graphrag/posts/index/verbs/create_graph/">create_graph</a></li>
            <li><a href="/graphrag/posts/index/verbs/unpack_graph/">unpack_graph</a></li>
            <li><a href="/graphrag/posts/index/verbs/layout_graph/">layout_graph</a></li>
            <li><a href="/graphrag/posts/index/verbs/merge_graphs/">merge_graphs</a></li>
            <li><a href="/graphrag/posts/index/verbs/cluster_graph/">cluster_graph</a></li>
          </ul>
        </li>
        <li>
          <a href="/graphrag/posts/config/overview/">Configuration</a>
          <ul>
            <li><a href="/graphrag/posts/config/env_vars">Using Env Vars</a></li>
            <li><a href="/graphrag/posts/config/json_yaml">Using JSON or YAML</a></li>
            <li><a href="/graphrag/posts/config/custom">Fully Custom</a></li>
          </ul>
        </li>
      </ul>
    </li>
    <!-- Query Links -->
    <li>
      <a href="/graphrag/posts/query/overview/">Query</a>
      <ul>
        <li><a href="/graphrag/posts/query/0-global_search/">Global Search</a></li>
        <li><a href="/graphrag/posts/query/1-local_search/">Local Search</a></li>
        <li><a href="/graphrag/posts/query/2-question_generation/">Question Generation</a></li>
        <li><a href="/graphrag/posts/query/3-cli/">CLI</a></li>
        <li>
          <a href="/graphrag/posts/query/notebooks/overview/">Notebooks</a>
          <ul>
            <li><a href="/graphrag/posts/query/notebooks/global_search_nb">Global Search</a></li>
            <li><a href="/graphrag/posts/query/notebooks/local_search_nb">Local Search</a></li>
          </ul>
        </li>
      </ul>
    </li>
  </ul>
</aside>
<!-- Main Content -->
<main>
<h1>Get Started</h1>
<h2>Requirements</h2>
<p><a href="https://www.python.org/downloads/">Python 3.10 or 3.11</a></p>
<p>To get started with the GraphRAG system, you have a few options:</p>
<p>👉 <a href="https://pypi.org/project/graphrag/">Install from PyPI</a>. <br>
👉 <a href="/graphrag/posts/developing">Use it from source</a><br></p>
<h1>Top-Level Packages</h1>
<p><a href="/graphrag/posts/index/overview">Indexing Pipeline Overview</a><br>
<a href="/graphrag/posts/query/overview">Query Engine Overview</a></p>
<h1>Overview</h1>
<p>The following is a simple end-to-end example for using the GraphRAG system.
It shows how to use the system to index some text, and then use the indexed data to answer questions about the documents.</p>
<h1>Install GraphRAG</h1>
<div style="position: relative">
<!-- Code sample; the button copies the contents of #code-27 (handled by the ".code-copy" ClipboardJS hookup). -->
<pre class="language-bash"><code id="code-27" class="language-bash">pip <span class="token function">install</span> graphrag</code></pre>
<!-- type="button" + aria-label give the icon-only control explicit semantics and a name; the focus outline is left visible for keyboard users. -->
<button type="button" class="code-copy" data-clipboard-target="#code-27" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy">
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;"></span>
</button>
</div>
<h1>Running the Indexer</h1>
<p>Now we need to set up a data project and some initial configuration. Let's set that up. We're using the <a href="/graphrag/posts/config/overview/">default configuration mode</a>, which you can customize as needed using <a href="/graphrag/posts/config/env_vars/">environment variables</a> or using a <a href="/graphrag/posts/config/json_yaml/">config file</a>.</p>
<p>First let's get a sample dataset ready:</p>
<div style="position: relative">
<!-- Code sample; the button copies the contents of #code-37 (handled by the ".code-copy" ClipboardJS hookup). -->
<pre class="language-sh"><code id="code-37" class="language-sh"><span class="token function">mkdir</span> <span class="token parameter variable">-p</span> ./ragtest/input</code></pre>
<!-- type="button" + aria-label give the icon-only control explicit semantics and a name; the focus outline is left visible for keyboard users. -->
<button type="button" class="code-copy" data-clipboard-target="#code-37" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy">
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;"></span>
</button>
</div>
<p>Now let's get a copy of A Christmas Carol by Charles Dickens from a trusted source:</p>
<div style="position: relative">
<!-- Code sample; the button copies the contents of #code-41 (handled by the ".code-copy" ClipboardJS hookup). -->
<pre class="language-sh"><code id="code-41" class="language-sh"><span class="token function">curl</span> https://www.gutenberg.org/cache/epub/24022/pg24022.txt <span class="token operator">></span> ./ragtest/input/book.txt</code></pre>
<!-- type="button" + aria-label give the icon-only control explicit semantics and a name; the focus outline is left visible for keyboard users. -->
<button type="button" class="code-copy" data-clipboard-target="#code-41" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy">
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;"></span>
</button>
</div>
<p>Next we'll inject some required config variables:</p>
<h2>Set Up Environment Variables</h2>
<p>First let's make sure to set up the required environment variables:</p>
<ul>
<li><code>GRAPHRAG_API_KEY</code> - API Key for executing the model, will fall back to <code>OPENAI_API_KEY</code> if one is not provided.</li>
<li><code>GRAPHRAG_LLM_MODEL</code> - Model to use for Chat Completions.</li>
<li><code>GRAPHRAG_EMBEDDING_MODEL</code> - Model to use for Embeddings.</li>
<li><code>GRAPHRAG_INPUT_TYPE</code> - Type of input data, can be <code>text</code> or <code>csv</code>.</li>
<li><code>GRAPHRAG_API_BASE</code> - Base URL for the Azure OpenAI endpoint. Only required for Azure OpenAI users.</li>
<li><code>GRAPHRAG_LLM_DEPLOYMENT_NAME</code> - Deployment name for the Chat Completions model. Only required for Azure OpenAI users.</li>
<li><code>GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME</code> - Deployment name for the Embeddings model. Only required for Azure OpenAI users.</li>
</ul>
<h4><ins>OpenAI</ins></h4>
<div style="position: relative">
<!-- Code sample; the button copies the contents of #code-91 (handled by the ".code-copy" ClipboardJS hookup). -->
<pre class="language-sh"><code id="code-91" class="language-sh"><span class="token builtin class-name">export</span> <span class="token assign-left variable">GRAPHRAG_API_KEY</span><span class="token operator">=</span><span class="token operator">&lt;</span>api_key<span class="token operator">></span> <span class="token operator">&amp;&amp;</span> <span class="token punctuation">\</span>
<span class="token builtin class-name">export</span> <span class="token assign-left variable">GRAPHRAG_LLM_MODEL</span><span class="token operator">=</span><span class="token operator">&lt;</span>chat_completions_model<span class="token operator">></span> <span class="token operator">&amp;&amp;</span> <span class="token punctuation">\</span>
<span class="token builtin class-name">export</span> <span class="token assign-left variable">GRAPHRAG_EMBEDDING_MODEL</span><span class="token operator">=</span><span class="token operator">&lt;</span>embeddings_model<span class="token operator">></span> <span class="token operator">&amp;&amp;</span> <span class="token punctuation">\</span>
<span class="token builtin class-name">export</span> <span class="token assign-left variable">GRAPHRAG_INPUT_TYPE</span><span class="token operator">=</span><span class="token string">"text"</span></code></pre>
<!-- type="button" + aria-label give the icon-only control explicit semantics and a name; the focus outline is left visible for keyboard users. -->
<button type="button" class="code-copy" data-clipboard-target="#code-91" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy">
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;"></span>
</button>
</div>
<h4><ins>Azure OpenAI</ins></h4>
<div style="position: relative">
<!-- Code sample; the button copies the contents of #code-95 (handled by the ".code-copy" ClipboardJS hookup).
     The API base uses https, which is the scheme Azure OpenAI endpoints are served over. -->
<pre class="language-sh"><code id="code-95" class="language-sh"><span class="token builtin class-name">export</span> <span class="token assign-left variable">GRAPHRAG_API_KEY</span><span class="token operator">=</span><span class="token operator">&lt;</span>api_key<span class="token operator">></span> <span class="token operator">&amp;&amp;</span> <span class="token punctuation">\</span>
<span class="token builtin class-name">export</span> <span class="token assign-left variable">GRAPHRAG_LLM_DEPLOYMENT_NAME</span><span class="token operator">=</span><span class="token operator">&lt;</span>chat_completions_model<span class="token operator">></span> <span class="token operator">&amp;&amp;</span> <span class="token punctuation">\</span>
<span class="token builtin class-name">export</span> <span class="token assign-left variable">GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME</span><span class="token operator">=</span><span class="token operator">&lt;</span>embeddings_model<span class="token operator">></span> <span class="token operator">&amp;&amp;</span> <span class="token punctuation">\</span>
<span class="token builtin class-name">export</span> <span class="token assign-left variable">GRAPHRAG_INPUT_TYPE</span><span class="token operator">=</span><span class="token string">"text"</span> <span class="token operator">&amp;&amp;</span> <span class="token punctuation">\</span>
<span class="token builtin class-name">export</span> <span class="token assign-left variable">GRAPHRAG_API_BASE</span><span class="token operator">=</span><span class="token string">"https://&lt;domain>.openai.azure.com"</span></code></pre>
<!-- type="button" + aria-label give the icon-only control explicit semantics and a name; the focus outline is left visible for keyboard users. -->
<button type="button" class="code-copy" data-clipboard-target="#code-95" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy">
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;"></span>
</button>
</div>
<p>For more details about configuring GraphRAG, see the <a href="/graphrag/posts/config/overview/">configuration documentation</a>.
For more details about using the CLI, refer to the <a href="/graphrag/posts/query/3-cli/">CLI documentation</a>.</p>
<h2>Running the Indexing pipeline</h2>
<p>Finally we'll run the pipeline!</p>
<div style="position: relative">
<!-- Code sample; the button copies the contents of #code-105 (handled by the ".code-copy" ClipboardJS hookup). -->
<pre class="language-sh"><code id="code-105" class="language-sh">python <span class="token parameter variable">-m</span> graphrag.index <span class="token parameter variable">--root</span> ./ragtest</code></pre>
<!-- type="button" + aria-label give the icon-only control explicit semantics and a name; the focus outline is left visible for keyboard users. -->
<button type="button" class="code-copy" data-clipboard-target="#code-105" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy">
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;"></span>
</button>
</div>
<p><img src="/graphrag/img/pipeline-running.png" alt="pipeline executing from the CLI"></p>
<p>This process will take some time to run. This depends on the size of your input data, what model you're using, and the text chunk size being used (these can be configured in your <code>.env</code> file).
Once the pipeline is complete, you should see a new folder called <code>./ragtest/output/&lt;timestamp&gt;/artifacts</code> with a series of parquet files.</p>
<h1>Using the Query Engine</h1>
<h2>Running the Query Engine</h2>
<p>Now let's ask some questions using this dataset.</p>
<p>Here is an example using Global search to ask a high-level question:</p>
<div style="position: relative">
<!-- Code sample; the button copies the contents of #code-124 (handled by the ".code-copy" ClipboardJS hookup).
     A space precedes every line-continuation backslash so the shell keeps "global" and the quoted question as separate arguments. -->
<pre class="language-sh"><code id="code-124" class="language-sh">python <span class="token parameter variable">-m</span> graphrag.query <span class="token punctuation">\</span>
<span class="token parameter variable">--data</span> ./ragtest/output/<span class="token operator">&lt;</span>timestamp<span class="token operator">></span>/artifacts <span class="token punctuation">\</span>
<span class="token parameter variable">--method</span> global <span class="token punctuation">\</span>
<span class="token string">"What are the top themes in this story?"</span></code></pre>
<!-- type="button" + aria-label give the icon-only control explicit semantics and a name; the focus outline is left visible for keyboard users. -->
<button type="button" class="code-copy" data-clipboard-target="#code-124" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy">
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;"></span>
</button>
</div>
<p>Here is an example using Local search to ask a more specific question about a particular character:</p>
<div style="position: relative">
<!-- Code sample; the button copies the contents of #code-128 (handled by the ".code-copy" ClipboardJS hookup). -->
<pre class="language-sh"><code id="code-128" class="language-sh">python <span class="token parameter variable">-m</span> graphrag.query <span class="token punctuation">\</span>
<span class="token parameter variable">--data</span> ./ragtest/output/<span class="token operator">&lt;</span>timestamp<span class="token operator">></span>/artifacts <span class="token punctuation">\</span>
<span class="token parameter variable">--method</span> <span class="token builtin class-name">local</span> <span class="token punctuation">\</span>
<span class="token string">"Who is Scrooge, and what are his main relationships?"</span></code></pre>
<!-- type="button" + aria-label give the icon-only control explicit semantics and a name; the focus outline is left visible for keyboard users. -->
<button type="button" class="code-copy" data-clipboard-target="#code-128" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy">
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;"></span>
</button>
</div>
<p>Please refer to <a href="/graphrag/posts/query/overview">Query Engine</a> docs for detailed information about how to leverage our Local and Global search mechanisms for extracting meaningful insights from data after the Indexer has wrapped up execution.</p>
</main>
</div>
<!-- Legal / policy links. The copyright link text is filled in by script on DOMContentLoaded. -->
<footer>
  <a href="https://go.microsoft.com/fwlink/?LinkId=521839">Privacy</a>
  |
  <a href="https://go.microsoft.com/fwlink/?LinkId=2259814">Consumer Health Privacy</a>
  |
  <!-- In-page action, so it is a real button (focusable, Enter/Space activate it).
       The inline reset strips default button chrome; colour and cursor still come from the #cookiesManager rule. -->
  <button type="button" id="cookiesManager" onclick="manageConsent();" style="background: none; border: 0; padding: 0; font: inherit;">Cookies</button>
  |
  <a href="https://go.microsoft.com/fwlink/?LinkID=206977">Terms of Use</a>
  |
  <a href="https://www.microsoft.com/trademarks">Trademarks</a>
  |
  <a href="https://www.microsoft.com" id="copyright"></a>
  |
  <a href="https://github.com/microsoft/graphrag">GitHub</a>
</footer>
</body>
</html>