<!doctype html>
<html lang="en">
<head>
  <!-- Document metadata -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Indexer CLI</title>
  <!-- Third-party stylesheets: Bulma (base styles), Prism okaidia (code highlighting theme), Primer tooltips -->
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
  <link href="https://unpkg.com/prismjs@1.20.0/themes/prism-okaidia.css" rel="stylesheet">
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/Primer/19.1.1/tooltips.min.css" crossorigin="anonymous" referrerpolicy="no-referrer">
<style>
  /* Page-level resets */
  html {
    padding: 0;
    margin: 0;
  }

  body {
    font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
    padding: 0;
    margin: 0;
  }

  /* Footer: a single centered row of legal links */
  footer {
    width: 100%;
    height: 32px;
    font-size: 12px;
    display: flex;
    flex-direction: row;
    justify-content: center;
    gap: 18px;
    align-items: center;
    color: #5d5d5d;
    background: #e9eaeb;
    border-top: 1px solid #c4c5c6;
  }

  /* The "Cookies" footer control is not an anchor, so it is styled to look like one */
  #cookiesManager {
    cursor: pointer;
    color: #485fc7;
  }

  /* Row layout holding the sidebar and the main content */
  .page-content {
    display: flex;
    flex-direction: row;
    margin: 0;
    padding: 0;
    overflow: scroll;
  }

  header {
    background-color: lightgrey;
    height: 2%;
    padding: 10px;
  }

  nav {
    padding: 1em;
    min-width: 200px;
  }

  /* Main content takes all horizontal space left over by the sidebar */
  main {
    flex: 1;
    padding: 0 5em 0 5em;
  }

  .logotitle {
    font-size: 1.5em;
    font-weight: bold;
    margin: 5px;
  }

  /* NOTE(review): presumably these undo Bulma's .number / .tag styles, which
     collide with Prism's "token number" / "token tag" classes - confirm. */
  .number {
    all: unset;
  }

  .tag.token {
    all: unset;
  }

  /* Typography for the rendered article */
  main ul {
    list-style-type: disc;
    padding-left: 30px;
    margin-top: 10px;
  }

  h1 {
    font-size: 2rem;
    margin-top: 10px;
  }

  h2 {
    font-size: 1.5rem;
    margin-top: 10px;
    font-weight: 500;
  }

  h3 {
    font-size: 1rem;
    margin-top: 10px;
    font-weight: 500;
  }

  p {
    margin-top: 10px;
  }
</style>
<script type="module" async>
  // Load Mermaid as an ES module and configure it as soon as the module is evaluated.
  import mermaid from "https://unpkg.com/mermaid@10/dist/mermaid.esm.min.mjs";

  // The previous code passed the *result* of mermaid.initialize(...) to
  // addEventListener, so initialize() already ran immediately and no listener
  // was ever registered. Calling it directly keeps that timing without the
  // bogus listener. "startOnLoad" is the Mermaid option name for rendering
  // diagrams on page load.
  mermaid.initialize({ startOnLoad: true });
</script>
<script>
  /**
   * Shows a tooltip on the first child of the element that triggered a
   * clipboard event, unless the trigger itself already has a "tooltipped" class.
   *
   * @param {object} event   Clipboard event; its `trigger` is the clicked element.
   * @param {string} message Text stored as the child's aria-label.
   */
  function showTooltip(event, message) {
    if (!event.trigger.className.includes("tooltipped")) {
      var icon = event.trigger.children[0];
      icon.className = "tooltipped tooltipped-s";
      icon.ariaLabel = message;
    }
  }

  // Wire up every ".code-copy" button once the page has finished loading.
  window.addEventListener("load", () => {
    // ClipboardJS comes from a separate async CDN script; skip the wiring
    // instead of throwing a ReferenceError if it is not available.
    if (typeof ClipboardJS === "undefined") {
      return;
    }
    var clipboard = new ClipboardJS(".code-copy");
    clipboard.on("success", (event) => showTooltip(event, "Copied!"));
    clipboard.on("error", (event) => showTooltip(event, "Failed..."));
  });
</script>
<!-- clipboard.js provides the ClipboardJS constructor used by the copy buttons -->
<script async src="https://cdn.jsdelivr.net/npm/clipboard@2.0.11/dist/clipboard.min.js"></script>
<!-- Consent library; the inline consent script uses its WcpConsent global -->
<script src="https://wcpstatic.microsoft.com/mscc/lib/v2/wcp-consent.js"></script>
<script>
  // Callback handed to WcpConsent.init; it only logs the preferences it receives.
  function onConsentChanged(categoryPreferences) {
    console.log("onConsentChanged", categoryPreferences);
  }

  // Set by the WcpConsent.init callback on success; stays undefined otherwise.
  var siteConsent;

  // Fills in the footer copyright text and initializes the cookie-consent banner.
  function initialize() {
    var currentYear = new Date().getFullYear();
    // Plain text only, so textContent is sufficient.
    document.getElementById("copyright").textContent = `©️ ${currentYear} Microsoft`;
    // Only initialize when the consent library actually loaded.
    window.WcpConsent && WcpConsent.init("en-US", "cookie-banner", function (err, _siteConsent) {
      if (!err) {
        siteConsent = _siteConsent; // siteConsent is used to get the current consent
      } else {
        console.log("Error initializing WcpConsent: " + err);
      }
    }, onConsentChanged, WcpConsent.themes.light);
  }
  addEventListener("DOMContentLoaded", initialize);

  // Invoked from the footer "Cookies" control.
  function manageConsent() {
    // siteConsent is undefined when the library is missing or init failed.
    if (siteConsent && siteConsent.isConsentRequired) {
      siteConsent.manageConsent();
    }
  }
</script>
</head>
<body>
<header>
  <!-- Element id passed to WcpConsent.init as the banner container -->
  <div id="cookie-banner"></div>
  <a href="/"><span class="logotitle">GraphRAG</span></a>
</header>
<!-- Flex row: sidebar + main content -->
<div class="page-content">
<!-- Sidebar -->
<aside class="menu">
  <ul class="menu-list">
    <li>
      <a href="/">Welcome</a>
    </li>
    <!-- Get Started Links -->
    <li>
      <a href="/_posts/get_started/">Get Started</a>
      <a href="/_posts/developing/">Developing</a>
    </li>
    <!-- Indexing Links -->
    <li>
      <a href="/_posts/_index/overview/">Indexing</a>
      <ul>
        <li>
          <a href="/_posts/_index/0-architecture/">Architecture</a>
        </li>
        <li>
          <a href="/_posts/_index/1-default_dataflow/">Dataflow</a>
        </li>
        <li>
          <a href="/_posts/_index/2-cli/" class="is-active" aria-current="page">CLI</a>
        </li>
        <li>
          <a href="/_posts/_index/_workflows/overview/">Workflows</a>
          <!-- NOTE(review): entries exist under both /_indexing/ and /_index/ paths; confirm which set is current -->
          <ul hidden>
            <li>
              <a href="/_posts/_indexing/_workflows/create_base_document_graph/">create_base_document_graph</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_workflows/create_base_document_nodes/">create_base_document_nodes</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_workflows/create_base_documents/">create_base_documents</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_workflows/create_base_entity_graph/">create_base_entity_graph</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_workflows/create_base_entity_nodes/">create_base_entity_nodes</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_workflows/create_base_extracted_entities/">create_base_extracted_entities</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_workflows/create_base_text_units/">create_base_text_units</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_workflows/create_final_communities/">create_final_communities</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_workflows/create_final_community_reports/">create_final_community_reports</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_workflows/create_final_covariates/">create_final_covariates</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_workflows/create_final_documents/">create_final_documents</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_workflows/create_final_entities/">create_final_entities</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_workflows/create_final_nodes/">create_final_nodes</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_workflows/create_final_relationships/">create_final_relationships</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_workflows/create_final_text_units/">create_final_text_units</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_workflows/create_summarized_entities/">create_summarized_entities</a>
            </li>
            <li>
              <a href="/_posts/_index/_workflows/create_base_documents/">create_base_documents</a>
            </li>
            <li>
              <a href="/_posts/_index/_workflows/create_final_communities/">create_final_communities</a>
            </li>
            <li>
              <a href="/_posts/_index/_workflows/create_final_community_reports/">create_final_community_reports</a>
            </li>
            <li>
              <a href="/_posts/_index/_workflows/create_final_relationships/">create_final_relationships</a>
            </li>
            <li>
              <a href="/_posts/_index/_workflows/create_summarized_entities/">create_summarized_entities</a>
            </li>
            <li>
              <a href="/_posts/_index/_workflows/create_base_entity_graph/">create_base_entity_graph</a>
            </li>
            <li>
              <a href="/_posts/_index/_workflows/create_base_extracted_entities/">create_base_extracted_entities</a>
            </li>
            <li>
              <a href="/_posts/_index/_workflows/create_base_text_units/">create_base_text_units</a>
            </li>
            <li>
              <a href="/_posts/_index/_workflows/create_final_covariates/">create_final_covariates</a>
            </li>
            <li>
              <a href="/_posts/_index/_workflows/create_final_documents/">create_final_documents</a>
            </li>
            <li>
              <a href="/_posts/_index/_workflows/create_final_entities/">create_final_entities</a>
            </li>
            <li>
              <a href="/_posts/_index/_workflows/create_final_nodes/">create_final_nodes</a>
            </li>
            <li>
              <a href="/_posts/_index/_workflows/create_final_text_units/">create_final_text_units</a>
            </li>
          </ul>
        </li>
        <li>
          <a href="/_posts/_index/_verbs/overview/">Verbs</a>
          <!-- NOTE(review): entries exist under both /_indexing/ and /_index/ paths; confirm which set is current -->
          <ul hidden>
            <li>
              <a href="/_posts/_indexing/_verbs/aggregate/">aggregate</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_verbs/chunk/">chunk</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_verbs/cluster_graph/">cluster_graph</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_verbs/concat/">concat</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_verbs/create_graph/">create_graph</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_verbs/genid/">genid</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_verbs/layout_graph/">layout_graph</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_verbs/merge/">merge</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_verbs/merge_graphs/">merge_graphs</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_verbs/noop/">noop</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_verbs/spread_json/">spread_json</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_verbs/text_replace/">text_replace</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_verbs/text_split/">text_split</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_verbs/unpack_graph/">unpack_graph</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_verbs/unzip/">unzip</a>
            </li>
            <li>
              <a href="/_posts/_indexing/_verbs/zip/">zip</a>
            </li>
            <li>
              <a href="/_posts/_index/_verbs/genid/">genid</a>
            </li>
            <li>
              <a href="/_posts/_index/_verbs/spread_json/">spread_json</a>
            </li>
            <li>
              <a href="/_posts/_index/_verbs/unzip/">unzip</a>
            </li>
            <li>
              <a href="/_posts/_index/_verbs/zip/">zip</a>
            </li>
            <li>
              <a href="/_posts/_index/_verbs/aggregate/">aggregate</a>
            </li>
            <li>
              <a href="/_posts/_index/_verbs/concat/">concat</a>
            </li>
            <li>
              <a href="/_posts/_index/_verbs/merge/">merge</a>
            </li>
            <li>
              <a href="/_posts/_index/_verbs/text_split/">text_split</a>
            </li>
            <li>
              <a href="/_posts/_index/_verbs/chunk/">chunk</a>
            </li>
            <li>
              <a href="/_posts/_index/_verbs/text_replace/">text_replace</a>
            </li>
            <li>
              <a href="/_posts/_index/_verbs/unpack_graph/">unpack_graph</a>
            </li>
            <li>
              <a href="/_posts/_index/_verbs/create_graph/">create_graph</a>
            </li>
            <li>
              <a href="/_posts/_index/_verbs/merge_graphs/">merge_graphs</a>
            </li>
            <li>
              <a href="/_posts/_index/_verbs/layout_graph/">layout_graph</a>
            </li>
            <li>
              <a href="/_posts/_index/_verbs/cluster_graph/">cluster_graph</a>
            </li>
          </ul>
        </li>
        <li>
          <a href="/_posts/_config/overview/">Configuration</a>
          <ul>
            <li>
              <a href="/_posts/_config/env_vars">Using Env Vars</a>
            </li>
            <li>
              <a href="/_posts/_config/json_yaml">Using JSON or YAML</a>
            </li>
            <li>
              <a href="/_posts/_config/custom">Fully Custom</a>
            </li>
          </ul>
        </li>
      </ul>
    </li>
    <!-- Query Links -->
    <li>
      <a href="/_posts/_query/overview/">Query</a>
      <ul>
        <li>
          <a href="/_posts/_query/0-global_search/">Global Search</a>
        </li>
        <li>
          <a href="/_posts/_query/1-local_search/">Local Search</a>
        </li>
        <li>
          <a href="/_posts/_query/2-question_generation/">Question Generation</a>
        </li>
        <li>
          <a href="/_posts/_query/3-cli/">CLI</a>
        </li>
        <li>
          <a href="/_posts/_query/notebooks/overview/">Notebooks</a>
          <ul>
            <li>
              <a href="/_posts/_query/notebooks/global_search_nb">Global Search</a>
            </li>
            <li>
              <a href="/_posts/_query/notebooks/local_search_nb">Local Search</a>
            </li>
          </ul>
        </li>
      </ul>
    </li>
  </ul>
</aside>
<!-- Main Content -->
<main>
  <h1>Indexer CLI</h1>
  <p>The GraphRAG indexer CLI allows for no-code usage of the GraphRAG Indexer.</p>
  <!-- Positioned wrapper so the copy button can sit in the top-right corner of the code sample -->
  <div style="position: relative">
    <!-- Whitespace inside <pre> is significant: continuation lines are deliberately not indented -->
    <pre class="language-bash"><code id="code-3" class="language-bash">python <span class="token parameter variable">-m</span> graphrag.index <span class="token parameter variable">--verbose</span> <span class="token parameter variable">--root</span> <span class="token operator">&lt;</span>/workspace/project/root<span class="token operator">&gt;</span> <span class="token parameter variable">--config</span> <span class="token operator">&lt;</span>custom_config.yml<span class="token operator">&gt;</span>
<span class="token parameter variable">--resume</span> <span class="token operator">&lt;</span>timestamp<span class="token operator">&gt;</span> <span class="token parameter variable">--reporter</span> <span class="token operator">&lt;</span>rich<span class="token operator">|</span>print<span class="token operator">|</span>none<span class="token operator">&gt;</span> <span class="token parameter variable">--emit</span> json,csv,parquet
<span class="token parameter variable">--nocache</span></code></pre>
    <!-- Copy button: data-clipboard-target names the element whose text is copied -->
    <button class="code-copy" type="button"
      data-clipboard-target="#code-3"
      style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy">
      <span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
    </button>
  </div>
  <h2>CLI Arguments</h2>
  <ul>
    <li><code>--verbose</code> - Adds extra logging information during the run.</li>
    <li><code>--root &lt;data-project-dir&gt;</code> - the data root directory. This should contain an <code>input</code> directory with the input data, and an <code>.env</code> file with environment variables. These are described below.</li>
    <li><code>--resume &lt;output-timestamp&gt;</code> - if specified, the pipeline will attempt to resume a prior run. The parquet files from the prior run will be loaded into the system as inputs, and the workflows that generated those files will be skipped. The input value should be the timestamped output folder, e.g. "20240105-143721".</li>
    <li><code>--config &lt;config_file.yml&gt;</code> - This will opt-out of the Default Configuration mode and execute a custom configuration. If this is used, then none of the environment-variables below will apply.</li>
    <li><code>--reporter &lt;reporter&gt;</code> - This will specify the progress reporter to use. The default is <code>rich</code>. Valid values are <code>rich</code>, <code>print</code>, and <code>none</code>.</li>
    <li><code>--emit &lt;types&gt;</code> - This specifies the table output formats the pipeline should emit. The default is <code>parquet</code>. Valid values are <code>parquet</code>, <code>csv</code>, and <code>json</code>, comma-separated.</li>
    <li><code>--nocache</code> - This will disable the caching mechanism. This is useful for debugging and development, but should not be used in production.</li>
  </ul>
</main>
</div>
<footer>
  <a href="https://go.microsoft.com/fwlink/?LinkId=521839">Privacy</a>
  |
  <a href="https://go.microsoft.com/fwlink/?LinkId=2259814">Consumer Health Privacy</a>
  |
  <!-- Performs an in-page action rather than navigating, so it is exposed as a
       button and made operable with Enter/Space as well as the mouse. -->
  <span id="cookiesManager" role="button" tabindex="0" onclick="manageConsent();" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); manageConsent(); }">Cookies</span>
  |
  <a href="https://go.microsoft.com/fwlink/?LinkID=206977">Terms of Use</a>
  |
  <a href="https://www.microsoft.com/trademarks">Trademarks</a>
  |
  <!-- Link text is filled in at runtime by the script that looks up id "copyright" -->
  <a href="https://www.microsoft.com" id="copyright"></a>
  |
  <a href="https://github.com/microsoft/graphrag">GitHub</a>
</footer>
</body>
</html>