2024-04-04 01:23:24 +00:00
<!doctype html>
< html lang = "en" >
< head >
< meta charset = "utf-8" >
< meta name = "viewport" content = "width=device-width, initial-scale=1.0" >
< title > Global Search Notebook< / title >
< link rel = "stylesheet" href = "https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css" >
< link href = "https://unpkg.com/prismjs@1.20.0/themes/prism-okaidia.css" rel = "stylesheet" >
2024-04-04 15:56:27 +00:00
< link rel = "stylesheet" href = "https://cdnjs.cloudflare.com/ajax/libs/Primer/19.1.1/tooltips.min.css" crossorigin = "anonymous" referrerpolicy = "no-referrer" >
2024-04-04 01:23:24 +00:00
< style >
html {
padding: 0;
margin: 0;
}
body{
font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
padding: 0;
margin: 0;
}
footer{
width: 100%;
height: 32px;
font-size: 12px;
display: flex;
flex-direction: row;
justify-content: center;
gap: 18px;
align-items: center;
color: #5d5d5d;
background: #e9eaeb;
border-top: 1px solid #c4c5c6;
}
#cookiesManager{
cursor: pointer;
color: #485fc7;
}
/* Flex row that places the sidebar and the main content side by side. */
.page-content {
  display: flex;
  flex-direction: row;
  /* margin and padding were each declared twice; one declaration is enough. */
  margin: 0;
  padding: 0;
  overflow: scroll;
}
header {
background-color: lightgrey;
height: 2%;
padding: 10px;
}
nav {
padding: 1em;
min-width: 200px;
}
main {
flex: 1;
padding: 0 5em 0 5em;
}
.logotitle {
font-size: 1.5em;
font-weight: bold;
margin: 5px;
}
.number {
all: unset;
}
.tag.token {
all: unset;
}
main ul {
list-style-type: disc;
padding-left: 30px;
margin-top: 10px;
}
h1 {
font-size: 2rem;
margin-top: 10px;
}
h2 {
font-size: 1.5rem;
margin-top: 10px;
font-weight: 500;
}
h3 {
font-size: 1rem;
margin-top: 10px;
font-weight: 500;
}
p {
margin-top: 10px;
}
2024-04-04 18:26:16 +00:00
/* Accessibility styling */
a {
color: #485fc7;
text-decoration: underline;
}
.menu-list a {
text-decoration: none;
}
.token.comment, .token.prolog, .token.doctype, .token.cdata {
color: #8093a5;
}
.token.property, .token.tag, .token.constant, .token.symbol, .token.deleted {
color: #ff36ab;
}
2024-04-04 01:23:24 +00:00
< / style >
2024-04-04 15:56:27 +00:00
<script type="module" async>
  import mermaid from "https://unpkg.com/mermaid@10/dist/mermaid.esm.min.mjs";

  // Configure Mermaid as soon as the module has loaded. The previous code
  // passed the *result* of mermaid.initialize(...) to addEventListener, so the
  // call already ran immediately and no DOMContentLoaded listener was ever
  // registered. "loadOnSave" is not a documented Mermaid option; the
  // documented flag for rendering diagrams on page load is "startOnLoad".
  mermaid.initialize({ startOnLoad: true });
</script>
2024-04-04 01:23:24 +00:00
<script>
  /**
   * Shows a tooltip on the icon inside a copy button by giving the button's
   * first child the "tooltipped" classes and an aria-label with the message.
   *
   * @param {{trigger: Element}} event - ClipboardJS event; `trigger` is the element that was clicked.
   * @param {string} message - Text to show in the tooltip.
   */
  function showTooltip(event, message) {
    var trigger = event.trigger;
    // Unchanged guard: do nothing when the trigger itself already carries a tooltip class.
    if (trigger.className.includes("tooltipped")) {
      return;
    }
    var icon = trigger.children[0];
    if (!icon) {
      return; // nothing to decorate
    }
    icon.className = "tooltipped tooltipped-s";
    // Write the attribute directly so it is set even where the reflected
    // `ariaLabel` property is not supported.
    icon.setAttribute("aria-label", message);
  }

  window.addEventListener("load", () => {
    // clipboard.min.js comes from a CDN; skip the wiring if it did not load
    // instead of throwing a ReferenceError.
    if (typeof ClipboardJS === "undefined") {
      return;
    }
    var clipboard = new ClipboardJS(".code-copy");
    clipboard.on("success", (event) => showTooltip(event, "Copied!"));
    clipboard.on("error", (event) => showTooltip(event, "Failed..."));
  });
</script>
2024-04-04 15:56:27 +00:00
< script async = "" src = "https://cdn.jsdelivr.net/npm/clipboard@2.0.11/dist/clipboard.min.js" > < / script >
2024-04-04 01:23:24 +00:00
< script src = "https://wcpstatic.microsoft.com/mscc/lib/v2/wcp-consent.js" type = "text/javascript" > < / script >
<script>
  // Consent object delivered to the WcpConsent.init callback; stays undefined
  // until initialization succeeds.
  var siteConsent;

  /**
   * Change handler passed to WcpConsent.init; logs the preferences it is given.
   */
  function onConsentChanged(categoryPreferences) {
    console.log("onConsentChanged", categoryPreferences);
  }

  /**
   * Writes the current year into the #copyright element and initializes the
   * cookie-consent banner inside #cookie-banner.
   */
  function initialize() {
    var currentYear = new Date().getFullYear();
    var copyright = document.getElementById("copyright");
    // Guard so a missing element cannot abort the consent setup below.
    if (copyright) {
      copyright.textContent = `©️ ${currentYear} Microsoft`;
    }

    // wcp-consent.js is an external script; skip initialization if it did not load.
    if (!window.WcpConsent) {
      return;
    }
    WcpConsent.init("en-US", "cookie-banner", function (err, _siteConsent) {
      if (!err) {
        siteConsent = _siteConsent; // siteConsent is used to get the current consent
      } else {
        console.log("Error initializing WcpConsent: "+ err);
      }
    }, onConsentChanged, WcpConsent.themes.light);
  }

  addEventListener("DOMContentLoaded", initialize);

  /**
   * Delegates to siteConsent.manageConsent() when consent is required.
   * Does nothing if WcpConsent has not (yet) produced a consent object.
   */
  function manageConsent() {
    if (siteConsent && siteConsent.isConsentRequired) {
      siteConsent.manageConsent();
    }
  }
</script>
< / head >
< body >
< header >
< div id = "cookie-banner" > < / div >
2024-04-04 15:56:27 +00:00
< a href = "/graphrag/" > < span class = "logotitle" > GraphRAG< / span > < / a >
2024-04-04 01:23:24 +00:00
< / header >
< div class = "page-content" >
<!-- Sidebar -->
< aside class = "menu" >
< ul class = "menu-list" >
< li >
2024-04-04 15:56:27 +00:00
< a href = "/graphrag/" > Welcome< / a >
2024-04-04 01:23:24 +00:00
< / li >
<!-- Get Started Links -->
<li>
  <a href="/graphrag/posts/get_started/">Get Started</a>
</li>
<li>
  <a href="/graphrag/posts/developing/">Developing</a>
</li>
<!-- Indexing Links -->
< li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/index/overview/" > Indexing< / a >
2024-04-04 01:23:24 +00:00
< ul > < li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/index/0-architecture/" > Architecture< / a >
2024-04-04 01:23:24 +00:00
< / li > < li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/index/1-default_dataflow/" > Dataflow< / a >
2024-04-04 16:18:40 +00:00
< / li > < li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/index/2-cli/" > CLI< / a >
2024-04-04 01:23:24 +00:00
< / li > < li >
2024-04-04 19:46:42 +00:00
< a href = "/graphrag/posts/index/3-prompt_tuning/" > Prompt Tuning< / a >
< / li > < li >
2024-04-04 01:23:24 +00:00
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/index/workflows/overview/" > Workflows< / a >
2024-04-04 01:23:24 +00:00
2024-04-04 15:56:27 +00:00
< ul hidden = "" > < li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/index/workflows/create_base_documents/" > create_base_documents< / a >
2024-04-04 01:23:24 +00:00
< / li > < li >
2024-04-04 17:52:55 +00:00
< a href = "/graphrag/posts/index/workflows/create_base_entity_graph/" > create_base_entity_graph< / a >
2024-04-04 01:23:24 +00:00
< / li > < li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/index/workflows/create_base_extracted_entities/" > create_base_extracted_entities< / a >
2024-04-04 04:23:38 +00:00
< / li > < li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/index/workflows/create_base_text_units/" > create_base_text_units< / a >
2024-04-04 16:33:38 +00:00
< / li > < li >
2024-04-04 17:52:55 +00:00
< a href = "/graphrag/posts/index/workflows/create_final_communities/" > create_final_communities< / a >
2024-04-04 01:38:33 +00:00
< / li > < li >
2024-04-04 17:52:55 +00:00
< a href = "/graphrag/posts/index/workflows/create_final_community_reports/" > create_final_community_reports< / a >
2024-04-04 16:33:38 +00:00
< / li > < li >
2024-04-04 18:26:16 +00:00
< a href = "/graphrag/posts/index/workflows/create_final_covariates/" > create_final_covariates< / a >
< / li > < li >
2024-04-04 17:52:55 +00:00
< a href = "/graphrag/posts/index/workflows/create_final_documents/" > create_final_documents< / a >
2024-04-04 16:33:38 +00:00
< / li > < li >
2024-04-04 18:26:16 +00:00
< a href = "/graphrag/posts/index/workflows/create_final_entities/" > create_final_entities< / a >
< / li > < li >
< a href = "/graphrag/posts/index/workflows/create_final_nodes/" > create_final_nodes< / a >
< / li > < li >
2024-04-04 17:52:55 +00:00
< a href = "/graphrag/posts/index/workflows/create_final_relationships/" > create_final_relationships< / a >
2024-04-04 16:33:38 +00:00
< / li > < li >
2024-04-04 18:26:16 +00:00
< a href = "/graphrag/posts/index/workflows/create_final_text_units/" > create_final_text_units< / a >
< / li > < li >
2024-04-04 17:52:55 +00:00
< a href = "/graphrag/posts/index/workflows/create_summarized_entities/" > create_summarized_entities< / a >
2024-04-04 01:23:24 +00:00
< / li > < / ul >
< / li >
< li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/index/verbs/overview/" > Verbs< / a >
2024-04-04 01:23:24 +00:00
2024-04-04 15:56:27 +00:00
< ul hidden = "" > < li >
2024-04-04 18:26:16 +00:00
< a href = "/graphrag/posts/index/verbs/aggregate/" > aggregate< / a >
2024-04-04 04:10:45 +00:00
< / li > < li >
2024-04-04 18:26:16 +00:00
< a href = "/graphrag/posts/index/verbs/chunk/" > chunk< / a >
2024-04-04 01:23:24 +00:00
< / li > < li >
2024-04-04 18:26:16 +00:00
< a href = "/graphrag/posts/index/verbs/cluster_graph/" > cluster_graph< / a >
2024-04-04 01:23:24 +00:00
< / li > < li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/index/verbs/concat/" > concat< / a >
2024-04-04 16:33:38 +00:00
< / li > < li >
2024-04-04 18:26:16 +00:00
< a href = "/graphrag/posts/index/verbs/create_graph/" > create_graph< / a >
2024-04-04 04:23:38 +00:00
< / li > < li >
2024-04-04 18:26:16 +00:00
< a href = "/graphrag/posts/index/verbs/genid/" > genid< / a >
2024-04-04 16:18:40 +00:00
< / li > < li >
2024-04-04 18:26:16 +00:00
< a href = "/graphrag/posts/index/verbs/layout_graph/" > layout_graph< / a >
2024-04-04 17:52:55 +00:00
< / li > < li >
2024-04-04 18:26:16 +00:00
< a href = "/graphrag/posts/index/verbs/merge/" > merge< / a >
2024-04-04 17:52:55 +00:00
< / li > < li >
2024-04-04 18:26:16 +00:00
< a href = "/graphrag/posts/index/verbs/merge_graphs/" > merge_graphs< / a >
< / li > < li >
< a href = "/graphrag/posts/index/verbs/spread_json/" > spread_json< / a >
2024-04-04 16:33:38 +00:00
< / li > < li >
2024-04-04 17:52:55 +00:00
< a href = "/graphrag/posts/index/verbs/text_replace/" > text_replace< / a >
< / li > < li >
2024-04-04 18:26:16 +00:00
< a href = "/graphrag/posts/index/verbs/text_split/" > text_split< / a >
2024-04-04 04:23:38 +00:00
< / li > < li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/index/verbs/unpack_graph/" > unpack_graph< / a >
2024-04-04 02:27:06 +00:00
< / li > < li >
2024-04-04 18:26:16 +00:00
< a href = "/graphrag/posts/index/verbs/unzip/" > unzip< / a >
2024-04-04 16:33:38 +00:00
< / li > < li >
2024-04-04 18:26:16 +00:00
< a href = "/graphrag/posts/index/verbs/zip/" > zip< / a >
2024-04-04 01:23:24 +00:00
< / li > < / ul >
< / li >
< li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/config/overview/" > Configuration< / a >
2024-04-04 01:23:24 +00:00
< ul >
< li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/config/env_vars" > Using Env Vars< / a >
2024-04-04 01:23:24 +00:00
< / li >
< li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/config/json_yaml" > Using JSON or YAML< / a >
2024-04-04 01:23:24 +00:00
< / li >
< li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/config/custom" > Fully Custom< / a >
2024-04-04 01:23:24 +00:00
< / li >
< / ul >
< / li >
< / ul >
< / li >
<!-- Query Links -->
< li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/query/overview/" > Query< / a >
2024-04-04 01:23:24 +00:00
< ul > < li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/query/0-global_search/" > Global Search< / a >
2024-04-04 01:23:24 +00:00
< / li > < li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/query/1-local_search/" > Local Search< / a >
2024-04-04 01:23:24 +00:00
< / li > < li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/query/2-question_generation/" > Question Generation< / a >
2024-04-04 01:23:24 +00:00
< / li > < li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/query/3-cli/" > CLI< / a >
2024-04-04 01:23:24 +00:00
< / li > < li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/query/notebooks/overview/" > Notebooks< / a >
2024-04-04 01:23:24 +00:00
< ul >
< li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/query/notebooks/global_search_nb" > Global Search< / a >
2024-04-04 01:23:24 +00:00
< / li >
< li >
2024-04-04 17:11:09 +00:00
< a href = "/graphrag/posts/query/notebooks/local_search_nb" > Local Search< / a >
2024-04-04 01:23:24 +00:00
< / li >
< / ul >
< / li >
< / ul >
< / li >
< / ul >
< / aside >
<!-- Main Content -->
< main >
< h1 > Global Search Notebook< / h1 >
< div style = "position: relative" >
< pre class = "language-python" > < code id = "code-0" class = "language-python" > < span class = "token triple-quoted-string string" > """
Copyright (c) Microsoft Corporation. All rights reserved.
"""< / span > < / code > < / pre >
2024-04-04 15:56:27 +00:00
<!-- Icon-only copy button: aria-label gives it an accessible name, type="button" keeps it from acting as a submit button, and the default focus outline is left intact. -->
<button class="code-copy" type="button" data-clipboard-target="#code-0" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy">
  <span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
</button>
< / div >
< div style = "position: relative" >
< pre class = "language-python" > < code id = "code-1" class = "language-python" > < span class = "token keyword" > import< / span > os
< span class = "token keyword" > import< / span > pandas < span class = "token keyword" > as< / span > pd
< span class = "token keyword" > import< / span > tiktoken
< span class = "token keyword" > from< / span > graphrag< span class = "token punctuation" > .< / span > query< span class = "token punctuation" > .< / span > llm< span class = "token punctuation" > .< / span > oai< span class = "token punctuation" > .< / span > chat_openai < span class = "token keyword" > import< / span > ChatOpenAI
< span class = "token keyword" > from< / span > graphrag< span class = "token punctuation" > .< / span > query< span class = "token punctuation" > .< / span > llm< span class = "token punctuation" > .< / span > oai< span class = "token punctuation" > .< / span > typing < span class = "token keyword" > import< / span > OpenaiApiType
< span class = "token keyword" > from< / span > graphrag< span class = "token punctuation" > .< / span > query< span class = "token punctuation" > .< / span > < span class = "token builtin" > input< / span > < span class = "token punctuation" > .< / span > loaders< span class = "token punctuation" > .< / span > dfs < span class = "token keyword" > import< / span > read_community_reports
< span class = "token keyword" > from< / span > graphrag< span class = "token punctuation" > .< / span > query< span class = "token punctuation" > .< / span > structured_search< span class = "token punctuation" > .< / span > global_search< span class = "token punctuation" > .< / span > search < span class = "token keyword" > import< / span > GlobalSearch
< span class = "token keyword" > from < / span > graphrag< span class = "token punctuation" > .< / span > query< span class = "token punctuation" > .< / span > structured_search< span class = "token punctuation" > .< / span > global_search< span class = "token punctuation" > .< / span > community_context < span class = "token keyword" > import< / span > GlobalCommunityContext
< span class = "token keyword" > print< / span > < span class = "token punctuation" > (< / span > os< span class = "token punctuation" > .< / span > getcwd< span class = "token punctuation" > (< / span > < span class = "token punctuation" > )< / span > < span class = "token punctuation" > )< / span > < / code > < / pre >
2024-04-04 15:56:27 +00:00
<!-- Icon-only copy button: aria-label gives it an accessible name, type="button" keeps it from acting as a submit button, and the default focus outline is left intact. -->
<button class="code-copy" type="button" data-clipboard-target="#code-1" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy">
  <span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
</button>
< / div >
< h2 > Global Search example< / h2 >
< p > The global search method generates answers by searching over all AI-generated community reports in a map-reduce fashion. This is a resource-intensive method, but often gives good responses for questions that require an understanding of the dataset as a whole (e.g. What are the most significant values of the herbs mentioned in this notebook?).< / p >
< h3 > LLM setup< / h3 >
< div style = "position: relative" >
< pre class = "language-python" > < code id = "code-11" class = "language-python" >
api_key < span class = "token operator" > =< / span > < span class = "token string" > "< api_key>"< / span >
api_version < span class = "token operator" > =< / span > < span class = "token string" > "api_version"< / span >
llm_model < span class = "token operator" > =< / span > < span class = "token string" > "model or deployment id"< / span >
llm < span class = "token operator" > =< / span > ChatOpenAI< span class = "token punctuation" > (< / span >
api_key< span class = "token operator" > =< / span > api_key< span class = "token punctuation" > ,< / span >
model< span class = "token operator" > =< / span > llm_model< span class = "token punctuation" > ,< / span >
api_type< span class = "token operator" > =< / span > OpenaiApiType< span class = "token punctuation" > .< / span > OpenAI< span class = "token punctuation" > ,< / span > < span class = "token comment" > # OpenaiApiType.OpenAI or OpenaiApiType.AzureOpenAI< / span >
api_version< span class = "token operator" > =< / span > api_version< span class = "token punctuation" > ,< / span >
max_retries< span class = "token operator" > =< / span > < span class = "token number" > 20< / span >
< span class = "token punctuation" > )< / span >
token_encoder < span class = "token operator" > =< / span > tiktoken< span class = "token punctuation" > .< / span > get_encoding< span class = "token punctuation" > (< / span > < span class = "token string" > "cl100k_base"< / span > < span class = "token punctuation" > )< / span > < / code > < / pre >
2024-04-04 15:56:27 +00:00
<!-- Icon-only copy button: aria-label gives it an accessible name, type="button" keeps it from acting as a submit button, and the default focus outline is left intact. -->
<button class="code-copy" type="button" data-clipboard-target="#code-11" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy">
  <span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
</button>
< / div >
< h3 > Load community reports as context for global search< / h3 >
< ul >
< li > Load all community reports from the < strong > create_final_community_reports< / strong > table from the ire-indexing engine.< / li >
< / ul >
< div style = "position: relative" >
< pre class = "language-python" > < code id = "code-22" class = "language-python" > < span class = "token comment" > # parquet files generated from indexing pipeline< / span >
INPUT_DIR < span class = "token operator" > =< / span > < span class = "token string" > "./data"< / span >
COMMUNITY_REPORT_TABLE < span class = "token operator" > =< / span > < span class = "token string" > "create_final_community_reports"< / span >
ENTITY_TABLE < span class = "token operator" > =< / span > < span class = "token string" > "create_final_nodes"< / span >
< span class = "token comment" > # community level in the Leiden community hierarchy from which we will load the community reports< / span >
< span class = "token comment" > # higher value means we use reports on smaller communities (and thus will have more reports to query against)< / span >
COMMUNITY_LEVEL < span class = "token operator" > =< / span > < span class = "token number" > 2< / span >
< / code > < / pre >
2024-04-04 15:56:27 +00:00
<!-- Icon-only copy button: aria-label gives it an accessible name, type="button" keeps it from acting as a submit button, and the default focus outline is left intact. -->
<button class="code-copy" type="button" data-clipboard-target="#code-22" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy">
  <span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
</button>
< / div >
<!-- Snippet code-23: loads entity and community-report tables. Fixes in the displayed Python: `.reset_index()` (was `.resetindex()`) and an f-string for the first level filter so it matches the report_df filter below. -->
<div style="position: relative">
<pre class="language-python"><code id="code-23" class="language-python">
entity_df <span class="token operator">=</span> pd<span class="token punctuation">.</span>read_parquet<span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"</span><span class="token interpolation"><span class="token punctuation">{</span>INPUT_DIR<span class="token punctuation">}</span></span><span class="token string">/</span><span class="token interpolation"><span class="token punctuation">{</span>ENTITY_TABLE<span class="token punctuation">}</span></span><span class="token string">.parquet"</span></span><span class="token punctuation">)</span>
entity_df <span class="token operator">=</span> entity_df<span class="token punctuation">[</span><span class="token punctuation">(</span>entity_df<span class="token punctuation">.</span><span class="token builtin">type</span> <span class="token operator">==</span> <span class="token string">"entity"</span><span class="token punctuation">)</span> <span class="token operator">&amp;</span> <span class="token punctuation">(</span>entity_df<span class="token punctuation">.</span>level <span class="token operator">&lt;=</span> <span class="token string-interpolation"><span class="token string">f"level_</span><span class="token interpolation"><span class="token punctuation">{</span>COMMUNITY_LEVEL<span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">)</span><span class="token punctuation">]</span>
entity_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span> <span class="token operator">=</span> entity_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>fillna<span class="token punctuation">(</span><span class="token operator">-</span><span class="token number">1</span><span class="token punctuation">)</span>
entity_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span> <span class="token operator">=</span> entity_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>astype<span class="token punctuation">(</span><span class="token builtin">int</span><span class="token punctuation">)</span>
entity_df <span class="token operator">=</span> entity_df<span class="token punctuation">.</span>groupby<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token string">"title"</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">.</span>agg<span class="token punctuation">(</span><span class="token punctuation">{</span><span class="token string">"community"</span><span class="token punctuation">:</span> <span class="token string">"max"</span><span class="token punctuation">}</span><span class="token punctuation">)</span><span class="token punctuation">.</span>reset_index<span class="token punctuation">(</span><span class="token punctuation">)</span>
entity_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span> <span class="token operator">=</span> entity_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>astype<span class="token punctuation">(</span><span class="token builtin">str</span><span class="token punctuation">)</span>
filtered_community_df <span class="token operator">=</span> entity_df<span class="token punctuation">.</span>rename<span class="token punctuation">(</span>columns<span class="token operator">=</span><span class="token punctuation">{</span><span class="token string">"community"</span><span class="token punctuation">:</span> <span class="token string">"community_id"</span><span class="token punctuation">}</span><span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token string">"community_id"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>drop_duplicates<span class="token punctuation">(</span><span class="token punctuation">)</span>
report_df <span class="token operator">=</span> pd<span class="token punctuation">.</span>read_parquet<span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"</span><span class="token interpolation"><span class="token punctuation">{</span>INPUT_DIR<span class="token punctuation">}</span></span><span class="token string">/</span><span class="token interpolation"><span class="token punctuation">{</span>COMMUNITY_REPORT_TABLE<span class="token punctuation">}</span></span><span class="token string">.parquet"</span></span><span class="token punctuation">)</span>
report_df <span class="token operator">=</span> report_df<span class="token punctuation">[</span>report_df<span class="token punctuation">.</span>level <span class="token operator">&lt;=</span> <span class="token string-interpolation"><span class="token string">f"level_</span><span class="token interpolation"><span class="token punctuation">{</span>COMMUNITY_LEVEL<span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">]</span>
report_df<span class="token punctuation">[</span><span class="token string">"rank"</span><span class="token punctuation">]</span> <span class="token operator">=</span> report_df<span class="token punctuation">[</span><span class="token string">"rank"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>astype<span class="token punctuation">(</span><span class="token builtin">int</span><span class="token punctuation">)</span>
report_df <span class="token operator">=</span> report_df<span class="token punctuation">.</span>merge<span class="token punctuation">(</span>filtered_community_df<span class="token punctuation">,</span> on<span class="token operator">=</span><span class="token string">"community_id"</span><span class="token punctuation">,</span> how<span class="token operator">=</span><span class="token string">"inner"</span><span class="token punctuation">)</span>
reports <span class="token operator">=</span> read_community_reports<span class="token punctuation">(</span>
    df<span class="token operator">=</span>report_df<span class="token punctuation">,</span>
    id_col<span class="token operator">=</span><span class="token string">"community_id"</span><span class="token punctuation">,</span>
    short_id_col<span class="token operator">=</span><span class="token string">"community_id"</span><span class="token punctuation">,</span>
    community_col<span class="token operator">=</span><span class="token string">"community_id"</span><span class="token punctuation">,</span>
    title_col<span class="token operator">=</span><span class="token string">"title"</span><span class="token punctuation">,</span>
    summary_col<span class="token operator">=</span><span class="token string">"summary"</span><span class="token punctuation">,</span>
    content_col<span class="token operator">=</span><span class="token string">"full_content"</span><span class="token punctuation">,</span>
    rank_col<span class="token operator">=</span><span class="token string">"rank"</span><span class="token punctuation">,</span>
    summary_embedding_col<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
    content_embedding_col<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f'Report records: </span><span class="token interpolation"><span class="token punctuation">{</span><span class="token builtin">len</span><span class="token punctuation">(</span>report_df<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string">'</span></span><span class="token punctuation">)</span>
report_df<span class="token punctuation">.</span>head<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre>
<!-- Icon-only copy button: aria-label gives it an accessible name, type="button" keeps it from acting as a submit button, and the default focus outline is left intact. -->
<button class="code-copy" type="button" data-clipboard-target="#code-23" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy">
  <span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
</button>
</div>
< h4 > Build global context based on community reports< / h4 >
< div style = "position: relative" >
< pre class = "language-python" > < code id = "code-27" class = "language-python" > context_builder < span class = "token operator" > =< / span > GlobalCommunityContext< span class = "token punctuation" > (< / span >
community_reports< span class = "token operator" > =< / span > reports< span class = "token punctuation" > ,< / span >
token_encoder< span class = "token operator" > =< / span > token_encoder
< span class = "token punctuation" > )< / span > < / code > < / pre >
2024-04-04 15:56:27 +00:00
<!-- Icon-only copy button: aria-label gives it an accessible name, type="button" keeps it from acting as a submit button, and the default focus outline is left intact. -->
<button class="code-copy" type="button" data-clipboard-target="#code-27" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy">
  <span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
</button>
< / div >
< h4 > Perform global search< / h4 >
< div style = "position: relative" >
<!-- Highlighted Python sample (id "code-31"): defines three dict literals, context_builder_params, map_llm_params and reduce_llm_params, including the inline Python comments about token limits. The copy button that follows selects this element through data-clipboard-target="#code-31". Whitespace inside the pre element is significant; do not reflow these lines. -->
< pre class = "language-python" > < code id = "code-31" class = "language-python" > context_builder_params < span class = "token operator" > =< / span > < span class = "token punctuation" > {< / span >
< span class = "token string" > "use_community_summary"< / span > < span class = "token punctuation" > :< / span > < span class = "token boolean" > False< / span > < span class = "token punctuation" > ,< / span > < span class = "token comment" > # False means using full community reports. True means using community short summaries.< / span >
< span class = "token string" > "shuffle_data"< / span > < span class = "token punctuation" > :< / span > < span class = "token boolean" > True< / span > < span class = "token punctuation" > ,< / span >
< span class = "token string" > "include_community_rank"< / span > < span class = "token punctuation" > :< / span > < span class = "token boolean" > True< / span > < span class = "token punctuation" > ,< / span >
< span class = "token string" > "min_community_rank"< / span > < span class = "token punctuation" > :< / span > < span class = "token number" > 0< / span > < span class = "token punctuation" > ,< / span >
< span class = "token string" > "max_tokens"< / span > < span class = "token punctuation" > :< / span > < span class = "token number" > 16000< / span > < span class = "token punctuation" > ,< / span > < span class = "token comment" > # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)< / span >
< span class = "token string" > "context_name"< / span > < span class = "token punctuation" > :< / span > < span class = "token string" > "Reports"< / span > < span class = "token punctuation" > ,< / span >
< span class = "token punctuation" > }< / span >
map_llm_params < span class = "token operator" > =< / span > < span class = "token punctuation" > {< / span >
< span class = "token string" > "max_tokens"< / span > < span class = "token punctuation" > :< / span > < span class = "token number" > 500< / span > < span class = "token punctuation" > ,< / span >
< span class = "token string" > "temperature"< / span > < span class = "token punctuation" > :< / span > < span class = "token number" > 0.0< / span > < span class = "token punctuation" > ,< / span >
< span class = "token punctuation" > }< / span >
reduce_llm_params < span class = "token operator" > =< / span > < span class = "token punctuation" > {< / span >
< span class = "token string" > "max_tokens"< / span > < span class = "token punctuation" > :< / span > < span class = "token number" > 2000< / span > < span class = "token punctuation" > ,< / span > < span class = "token comment" > # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 1000-1500)< / span >
< span class = "token string" > "temperature"< / span > < span class = "token punctuation" > :< / span > < span class = "token number" > 0.0< / span > < span class = "token punctuation" > ,< / span >
< span class = "token punctuation" > }< / span > < / code > < / pre >
2024-04-04 15:56:27 +00:00
<!-- Copy control for the code sample with id "code-31" (selected via data-clipboard-target).
     type="button" stops it from defaulting to a submit button, aria-label gives the icon-only
     control an accessible name, and the browser focus outline is no longer suppressed so
     keyboard users can see where focus is. The icon span is decorative and hidden from
     assistive technology. -->
<button class="code-copy" type="button" data-clipboard-target="#code-31" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy code to clipboard">
  <span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" aria-hidden="true"></span>
</button>
< / div >
< div style = "position: relative" >
<!-- Highlighted Python sample (id "code-32"): binds search_engine to a GlobalSearch call that passes llm, context_builder, token_encoder, max_data_tokens=16000, the three parameter dicts, concurrent_coroutines=32 and response_type="multiple paragraphs". The copy button that follows selects this element through data-clipboard-target="#code-32". Whitespace inside the pre element is significant; do not reflow these lines. -->
< pre class = "language-python" > < code id = "code-32" class = "language-python" > search_engine < span class = "token operator" > =< / span > GlobalSearch< span class = "token punctuation" > (< / span >
llm< span class = "token operator" > =< / span > llm< span class = "token punctuation" > ,< / span >
context_builder< span class = "token operator" > =< / span > context_builder< span class = "token punctuation" > ,< / span >
token_encoder< span class = "token operator" > =< / span > token_encoder< span class = "token punctuation" > ,< / span >
max_data_tokens < span class = "token operator" > =< / span > < span class = "token number" > 16000< / span > < span class = "token punctuation" > ,< / span > < span class = "token comment" > # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)< / span >
map_llm_params< span class = "token operator" > =< / span > map_llm_params< span class = "token punctuation" > ,< / span >
reduce_llm_params< span class = "token operator" > =< / span > reduce_llm_params< span class = "token punctuation" > ,< / span >
context_builder_params< span class = "token operator" > =< / span > context_builder_params< span class = "token punctuation" > ,< / span >
concurrent_coroutines< span class = "token operator" > =< / span > < span class = "token number" > 32< / span > < span class = "token punctuation" > ,< / span >
response_type< span class = "token operator" > =< / span > < span class = "token string" > "multiple paragraphs"< / span > < span class = "token comment" > # free form text describing the response type and format, can be anything, e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report< / span >
< span class = "token punctuation" > )< / span > < / code > < / pre >
2024-04-04 15:56:27 +00:00
<!-- Copy control for the code sample with id "code-32" (selected via data-clipboard-target).
     type="button" stops it from defaulting to a submit button, aria-label gives the icon-only
     control an accessible name, and the browser focus outline is no longer suppressed so
     keyboard users can see where focus is. The icon span is decorative and hidden from
     assistive technology. -->
<button class="code-copy" type="button" data-clipboard-target="#code-32" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy code to clipboard">
  <span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" aria-hidden="true"></span>
</button>
< / div >
< div style = "position: relative" >
<!-- Highlighted Python sample (id "code-33"): awaits search_engine.asearch with a question string, stores the value in result, then prints result.response. The copy button that follows selects this element through data-clipboard-target="#code-33". Whitespace inside the pre element is significant; do not reflow these lines. -->
< pre class = "language-python" > < code id = "code-33" class = "language-python" > result < span class = "token operator" > =< / span > < span class = "token keyword" > await< / span > search_engine< span class = "token punctuation" > .< / span > asearch< span class = "token punctuation" > (< / span > < span class = "token string" > 'How reliable are medicinal herbs? '< / span > < span class = "token punctuation" > )< / span >
< span class = "token keyword" > print< / span > < span class = "token punctuation" > (< / span > result< span class = "token punctuation" > .< / span > response< span class = "token punctuation" > )< / span > < / code > < / pre >
2024-04-04 15:56:27 +00:00
<!-- Copy control for the code sample with id "code-33" (selected via data-clipboard-target).
     type="button" stops it from defaulting to a submit button, aria-label gives the icon-only
     control an accessible name, and the browser focus outline is no longer suppressed so
     keyboard users can see where focus is. The icon span is decorative and hidden from
     assistive technology. -->
<button class="code-copy" type="button" data-clipboard-target="#code-33" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy code to clipboard">
  <span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" aria-hidden="true"></span>
</button>
< / div >
< div style = "position: relative" >
<!-- Highlighted Python sample (id "code-34"): evaluates result.context_data["reports"], described by its own Python comment as the data used to build the context for the LLM responses. The copy button that follows selects this element through data-clipboard-target="#code-34". Whitespace inside the pre element is significant; do not reflow these lines. -->
< pre class = "language-python" > < code id = "code-34" class = "language-python" > < span class = "token comment" > # inspect the data used to build the context for the LLM responses< / span >
result< span class = "token punctuation" > .< / span > context_data< span class = "token punctuation" > [< / span > < span class = "token string" > "reports"< / span > < span class = "token punctuation" > ]< / span > < / code > < / pre >
2024-04-04 15:56:27 +00:00
<!-- Copy control for the code sample with id "code-34" (selected via data-clipboard-target).
     type="button" stops it from defaulting to a submit button, aria-label gives the icon-only
     control an accessible name, and the browser focus outline is no longer suppressed so
     keyboard users can see where focus is. The icon span is decorative and hidden from
     assistive technology. -->
<button class="code-copy" type="button" data-clipboard-target="#code-34" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy code to clipboard">
  <span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" aria-hidden="true"></span>
</button>
< / div >
< div style = "position: relative" >
<!-- Highlighted Python sample (id "code-35"): prints an f-string that interpolates result.llm_calls and result.prompt_tokens. NOTE(review): the printed label reads "LLM tokens" while the interpolated attribute is prompt_tokens; confirm the label is intended. The copy button that follows selects this element through data-clipboard-target="#code-35". Whitespace inside the pre element is significant; do not reflow these lines. -->
< pre class = "language-python" > < code id = "code-35" class = "language-python" > < span class = "token comment" > # inspect number of LLM calls and tokens< / span >
< span class = "token keyword" > print< / span > < span class = "token punctuation" > (< / span > < span class = "token string-interpolation" > < span class = "token string" > f'LLM calls: < / span > < span class = "token interpolation" > < span class = "token punctuation" > {< / span > result< span class = "token punctuation" > .< / span > llm_calls< span class = "token punctuation" > }< / span > < / span > < span class = "token string" > . LLM tokens: < / span > < span class = "token interpolation" > < span class = "token punctuation" > {< / span > result< span class = "token punctuation" > .< / span > prompt_tokens< span class = "token punctuation" > }< / span > < / span > < span class = "token string" > '< / span > < / span > < span class = "token punctuation" > )< / span > < / code > < / pre >
2024-04-04 15:56:27 +00:00
<!-- Copy control for the code sample with id "code-35" (selected via data-clipboard-target).
     type="button" stops it from defaulting to a submit button, aria-label gives the icon-only
     control an accessible name, and the browser focus outline is no longer suppressed so
     keyboard users can see where focus is. The icon span is decorative and hidden from
     assistive technology. -->
<button class="code-copy" type="button" data-clipboard-target="#code-35" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; opacity: 0.8;" title="Copy" aria-label="Copy code to clipboard">
  <span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" aria-hidden="true"></span>
</button>
< / div >
< p > LLM calls: 13. LLM tokens: 184660< / p >
< / main >
< / div >
<!-- Page footer: policy and legal links, the cookie-consent trigger, and the repository link.
     The "|" separators are visual only, so they are hidden from assistive technology. -->
<footer>
  <a href="https://go.microsoft.com/fwlink/?LinkId=521839">Privacy</a>
  <span aria-hidden="true">|</span>
  <a href="https://go.microsoft.com/fwlink/?LinkId=2259814">Consumer Health Privacy</a>
  <span aria-hidden="true">|</span>
  <!-- A native button makes the consent action focusable and operable with Enter/Space.
       The id and the manageConsent() handler are unchanged, and the inline resets remove the
       default button chrome so the existing #cookiesManager rule (colour, pointer cursor)
       still controls the look. -->
  <button type="button" id="cookiesManager" onclick="manageConsent();" style="background: none; border: 0; padding: 0; font: inherit;">Cookies</button>
  <span aria-hidden="true">|</span>
  <a href="https://go.microsoft.com/fwlink/?LinkID=206977">Terms of Use</a>
  <span aria-hidden="true">|</span>
  <a href="https://www.microsoft.com/trademarks">Trademarks</a>
  <span aria-hidden="true">|</span>
  <!-- NOTE(review): this link has no text here; presumably a script elsewhere fills in the
       copyright notice. Confirm, because an empty link has no accessible name. -->
  <a href="https://www.microsoft.com" id="copyright"></a>
  <span aria-hidden="true">|</span>
  <a href="https://github.com/microsoft/graphrag">GitHub</a>
</footer>
< / body >
< / html >