mirror of
https://github.com/microsoft/graphrag.git
synced 2025-09-18 12:44:23 +00:00
Deploying to gh-pages from @ microsoft/graphrag@b3855a5f7f 🚀
This commit is contained in:
parent
2e064dbd09
commit
76b88dbaec
@ -1,299 +1,31 @@
|
||||
|
||||
|
||||
|
||||
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Global Search Notebook</title>
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
|
||||
<link href="https://unpkg.com/prismjs@1.20.0/themes/prism-okaidia.css" rel="stylesheet">
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/Primer/19.1.1/tooltips.min.css" crossorigin="anonymous" referrerpolicy="no-referrer">
|
||||
<style>
|
||||
html {
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
body{
|
||||
font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
footer{
|
||||
width: 100%;
|
||||
height: 32px;
|
||||
font-size: 12px;
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
justify-content: center;
|
||||
gap: 18px;
|
||||
align-items: center;
|
||||
color: #5d5d5d;
|
||||
background: #e9eaeb;
|
||||
border-top: 1px solid #c4c5c6;
|
||||
}
|
||||
|
||||
#cookiesManager{
|
||||
cursor: pointer;
|
||||
color: #485fc7;
|
||||
}
|
||||
|
||||
/* Row-direction flex wrapper (used on the div that holds the sidebar and the
   main content) with scrollable overflow and no outer spacing. */
.page-content {
  display: flex;
  flex-direction: row;
  overflow: scroll;
  margin: 0;
  padding: 0;
}
|
||||
|
||||
header {
|
||||
background-color: lightgrey;
|
||||
height: 2%;
|
||||
padding: 10px;
|
||||
}
|
||||
|
||||
nav {
|
||||
padding: 1em;
|
||||
min-width: 200px;
|
||||
}
|
||||
|
||||
main {
|
||||
flex: 1;
|
||||
padding: 0 5em 0 5em;
|
||||
}
|
||||
|
||||
.logotitle {
|
||||
font-size: 1.5em;
|
||||
font-weight: bold;
|
||||
margin: 5px;
|
||||
}
|
||||
|
||||
.number {
|
||||
all: unset;
|
||||
}
|
||||
|
||||
.tag.token {
|
||||
all: unset;
|
||||
}
|
||||
|
||||
main ul {
|
||||
list-style-type: disc;
|
||||
padding-left: 30px;
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 2rem;
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
h2 {
|
||||
font-size: 1.5rem;
|
||||
margin-top: 10px;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
h3 {
|
||||
font-size: 1rem;
|
||||
margin-top: 10px;
|
||||
font-weight: 500;
|
||||
}
|
||||
p {
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
/* Accessibility styling */
|
||||
|
||||
a {
|
||||
color: #485fc7;
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.menu-list a {
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
|
||||
.token.comment, .token.prolog, .token.doctype, .token.cdata {
|
||||
color: #8093a5;
|
||||
}
|
||||
|
||||
.token.property, .token.tag, .token.constant, .token.symbol, .token.deleted {
|
||||
color: #ff36ab;
|
||||
}
|
||||
</style>
|
||||
<script type="module" async="">
  import mermaid from "https://unpkg.com/mermaid@10/dist/mermaid.esm.min.mjs";

  // Configure Mermaid as soon as the module has loaded. The previous code
  // passed the *result* of mermaid.initialize(...) to addEventListener, so
  // initialize already ran immediately and the DOMContentLoaded registration
  // received `undefined` (a no-op listener). Calling it directly keeps the
  // effective behavior and removes the misleading registration.
  // NOTE(review): `loadOnSave` is kept verbatim; it does not look like a
  // documented Mermaid option (`startOnLoad` may have been intended) - confirm.
  mermaid.initialize({"loadOnSave":true});
</script>
|
||||
<script>
  /**
   * Turn the first child of the clicked copy button into a tooltip carrying
   * `message`, unless the button's own class list already mentions
   * "tooltipped".
   *
   * @param {object} event - ClipboardJS event; `event.trigger` is the button.
   * @param {string} message - Text stored in the child's aria-label.
   */
  function showTooltip(event, message) {
    if (event.trigger.className.includes("tooltipped")) {
      return;
    }
    const icon = event.trigger.children[0];
    icon.className = "tooltipped tooltipped-s";
    icon.ariaLabel = message;
  }

  // Wire every ".code-copy" button to ClipboardJS once the page has loaded
  // and report the outcome of each copy attempt through the tooltip.
  window.addEventListener("load", () => {
    const clipboard = new ClipboardJS(".code-copy");
    clipboard.on("success", (event) => showTooltip(event, "Copied!"));
    clipboard.on("error", (event) => showTooltip(event, "Failed..."));
  });
</script>
|
||||
<script async="" src="https://cdn.jsdelivr.net/npm/clipboard@2.0.11/dist/clipboard.min.js"></script>
|
||||
|
||||
|
||||
<script src="https://wcpstatic.microsoft.com/mscc/lib/v2/wcp-consent.js" type="text/javascript"></script>
|
||||
<script>
|
||||
/**
 * Consent-change callback passed to WcpConsent.init; it only logs the
 * category preferences it receives.
 *
 * @param {*} categoryPreferences - Value supplied by the consent library.
 */
function onConsentChanged(categoryPreferences) {
  console.log("onConsentChanged", categoryPreferences);
}
|
||||
|
||||
var siteConsent
|
||||
|
||||
/**
 * DOMContentLoaded hook: writes the copyright notice for the current year
 * into the #copyright element, then starts the WcpConsent banner when the
 * consent library is present on `window`.
 *
 * On successful initialisation the consent object is stored in the shared
 * `siteConsent` variable; on failure the error is logged to the console.
 */
function initialize(){
  const year = new Date().getFullYear();
  const copyrightNode = document.getElementById("copyright");
  copyrightNode.innerHTML = `©️ ${year} Microsoft`;

  // Nothing more to do when the consent script is not available.
  if (!window.WcpConsent) {
    return;
  }

  WcpConsent.init(
    "en-US",
    "cookie-banner",
    function (err, consent) {
      if (err) {
        console.log("Error initializing WcpConsent: "+ err);
        return;
      }
      // siteConsent is used to get the current consent
      siteConsent = consent;
    },
    onConsentChanged,
    WcpConsent.themes.light
  );
}
|
||||
|
||||
addEventListener("DOMContentLoaded", initialize)
|
||||
addEventListener("DOMContentLoaded", checkCookieManager)
|
||||
|
||||
/**
 * Show or hide the #cookiesManager and #divider elements.
 *
 * Both are displayed as blocks only when `siteConsent.isConsentRequired` is
 * truthy; otherwise they are hidden. `siteConsent` is only assigned inside
 * the WcpConsent.init callback, so it can still be undefined here (consent
 * library missing, or callback not run yet). That case is treated as
 * "consent not required" instead of throwing a TypeError.
 */
function checkCookieManager(){
  var consentRequired = Boolean(siteConsent && siteConsent.isConsentRequired);
  var display = consentRequired ? 'block' : 'none';

  ["cookiesManager", "divider"].forEach(function (id) {
    var element = document.getElementById(id);
    // Skip ids that are not present in the document instead of throwing.
    if (element) {
      element.style.display = display;
    }
  });
}
|
||||
|
||||
/**
 * Delegate to `siteConsent.manageConsent()` when consent is required.
 *
 * `siteConsent` is only assigned once WcpConsent.init succeeds, so it may
 * still be undefined when this runs; in that case the call is a no-op
 * rather than a TypeError.
 * NOTE(review): presumably wired to the #cookiesManager control - the
 * binding is not visible in this part of the file.
 */
function manageConsent() {
  if (siteConsent && siteConsent.isConsentRequired) {
    siteConsent.manageConsent();
  }
}
|
||||
</script>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<div id="cookie-banner"></div>
|
||||
<a href="/graphrag/"><span class="logotitle">GraphRAG</span></a>
|
||||
</header>
|
||||
<div class="page-content">
|
||||
<!-- Sidebar -->
|
||||
<aside class="menu">
|
||||
<ul class="menu-list">
|
||||
<li>
|
||||
|
||||
<a href="/graphrag/">Welcome</a>
|
||||
|
||||
</li>
|
||||
|
||||
<!-- Get Started Links -->
|
||||
<li>
|
||||
|
||||
<a href="/graphrag/posts/get_started/">Get Started</a>
|
||||
|
||||
|
||||
<a href="/graphrag/posts/developing/">Developing</a>
|
||||
|
||||
</li>
|
||||
|
||||
<!-- Indexing Links -->
|
||||
<li>
|
||||
|
||||
<a href="/graphrag/posts/index/overview/">Indexing</a>
|
||||
|
||||
<ul><li>
|
||||
<a href="/graphrag/posts/index/0-architecture/">Architecture</a>
|
||||
</li><li>
|
||||
<a href="/graphrag/posts/index/1-default_dataflow/">Dataflow</a>
|
||||
</li><li>
|
||||
<a href="/graphrag/posts/index/2-cli/">CLI</a>
|
||||
</li><li>
|
||||
<a href="/graphrag/posts/index/3-prompt_tuning/">Prompt Tuning</a>
|
||||
</li><li>
|
||||
|
||||
<a href="/graphrag/posts/config/overview/">Configuration</a>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
<a href="/graphrag/posts/config/env_vars">Using Env Vars</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/graphrag/posts/config/json_yaml">Using JSON or YAML</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/graphrag/posts/config/custom">Fully Custom</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/graphrag/posts/config/template">Template</a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
|
||||
|
||||
<!-- Query Links -->
|
||||
<li>
|
||||
|
||||
<a href="/graphrag/posts/query/overview/">Query</a>
|
||||
|
||||
<ul><li>
|
||||
<a href="/graphrag/posts/query/0-global_search/">Global Search</a>
|
||||
</li><li>
|
||||
<a href="/graphrag/posts/query/1-local_search/">Local Search</a>
|
||||
</li><li>
|
||||
<a href="/graphrag/posts/query/2-question_generation/">Question Generation</a>
|
||||
</li><li>
|
||||
<a href="/graphrag/posts/query/3-cli/">CLI</a>
|
||||
</li><li>
|
||||
|
||||
<a href="/graphrag/posts/query/notebooks/overview/">Notebooks</a>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
<a href="/graphrag/posts/query/notebooks/global_search_nb">Global Search</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/graphrag/posts/query/notebooks/local_search_nb">Local Search</a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</aside>
|
||||
|
||||
<!-- Main Content -->
|
||||
<main>
|
||||
<h1>Global Search Notebook</h1>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-0" class="language-python"><span class="token triple-quoted-string string">"""
|
||||
Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
"""</span></code></pre>
|
||||
<pre class="language-python"><code id="code-0" class="language-python"><span class="token comment"># Copyright (c) 2024 Microsoft Corporation. All rights reserved.</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-0" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
</button>
|
||||
</div>
|
||||
<p>'\nCopyright (c) Microsoft Corporation. All rights reserved.\n'</p>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-1" class="language-python"><span class="token keyword">import</span> os
|
||||
<pre class="language-python"><code id="code-4" class="language-python"><span class="token keyword">import</span> os
|
||||
<span class="token keyword">from</span> pathlib <span class="token keyword">import</span> Path
|
||||
|
||||
<span class="token keyword">import</span> pandas <span class="token keyword">as</span> pd
|
||||
<span class="token keyword">import</span> tiktoken
|
||||
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span><span class="token builtin">input</span><span class="token punctuation">.</span>loaders<span class="token punctuation">.</span>dfs <span class="token keyword">import</span> read_community_reports
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>llm<span class="token punctuation">.</span>oai<span class="token punctuation">.</span>chat_openai <span class="token keyword">import</span> ChatOpenAI
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>llm<span class="token punctuation">.</span>oai<span class="token punctuation">.</span>typing <span class="token keyword">import</span> OpenaiApiType
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span><span class="token builtin">input</span><span class="token punctuation">.</span>loaders<span class="token punctuation">.</span>dfs <span class="token keyword">import</span> read_community_reports
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>structured_search<span class="token punctuation">.</span>global_search<span class="token punctuation">.</span>community_context <span class="token keyword">import</span> <span class="token punctuation">(</span>
|
||||
GlobalCommunityContext<span class="token punctuation">,</span>
|
||||
<span class="token punctuation">)</span>
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>structured_search<span class="token punctuation">.</span>global_search<span class="token punctuation">.</span>search <span class="token keyword">import</span> GlobalSearch
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>structured_search<span class="token punctuation">.</span>global_search<span class="token punctuation">.</span>community_context <span class="token keyword">import</span> GlobalCommunityContext
|
||||
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span>os<span class="token punctuation">.</span>getcwd<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span></code></pre>
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span>Path<span class="token punctuation">.</span>cwd<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-1" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<button class="code-copy " data-clipboard-target="#code-4" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
</button>
|
||||
</div>
|
||||
@ -302,22 +34,19 @@ Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
<h3>LLM setup</h3>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-11" class="language-python">
|
||||
api_key <span class="token operator">=</span> <span class="token string">"<api_key>"</span>
|
||||
api_version <span class="token operator">=</span> <span class="token string">"api_version"</span>
|
||||
llm_model <span class="token operator">=</span> <span class="token string">"model or deployment id"</span>
|
||||
<pre class="language-python"><code id="code-14" class="language-python">api_key <span class="token operator">=</span> os<span class="token punctuation">.</span>environ<span class="token punctuation">[</span><span class="token string">"GRAPHRAG_API_KEY"</span><span class="token punctuation">]</span>
|
||||
llm_model <span class="token operator">=</span> os<span class="token punctuation">.</span>environ<span class="token punctuation">[</span><span class="token string">"GRAPHRAG_LLM_MODEL"</span><span class="token punctuation">]</span><!-- llm_model is passed to ChatOpenAI below, so it reads the LLM model variable rather than the embedding model one -->
|
||||
|
||||
llm <span class="token operator">=</span> ChatOpenAI<span class="token punctuation">(</span>
|
||||
api_key<span class="token operator">=</span>api_key<span class="token punctuation">,</span>
|
||||
model<span class="token operator">=</span>llm_model<span class="token punctuation">,</span>
|
||||
api_type<span class="token operator">=</span>OpenaiApiType<span class="token punctuation">.</span>OpenAI<span class="token punctuation">,</span> <span class="token comment"># OpenaiApiType.OpenAI or OpenaiApiType.AzureOpenAI</span>
|
||||
api_version<span class="token operator">=</span>api_version<span class="token punctuation">,</span>
|
||||
max_retries<span class="token operator">=</span><span class="token number">20</span>
|
||||
api_type<span class="token operator">=</span>OpenaiApiType<span class="token punctuation">.</span>OpenAI<span class="token punctuation">,</span> <span class="token comment"># OpenaiApiType.OpenAI or OpenaiApiType.AzureOpenAI</span>
|
||||
max_retries<span class="token operator">=</span><span class="token number">20</span><span class="token punctuation">,</span>
|
||||
<span class="token punctuation">)</span>
|
||||
|
||||
token_encoder <span class="token operator">=</span> tiktoken<span class="token punctuation">.</span>get_encoding<span class="token punctuation">(</span><span class="token string">"cl100k_base"</span><span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-11" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<button class="code-copy " data-clipboard-target="#code-14" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
</button>
|
||||
</div>
|
||||
@ -327,35 +56,38 @@ token_encoder <span class="token operator">=</span> tiktoken<span class="token p
|
||||
</ul>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-22" class="language-python"><span class="token comment"># parquet files generated from indexing pipeline</span>
|
||||
INPUT_DIR <span class="token operator">=</span> <span class="token string">"./data"</span>
|
||||
<pre class="language-python"><code id="code-25" class="language-python"><span class="token comment"># parquet files generated from indexing pipeline</span>
|
||||
INPUT_DIR <span class="token operator">=</span> <span class="token string">"./inputs/operation dulce"</span>
|
||||
COMMUNITY_REPORT_TABLE <span class="token operator">=</span> <span class="token string">"create_final_community_reports"</span>
|
||||
ENTITY_TABLE <span class="token operator">=</span> <span class="token string">"create_final_nodes"</span>
|
||||
|
||||
<span class="token comment"># community level in the Leiden community hierarchy from which we will load the community reports</span>
|
||||
<span class="token comment"># higher value means we use reports on smaller communities (and thus will have more reports to query against)</span>
|
||||
COMMUNITY_LEVEL <span class="token operator">=</span> <span class="token number">2</span>
|
||||
</code></pre>
|
||||
COMMUNITY_LEVEL <span class="token operator">=</span> <span class="token number">2</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-22" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<button class="code-copy " data-clipboard-target="#code-25" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-23" class="language-python">
|
||||
entity_df <span class="token operator">=</span> pd<span class="token punctuation">.</span>read_parquet<span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"</span><span class="token interpolation"><span class="token punctuation">{</span>INPUT_DIR<span class="token punctuation">}</span></span><span class="token string">/</span><span class="token interpolation"><span class="token punctuation">{</span>ENTITY_TABLE<span class="token punctuation">}</span></span><span class="token string">.parquet"</span></span><span class="token punctuation">)</span>
|
||||
entity_df <span class="token operator">=</span> entity_df<span class="token punctuation">[</span><span class="token punctuation">(</span>entity_df<span class="token punctuation">.</span><span class="token builtin">type</span><span class="token operator">==</span><span class="token string">"entity"</span><span class="token punctuation">)</span> <span class="token operator">&</span> <span class="token punctuation">(</span>entity_df<span class="token punctuation">.</span>level<span class="token operator"><=</span><span class="token string">"level_{COMMUNITY_LEVEL}"</span><span class="token punctuation">)</span><span class="token punctuation">]</span>
|
||||
<pre class="language-python"><code id="code-26" class="language-python">entity_df <span class="token operator">=</span> pd<span class="token punctuation">.</span>read_parquet<span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"</span><span class="token interpolation"><span class="token punctuation">{</span>INPUT_DIR<span class="token punctuation">}</span></span><span class="token string">/</span><span class="token interpolation"><span class="token punctuation">{</span>ENTITY_TABLE<span class="token punctuation">}</span></span><span class="token string">.parquet"</span></span><span class="token punctuation">)</span>
|
||||
entity_df <span class="token operator">=</span> entity_df<span class="token punctuation">[</span>
|
||||
<span class="token punctuation">(</span>entity_df<span class="token punctuation">.</span><span class="token builtin">type</span> <span class="token operator">==</span> <span class="token string">"entity"</span><span class="token punctuation">)</span> <span class="token operator">&</span> <span class="token punctuation">(</span>entity_df<span class="token punctuation">.</span>level <span class="token operator"><=</span> <span class="token string-interpolation"><span class="token string">f"level_</span><span class="token interpolation"><span class="token punctuation">{</span>COMMUNITY_LEVEL<span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">)</span>
|
||||
<span class="token punctuation">]</span>
|
||||
entity_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span> <span class="token operator">=</span> entity_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>fillna<span class="token punctuation">(</span><span class="token operator">-</span><span class="token number">1</span><span class="token punctuation">)</span>
|
||||
entity_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span> <span class="token operator">=</span> entity_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>astype<span class="token punctuation">(</span><span class="token builtin">int</span><span class="token punctuation">)</span>
|
||||
|
||||
entity_df <span class="token operator">=</span> entity_df<span class="token punctuation">.</span>groupby<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token string">"title"</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">.</span>agg<span class="token punctuation">(</span><span class="token punctuation">{</span><span class="token string">"community"</span><span class="token punctuation">:</span> <span class="token string">"max"</span><span class="token punctuation">}</span><span class="token punctuation">)</span><span class="token punctuation">.</span>resetindex<span class="token punctuation">(</span><span class="token punctuation">)</span>
|
||||
entity_df <span class="token operator">=</span> entity_df<span class="token punctuation">.</span>groupby<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token string">"title"</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">.</span>agg<span class="token punctuation">(</span><span class="token punctuation">{</span><span class="token string">"community"</span><span class="token punctuation">:</span> <span class="token string">"max"</span><span class="token punctuation">}</span><span class="token punctuation">)</span><span class="token punctuation">.</span>reset_index<span class="token punctuation">(</span><span class="token punctuation">)</span>
|
||||
entity_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span> <span class="token operator">=</span> entity_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>astype<span class="token punctuation">(</span><span class="token builtin">str</span><span class="token punctuation">)</span>
|
||||
filtered_community_df <span class="token operator">=</span> entity_df<span class="token punctuation">.</span>rename<span class="token punctuation">(</span>columns<span class="token operator">=</span><span class="token punctuation">{</span><span class="token string">"community"</span><span class="token punctuation">:</span> <span class="token string">"community_id"</span><span class="token punctuation">}</span><span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token string">"community_id"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>drop_duplicates<span class="token punctuation">(</span><span class="token punctuation">)</span>
|
||||
filtered_community_df <span class="token operator">=</span> entity_df<span class="token punctuation">.</span>rename<span class="token punctuation">(</span>columns<span class="token operator">=</span><span class="token punctuation">{</span><span class="token string">"community"</span><span class="token punctuation">:</span> <span class="token string">"community_id"</span><span class="token punctuation">}</span><span class="token punctuation">)</span><span class="token punctuation">[</span>
|
||||
<span class="token string">"community_id"</span>
|
||||
<span class="token punctuation">]</span><span class="token punctuation">.</span>drop_duplicates<span class="token punctuation">(</span><span class="token punctuation">)</span>
|
||||
|
||||
report_df <span class="token operator">=</span> pd<span class="token punctuation">.</span>read_parquet<span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"</span><span class="token interpolation"><span class="token punctuation">{</span>INPUT_DIR<span class="token punctuation">}</span></span><span class="token string">/</span><span class="token interpolation"><span class="token punctuation">{</span>COMMUNITY_REPORT_TABLE<span class="token punctuation">}</span></span><span class="token string">.parquet"</span></span><span class="token punctuation">)</span>
|
||||
report_df <span class="token operator">=</span> report_df<span class="token punctuation">[</span>report_df<span class="token punctuation">.</span>level <span class="token operator"><=</span> <span class="token string-interpolation"><span class="token string">f"level_</span><span class="token interpolation"><span class="token punctuation">{</span>COMMUNITY_LEVEL<span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">]</span>
|
||||
|
||||
report_df<span class="token punctuation">[</span><span class="token string">"rank"</span><span class="token punctuation">]</span> <span class="token operator">=</span> report_df<span class="token punctuation">[</span><span class="token string">"rank"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>fillna<span class="token punctuation">(</span><span class="token operator">-</span><span class="token number">1</span><span class="token punctuation">)</span>
|
||||
report_df<span class="token punctuation">[</span><span class="token string">"rank"</span><span class="token punctuation">]</span> <span class="token operator">=</span> report_df<span class="token punctuation">[</span><span class="token string">"rank"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>astype<span class="token punctuation">(</span><span class="token builtin">int</span><span class="token punctuation">)</span>
|
||||
|
||||
report_df <span class="token operator">=</span> report_df<span class="token punctuation">.</span>merge<span class="token punctuation">(</span>filtered_community_df<span class="token punctuation">,</span> on<span class="token operator">=</span><span class="token string">"community_id"</span><span class="token punctuation">,</span> how<span class="token operator">=</span><span class="token string">"inner"</span><span class="token punctuation">)</span>
|
||||
@ -373,34 +105,33 @@ reports <span class="token operator">=</span> read_community_reports<span class=
|
||||
content_embedding_col<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
|
||||
<span class="token punctuation">)</span>
|
||||
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f'Report records: </span><span class="token interpolation"><span class="token punctuation">{</span><span class="token builtin">len</span><span class="token punctuation">(</span>report_df<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string">'</span></span><span class="token punctuation">)</span>
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"Report records: </span><span class="token interpolation"><span class="token punctuation">{</span><span class="token builtin">len</span><span class="token punctuation">(</span>report_df<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">)</span>
|
||||
report_df<span class="token punctuation">.</span>head<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-23" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<button class="code-copy " data-clipboard-target="#code-26" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
</button>
|
||||
</div>
|
||||
<h4>Build global context based on community reports</h4>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-27" class="language-python">context_builder <span class="token operator">=</span> GlobalCommunityContext<span class="token punctuation">(</span>
|
||||
community_reports<span class="token operator">=</span>reports<span class="token punctuation">,</span>
|
||||
token_encoder<span class="token operator">=</span>token_encoder
|
||||
<pre class="language-python"><code id="code-30" class="language-python">context_builder <span class="token operator">=</span> GlobalCommunityContext<span class="token punctuation">(</span>
|
||||
community_reports<span class="token operator">=</span>reports<span class="token punctuation">,</span> token_encoder<span class="token operator">=</span>token_encoder
|
||||
<span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-27" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<button class="code-copy " data-clipboard-target="#code-30" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
</button>
|
||||
</div>
|
||||
<h4>Perform global search</h4>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-31" class="language-python">context_builder_params <span class="token operator">=</span> <span class="token punctuation">{</span>
|
||||
<span class="token string">"use_community_summary"</span><span class="token punctuation">:</span> <span class="token boolean">False</span><span class="token punctuation">,</span> <span class="token comment"># False means using full community reports. True means using community short summaries.</span>
|
||||
<pre class="language-python"><code id="code-34" class="language-python">context_builder_params <span class="token operator">=</span> <span class="token punctuation">{</span>
|
||||
<span class="token string">"use_community_summary"</span><span class="token punctuation">:</span> <span class="token boolean">False</span><span class="token punctuation">,</span> <span class="token comment"># False means using full community reports. True means using community short summaries.</span>
|
||||
<span class="token string">"shuffle_data"</span><span class="token punctuation">:</span> <span class="token boolean">True</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"include_community_rank"</span><span class="token punctuation">:</span> <span class="token boolean">True</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"min_community_rank"</span><span class="token punctuation">:</span> <span class="token number">0</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"max_tokens"</span><span class="token punctuation">:</span> <span class="token number">16000</span><span class="token punctuation">,</span> <span class="token comment"># change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)</span>
|
||||
<span class="token string">"max_tokens"</span><span class="token punctuation">:</span> <span class="token number">12_000</span><span class="token punctuation">,</span> <span class="token comment"># change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)</span>
|
||||
<span class="token string">"context_name"</span><span class="token punctuation">:</span> <span class="token string">"Reports"</span><span class="token punctuation">,</span>
|
||||
<span class="token punctuation">}</span>
|
||||
|
||||
@ -410,78 +141,60 @@ map_llm_params <span class="token operator">=</span> <span class="token punctuat
|
||||
<span class="token punctuation">}</span>
|
||||
|
||||
reduce_llm_params <span class="token operator">=</span> <span class="token punctuation">{</span>
|
||||
<span class="token string">"max_tokens"</span><span class="token punctuation">:</span> <span class="token number">2000</span><span class="token punctuation">,</span> <span class="token comment"># change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 1000-1500)</span>
|
||||
<span class="token string">"max_tokens"</span><span class="token punctuation">:</span> <span class="token number">2000</span><span class="token punctuation">,</span> <span class="token comment"># change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 1000-1500)</span>
|
||||
<span class="token string">"temperature"</span><span class="token punctuation">:</span> <span class="token number">0.0</span><span class="token punctuation">,</span>
|
||||
<span class="token punctuation">}</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-31" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-32" class="language-python">search_engine <span class="token operator">=</span> GlobalSearch<span class="token punctuation">(</span>
|
||||
llm<span class="token operator">=</span>llm<span class="token punctuation">,</span>
|
||||
context_builder<span class="token operator">=</span>context_builder<span class="token punctuation">,</span>
|
||||
token_encoder<span class="token operator">=</span>token_encoder<span class="token punctuation">,</span>
|
||||
max_data_tokens <span class="token operator">=</span> <span class="token number">16000</span><span class="token punctuation">,</span> <span class="token comment"># change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)</span>
|
||||
map_llm_params<span class="token operator">=</span>map_llm_params<span class="token punctuation">,</span>
|
||||
reduce_llm_params<span class="token operator">=</span>reduce_llm_params<span class="token punctuation">,</span>
|
||||
context_builder_params<span class="token operator">=</span>context_builder_params<span class="token punctuation">,</span>
|
||||
concurrent_coroutines<span class="token operator">=</span><span class="token number">32</span><span class="token punctuation">,</span>
|
||||
response_type<span class="token operator">=</span><span class="token string">"multiple paragraphs"</span> <span class="token comment"># free form text describing the response type and format, can be anything, e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report</span>
|
||||
<span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-32" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-33" class="language-python">result <span class="token operator">=</span> <span class="token keyword">await</span> search_engine<span class="token punctuation">.</span>asearch<span class="token punctuation">(</span><span class="token string">'How reliable are medicinal herbs?'</span><span class="token punctuation">)</span>
|
||||
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span>result<span class="token punctuation">.</span>response<span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-33" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-34" class="language-python"><span class="token comment"># inspect the data used to build the context for the LLM responses</span>
|
||||
result<span class="token punctuation">.</span>context_data<span class="token punctuation">[</span><span class="token string">"reports"</span><span class="token punctuation">]</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-34" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-35" class="language-python"><span class="token comment"># inspect number of LLM calls and tokens</span>
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f'LLM calls: </span><span class="token interpolation"><span class="token punctuation">{</span>result<span class="token punctuation">.</span>llm_calls<span class="token punctuation">}</span></span><span class="token string">. LLM tokens: </span><span class="token interpolation"><span class="token punctuation">{</span>result<span class="token punctuation">.</span>prompt_tokens<span class="token punctuation">}</span></span><span class="token string">'</span></span><span class="token punctuation">)</span></code></pre>
|
||||
<pre class="language-python"><code id="code-35" class="language-python">search_engine <span class="token operator">=</span> GlobalSearch<span class="token punctuation">(</span>
|
||||
llm<span class="token operator">=</span>llm<span class="token punctuation">,</span>
|
||||
context_builder<span class="token operator">=</span>context_builder<span class="token punctuation">,</span>
|
||||
token_encoder<span class="token operator">=</span>token_encoder<span class="token punctuation">,</span>
|
||||
max_data_tokens<span class="token operator">=</span><span class="token number">16_000</span><span class="token punctuation">,</span> <span class="token comment"># change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)</span>
|
||||
map_llm_params<span class="token operator">=</span>map_llm_params<span class="token punctuation">,</span>
|
||||
reduce_llm_params<span class="token operator">=</span>reduce_llm_params<span class="token punctuation">,</span>
|
||||
context_builder_params<span class="token operator">=</span>context_builder_params<span class="token punctuation">,</span>
|
||||
concurrent_coroutines<span class="token operator">=</span><span class="token number">32</span><span class="token punctuation">,</span>
|
||||
response_type<span class="token operator">=</span><span class="token string">"multiple paragraphs"</span><span class="token punctuation">,</span> <span class="token comment"># free form text describing the response type and format, can be anything, e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report</span>
|
||||
<span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-35" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
</button>
|
||||
</div>
|
||||
<p>LLM calls: 13. LLM tokens: 184660</p>
|
||||
|
||||
</main>
|
||||
</div>
|
||||
<footer>
|
||||
<a href="https://go.microsoft.com/fwlink/?LinkId=521839">Privacy</a>
|
||||
|
|
||||
<a href="https://go.microsoft.com/fwlink/?LinkId=2259814">Consumer Health Privacy</a>
|
||||
|
|
||||
<span id="cookiesManager" onClick="manageConsent();">Cookies</span>
|
||||
<span id="divider">|</span>
|
||||
<a href="https://go.microsoft.com/fwlink/?LinkID=206977">Terms of Use</a>
|
||||
|
|
||||
<a href="https://www.microsoft.com/trademarks">Trademarks</a>
|
||||
|
|
||||
<a href="https://www.microsoft.com" id="copyright"></a>
|
||||
|
|
||||
<a href="https://github.com/microsoft/graphrag">GitHub</a>
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-36" class="language-python">result <span class="token operator">=</span> <span class="token keyword">await</span> search_engine<span class="token punctuation">.</span>asearch<span class="token punctuation">(</span>
|
||||
<span class="token string">"What is the major conflict in this story and who are the protagonist and antagonist?"</span>
|
||||
<span class="token punctuation">)</span>
|
||||
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span>result<span class="token punctuation">.</span>response<span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-36" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-37" class="language-python"><span class="token comment"># inspect the data used to build the context for the LLM responses</span>
|
||||
result<span class="token punctuation">.</span>context_data<span class="token punctuation">[</span><span class="token string">"reports"</span><span class="token punctuation">]</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-37" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-38" class="language-python"><span class="token comment"># inspect number of LLM calls and tokens</span>
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"LLM calls: </span><span class="token interpolation"><span class="token punctuation">{</span>result<span class="token punctuation">.</span>llm_calls<span class="token punctuation">}</span></span><span class="token string">. LLM tokens: </span><span class="token interpolation"><span class="token punctuation">{</span>result<span class="token punctuation">.</span>prompt_tokens<span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-38" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
</button>
|
||||
</div>
|
||||
<p>LLM calls: 13. LLM tokens: 184660</p>
|
||||
|
@ -1,280 +1,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Local Search Notebook</title>
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
|
||||
<link href="https://unpkg.com/prismjs@1.20.0/themes/prism-okaidia.css" rel="stylesheet">
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/Primer/19.1.1/tooltips.min.css" crossorigin="anonymous" referrerpolicy="no-referrer">
|
||||
<style>
|
||||
html {
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
body{
|
||||
font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
footer{
|
||||
width: 100%;
|
||||
height: 32px;
|
||||
font-size: 12px;
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
justify-content: center;
|
||||
gap: 18px;
|
||||
align-items: center;
|
||||
color: #5d5d5d;
|
||||
background: #e9eaeb;
|
||||
border-top: 1px solid #c4c5c6;
|
||||
}
|
||||
|
||||
#cookiesManager{
|
||||
cursor: pointer;
|
||||
color: #485fc7;
|
||||
}
|
||||
|
||||
/* Flex row that places the sidebar and the main column side by side. */
.page-content {
  display: flex;
  flex-direction: row;
  margin: 0;
  padding: 0;
  overflow: scroll;
}
|
||||
|
||||
header {
|
||||
background-color: lightgrey;
|
||||
height: 2%;
|
||||
padding: 10px;
|
||||
}
|
||||
|
||||
nav {
|
||||
padding: 1em;
|
||||
min-width: 200px;
|
||||
}
|
||||
|
||||
main {
|
||||
flex: 1;
|
||||
padding: 0 5em 0 5em;
|
||||
}
|
||||
|
||||
.logotitle {
|
||||
font-size: 1.5em;
|
||||
font-weight: bold;
|
||||
margin: 5px;
|
||||
}
|
||||
|
||||
.number {
|
||||
all: unset;
|
||||
}
|
||||
|
||||
.tag.token {
|
||||
all: unset;
|
||||
}
|
||||
|
||||
main ul {
|
||||
list-style-type: disc;
|
||||
padding-left: 30px;
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 2rem;
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
h2 {
|
||||
font-size: 1.5rem;
|
||||
margin-top: 10px;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
h3 {
|
||||
font-size: 1rem;
|
||||
margin-top: 10px;
|
||||
font-weight: 500;
|
||||
}
|
||||
p {
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
/* Accessibility styling */
|
||||
|
||||
a {
|
||||
color: #485fc7;
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.menu-list a {
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
|
||||
.token.comment, .token.prolog, .token.doctype, .token.cdata {
|
||||
color: #8093a5;
|
||||
}
|
||||
|
||||
.token.property, .token.tag, .token.constant, .token.symbol, .token.deleted {
|
||||
color: #ff36ab;
|
||||
}
|
||||
</style>
|
||||
<script type="module" async="">
  import mermaid from "https://unpkg.com/mermaid@10/dist/mermaid.esm.min.mjs";

  // The original evaluated mermaid.initialize(...) right here and handed its return
  // value to addEventListener('DOMContentLoaded', ...) as if it were a listener.
  // Calling it directly has the same effect without registering a bogus listener.
  // NOTE(review): "loadOnSave" does not look like a documented Mermaid option
  // ("startOnLoad" is) — confirm before changing the key.
  mermaid.initialize({"loadOnSave":true});
</script>
|
||||
<script>
  // Turn the first child of a copy button into a tooltip carrying `message`,
  // unless the button's own class list already contains "tooltipped".
  function showTooltip(event, message) {
    if (event.trigger.className.includes("tooltipped")) {
      return;
    }
    const icon = event.trigger.children[0];
    icon.className = "tooltipped tooltipped-s";
    icon.ariaLabel = message;
  }

  // After the window has loaded, bind ClipboardJS to every ".code-copy" button
  // and surface the result of each copy attempt through the tooltip.
  window.addEventListener("load", () => {
    const clipboard = new ClipboardJS(".code-copy");
    clipboard.on("success", (event) => showTooltip(event, "Copied!"));
    clipboard.on("error", (event) => showTooltip(event, "Failed..."));
  });
</script>
|
||||
<script async="" src="https://cdn.jsdelivr.net/npm/clipboard@2.0.11/dist/clipboard.min.js"></script>
|
||||
|
||||
|
||||
<script src="https://wcpstatic.microsoft.com/mscc/lib/v2/wcp-consent.js" type="text/javascript"></script>
|
||||
<script>
|
||||
/**
 * Callback passed to WcpConsent.init; logs the category preferences it receives.
 */
function onConsentChanged(categoryPreferences) {
  console.log("onConsentChanged", categoryPreferences);
}
|
||||
|
||||
var siteConsent
|
||||
|
||||
/**
 * DOMContentLoaded hook: writes the current year into the footer's #copyright
 * link and, when the WcpConsent library is available, initializes the cookie
 * banner and stores the resulting consent object in `siteConsent`.
 */
function initialize() {
  var currentYear = new Date().getFullYear();
  // The value is plain text, so textContent is enough and skips HTML parsing.
  document.getElementById("copyright").textContent = `©️ ${currentYear} Microsoft`;

  // Nothing to set up if the consent script did not load.
  if (!window.WcpConsent) {
    return;
  }

  WcpConsent.init("en-US", "cookie-banner", function (err, _siteConsent) {
    if (err) {
      console.log("Error initializing WcpConsent: "+ err);
      return;
    }
    siteConsent = _siteConsent; // siteConsent is used to get the current consent
  }, onConsentChanged, WcpConsent.themes.light);
}
|
||||
|
||||
addEventListener("DOMContentLoaded", initialize)
|
||||
addEventListener("DOMContentLoaded", checkCookieManager)
|
||||
|
||||
/**
 * Shows or hides the footer "Cookies" entry (#cookiesManager) and its
 * separator (#divider) according to siteConsent.isConsentRequired.
 */
function checkCookieManager() {
  // siteConsent is only assigned after WcpConsent.init succeeds. If it is still
  // unset, leave the footer as rendered instead of throwing a TypeError.
  if (!siteConsent) {
    return;
  }

  var display = siteConsent.isConsentRequired ? 'block' : 'none';
  document.getElementById("cookiesManager").style.display = display;
  document.getElementById("divider").style.display = display;
}
|
||||
|
||||
/**
 * Click handler for the footer "Cookies" entry: delegates to
 * siteConsent.manageConsent() when consent is required.
 */
function manageConsent() {
  // siteConsent stays unset when the consent library failed to load or
  // initialize; in that case a click must be a no-op rather than an error.
  if (siteConsent && siteConsent.isConsentRequired) {
    siteConsent.manageConsent();
  }
}
|
||||
</script>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<div id="cookie-banner"></div>
|
||||
<a href="/graphrag/"><span class="logotitle">GraphRAG</span></a>
|
||||
</header>
|
||||
<div class="page-content">
|
||||
<!-- Sidebar -->
|
||||
<aside class="menu">
|
||||
<ul class="menu-list">
|
||||
<li>
|
||||
|
||||
<a href="/graphrag/">Welcome</a>
|
||||
|
||||
</li>
|
||||
|
||||
<!-- Get Started Links -->
|
||||
<li>
|
||||
|
||||
<a href="/graphrag/posts/get_started/">Get Started</a>
|
||||
|
||||
|
||||
<a href="/graphrag/posts/developing/">Developing</a>
|
||||
|
||||
</li>
|
||||
|
||||
<!-- Indexing Links -->
|
||||
<li>
|
||||
|
||||
<a href="/graphrag/posts/index/overview/">Indexing</a>
|
||||
|
||||
<ul><li>
|
||||
<a href="/graphrag/posts/index/0-architecture/">Architecture</a>
|
||||
</li><li>
|
||||
<a href="/graphrag/posts/index/1-default_dataflow/">Dataflow</a>
|
||||
</li><li>
|
||||
<a href="/graphrag/posts/index/2-cli/">CLI</a>
|
||||
</li><li>
|
||||
<a href="/graphrag/posts/index/3-prompt_tuning/">Prompt Tuning</a>
|
||||
</li><li>
|
||||
|
||||
<a href="/graphrag/posts/config/overview/">Configuration</a>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
<a href="/graphrag/posts/config/env_vars">Using Env Vars</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/graphrag/posts/config/json_yaml">Using JSON or YAML</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/graphrag/posts/config/custom">Fully Custom</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/graphrag/posts/config/template">Template</a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
|
||||
|
||||
<!-- Query Links -->
|
||||
<li>
|
||||
|
||||
<a href="/graphrag/posts/query/overview/">Query</a>
|
||||
|
||||
<ul><li>
|
||||
<a href="/graphrag/posts/query/0-global_search/">Global Search</a>
|
||||
</li><li>
|
||||
<a href="/graphrag/posts/query/1-local_search/">Local Search</a>
|
||||
</li><li>
|
||||
<a href="/graphrag/posts/query/2-question_generation/">Question Generation</a>
|
||||
</li><li>
|
||||
<a href="/graphrag/posts/query/3-cli/">CLI</a>
|
||||
</li><li>
|
||||
|
||||
<a href="/graphrag/posts/query/notebooks/overview/">Notebooks</a>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
<a href="/graphrag/posts/query/notebooks/global_search_nb">Global Search</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/graphrag/posts/query/notebooks/local_search_nb">Local Search</a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</aside>
|
||||
|
||||
<!-- Main Content -->
|
||||
<main>
|
||||
<h1>Local Search Notebook</h1>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-0" class="language-python"><span class="token triple-quoted-string string">"""
|
||||
Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
"""</span></code></pre>
|
||||
<pre class="language-python"><code id="code-0" class="language-python"><span class="token comment"># Copyright (c) 2024 Microsoft Corporation. All rights reserved.</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-0" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
@ -283,26 +9,31 @@ Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-1" class="language-python"><span class="token keyword">import</span> os
|
||||
|
||||
<span class="token keyword">import</span> pandas <span class="token keyword">as</span> pd
|
||||
<span class="token keyword">import</span> tiktoken
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>llm<span class="token punctuation">.</span>oai<span class="token punctuation">.</span>chat_openai <span class="token keyword">import</span> ChatOpenAI
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>llm<span class="token punctuation">.</span>oai<span class="token punctuation">.</span>typing <span class="token keyword">import</span> OpenaiApiType
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>llm<span class="token punctuation">.</span>oai<span class="token punctuation">.</span>embedding <span class="token keyword">import</span> OpenAIEmbedding
|
||||
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>context_builder<span class="token punctuation">.</span>entity_extraction <span class="token keyword">import</span> EntityVectorStoreKey
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span><span class="token builtin">input</span><span class="token punctuation">.</span>loaders<span class="token punctuation">.</span>dfs <span class="token keyword">import</span> <span class="token punctuation">(</span>
|
||||
read_community_reports<span class="token punctuation">,</span>
|
||||
read_covariates<span class="token punctuation">,</span>
|
||||
read_entities<span class="token punctuation">,</span>
|
||||
read_relationships<span class="token punctuation">,</span>
|
||||
read_covariates<span class="token punctuation">,</span>
|
||||
read_text_units<span class="token punctuation">,</span>
|
||||
store_entity_semantic_embeddings<span class="token punctuation">,</span>
|
||||
read_text_units
|
||||
<span class="token punctuation">)</span>
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>structured_search<span class="token punctuation">.</span>local_search<span class="token punctuation">.</span>mixed_context <span class="token keyword">import</span> LocalSearchMixedContext
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span><span class="token builtin">input</span><span class="token punctuation">.</span>retrieval<span class="token punctuation">.</span>relationships <span class="token keyword">import</span> <span class="token punctuation">(</span>
|
||||
calculate_relationship_combined_rank<span class="token punctuation">,</span>
|
||||
<span class="token punctuation">)</span>
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>llm<span class="token punctuation">.</span>oai<span class="token punctuation">.</span>chat_openai <span class="token keyword">import</span> ChatOpenAI
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>llm<span class="token punctuation">.</span>oai<span class="token punctuation">.</span>embedding <span class="token keyword">import</span> OpenAIEmbedding
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>llm<span class="token punctuation">.</span>oai<span class="token punctuation">.</span>typing <span class="token keyword">import</span> OpenaiApiType
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>question_gen<span class="token punctuation">.</span>local_gen <span class="token keyword">import</span> LocalQuestionGen
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>structured_search<span class="token punctuation">.</span>local_search<span class="token punctuation">.</span>mixed_context <span class="token keyword">import</span> <span class="token punctuation">(</span>
|
||||
LocalSearchMixedContext<span class="token punctuation">,</span>
|
||||
<span class="token punctuation">)</span>
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>structured_search<span class="token punctuation">.</span>local_search<span class="token punctuation">.</span>search <span class="token keyword">import</span> LocalSearch
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>vector_stores<span class="token punctuation">.</span>qdrant <span class="token keyword">import</span> Qdrant
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>context_builder<span class="token punctuation">.</span>entity_extraction <span class="token keyword">import</span> EntityVectorStoreKey
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span><span class="token builtin">input</span><span class="token punctuation">.</span>retrieval<span class="token punctuation">.</span>relationships <span class="token keyword">import</span> calculate_relationship_combined_rank
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>context_builder<span class="token punctuation">.</span>conversation_history <span class="token keyword">import</span> ConversationHistory
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>query<span class="token punctuation">.</span>question_gen<span class="token punctuation">.</span>local_gen <span class="token keyword">import</span> LocalQuestionGen</code></pre>
|
||||
<span class="token keyword">from</span> graphrag<span class="token punctuation">.</span>vector_stores<span class="token punctuation">.</span>qdrant <span class="token keyword">import</span> Qdrant</code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-1" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
@ -317,7 +48,7 @@ Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
<h3>Load tables to dataframes</h3>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-21" class="language-python">INPUT_DIR <span class="token operator">=</span> <span class="token string">"./data"</span>
|
||||
<pre class="language-python"><code id="code-21" class="language-python">INPUT_DIR <span class="token operator">=</span> <span class="token string">"./inputs/operation dulce"</span>
|
||||
|
||||
COMMUNITY_REPORT_TABLE <span class="token operator">=</span> <span class="token string">"create_final_community_reports"</span>
|
||||
ENTITY_TABLE <span class="token operator">=</span> <span class="token string">"create_final_nodes"</span>
|
||||
@ -336,16 +67,20 @@ COMMUNITY_LEVEL <span class="token operator">=</span> <span class="token number"
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-25" class="language-python"><span class="token comment"># read nodes table to get community and degree data</span>
|
||||
entity_df <span class="token operator">=</span> pd<span class="token punctuation">.</span>read_parquet<span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"</span><span class="token interpolation"><span class="token punctuation">{</span>INPUT_DIR<span class="token punctuation">}</span></span><span class="token string">/</span><span class="token interpolation"><span class="token punctuation">{</span>ENTITY_TABLE<span class="token punctuation">}</span></span><span class="token string">.parquet"</span></span><span class="token punctuation">)</span>
|
||||
entity_df <span class="token operator">=</span> entity_df<span class="token punctuation">[</span><span class="token punctuation">(</span>entity_df<span class="token punctuation">.</span><span class="token builtin">type</span><span class="token operator">==</span><span class="token string">"entity"</span><span class="token punctuation">)</span> <span class="token operator">&</span> <span class="token punctuation">(</span>entity_df<span class="token punctuation">.</span>level<span class="token operator"><=</span><span class="token string-interpolation"><span class="token string">f"level_</span><span class="token interpolation"><span class="token punctuation">{</span>COMMUNITY_LEVEL<span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">)</span><span class="token punctuation">]</span>
|
||||
entity_df <span class="token operator">=</span> entity_df<span class="token punctuation">[</span><span class="token punctuation">[</span><span class="token string">"title"</span><span class="token punctuation">,</span> <span class="token string">"degree"</span><span class="token punctuation">,</span> <span class="token string">"community"</span><span class="token punctuation">]</span><span class="token punctuation">]</span><span class="token punctuation">.</span>rename<span class="token punctuation">(</span>columns<span class="token operator">=</span><span class="token punctuation">{</span><span class="token string">"title"</span><span class="token punctuation">:</span> <span class="token string">"name"</span><span class="token punctuation">,</span> <span class="token string">"degree"</span><span class="token punctuation">:</span> <span class="token string">"rank"</span><span class="token punctuation">}</span><span class="token punctuation">)</span>
|
||||
entity_df <span class="token operator">=</span> entity_df<span class="token punctuation">[</span>
|
||||
<span class="token punctuation">(</span>entity_df<span class="token punctuation">.</span><span class="token builtin">type</span> <span class="token operator">==</span> <span class="token string">"entity"</span><span class="token punctuation">)</span> <span class="token operator">&</span> <span class="token punctuation">(</span>entity_df<span class="token punctuation">.</span>level <span class="token operator"><=</span> <span class="token string-interpolation"><span class="token string">f"level_</span><span class="token interpolation"><span class="token punctuation">{</span>COMMUNITY_LEVEL<span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">)</span>
|
||||
<span class="token punctuation">]</span>
|
||||
entity_df <span class="token operator">=</span> entity_df<span class="token punctuation">[</span><span class="token punctuation">[</span><span class="token string">"title"</span><span class="token punctuation">,</span> <span class="token string">"degree"</span><span class="token punctuation">,</span> <span class="token string">"community"</span><span class="token punctuation">]</span><span class="token punctuation">]</span><span class="token punctuation">.</span>rename<span class="token punctuation">(</span>
|
||||
columns<span class="token operator">=</span><span class="token punctuation">{</span><span class="token string">"title"</span><span class="token punctuation">:</span> <span class="token string">"name"</span><span class="token punctuation">,</span> <span class="token string">"degree"</span><span class="token punctuation">:</span> <span class="token string">"rank"</span><span class="token punctuation">}</span>
|
||||
<span class="token punctuation">)</span>
|
||||
|
||||
entity_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span> <span class="token operator">=</span> entity_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>fillna<span class="token punctuation">(</span><span class="token operator">-</span><span class="token number">1</span><span class="token punctuation">)</span>
|
||||
entity_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span> <span class="token operator">=</span> entity_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>astype<span class="token punctuation">(</span><span class="token builtin">int</span><span class="token punctuation">)</span>
|
||||
entity_df<span class="token punctuation">[</span><span class="token string">"rank"</span><span class="token punctuation">]</span> <span class="token operator">=</span> entity_df<span class="token punctuation">[</span><span class="token string">"rank"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>astype<span class="token punctuation">(</span><span class="token builtin">int</span><span class="token punctuation">)</span>
|
||||
|
||||
<span class="token comment"># for duplicate entities, keep the one with the highest community level</span>
|
||||
entity_df <span class="token operator">=</span> entity_df<span class="token punctuation">.</span>groupby<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token string">"name"</span><span class="token punctuation">,</span> <span class="token string">"rank"</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">.</span>agg<span class="token punctuation">(</span><span class="token punctuation">{</span><span class="token string">"community"</span><span class="token punctuation">:</span> <span class="token string">"max"</span><span class="token punctuation">}</span><span class="token punctuation">)</span><span class="token punctuation">.</span>resetindex<span class="token punctuation">(</span><span class="token punctuation">)</span>
|
||||
entity_df<span class="token punctuation">[</span><span class="token string">'community'</span><span class="token punctuation">]</span> <span class="token operator">=</span> entity_df<span class="token punctuation">[</span><span class="token string">'community'</span><span class="token punctuation">]</span><span class="token punctuation">.</span><span class="token builtin">apply</span><span class="token punctuation">(</span><span class="token keyword">lambda</span> x<span class="token punctuation">:</span> <span class="token punctuation">[</span><span class="token builtin">str</span><span class="token punctuation">(</span>x<span class="token punctuation">)</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
|
||||
entity_df <span class="token operator">=</span> entity_df<span class="token punctuation">.</span>groupby<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token string">"name"</span><span class="token punctuation">,</span> <span class="token string">"rank"</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">.</span>agg<span class="token punctuation">(</span><span class="token punctuation">{</span><span class="token string">"community"</span><span class="token punctuation">:</span> <span class="token string">"max"</span><span class="token punctuation">}</span><span class="token punctuation">)</span><span class="token punctuation">.</span>reset_index<span class="token punctuation">(</span><span class="token punctuation">)</span>
|
||||
entity_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span> <span class="token operator">=</span> entity_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span><span class="token punctuation">.</span><span class="token builtin">apply</span><span class="token punctuation">(</span><span class="token keyword">lambda</span> x<span class="token punctuation">:</span> <span class="token punctuation">[</span><span class="token builtin">str</span><span class="token punctuation">(</span>x<span class="token punctuation">)</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
|
||||
|
||||
entity_embedding_df <span class="token operator">=</span> pd<span class="token punctuation">.</span>read_parquet<span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"</span><span class="token interpolation"><span class="token punctuation">{</span>INPUT_DIR<span class="token punctuation">}</span></span><span class="token string">/</span><span class="token interpolation"><span class="token punctuation">{</span>ENTITY_EMBEDDING_TABLE<span class="token punctuation">}</span></span><span class="token string">.parquet"</span></span><span class="token punctuation">)</span>
|
||||
entity_embedding_df <span class="token operator">=</span> entity_embedding_df<span class="token punctuation">[</span>
|
||||
@ -355,13 +90,14 @@ entity_embedding_df <span class="token operator">=</span> entity_embedding_df<sp
|
||||
<span class="token string">"name"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"type"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"description"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"name_embedding"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"description_embedding"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"text_unit_ids"</span>
|
||||
<span class="token string">"text_unit_ids"</span><span class="token punctuation">,</span>
|
||||
<span class="token punctuation">]</span>
|
||||
<span class="token punctuation">]</span>
|
||||
|
||||
entity_df <span class="token operator">=</span> entity_df<span class="token punctuation">.</span>merge<span class="token punctuation">(</span>entity_embedding_df<span class="token punctuation">,</span> on<span class="token operator">=</span><span class="token string">'name'</span><span class="token punctuation">,</span> how<span class="token operator">=</span><span class="token string">'inner'</span><span class="token punctuation">)</span><span class="token punctuation">.</span>drop_duplicates<span class="token punctuation">(</span>subset<span class="token operator">=</span><span class="token punctuation">[</span><span class="token string">'name'</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
|
||||
entity_df <span class="token operator">=</span> entity_df<span class="token punctuation">.</span>merge<span class="token punctuation">(</span>
|
||||
entity_embedding_df<span class="token punctuation">,</span> on<span class="token operator">=</span><span class="token string">"name"</span><span class="token punctuation">,</span> how<span class="token operator">=</span><span class="token string">"inner"</span>
|
||||
<span class="token punctuation">)</span><span class="token punctuation">.</span>drop_duplicates<span class="token punctuation">(</span>subset<span class="token operator">=</span><span class="token punctuation">[</span><span class="token string">"name"</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
|
||||
|
||||
<span class="token comment"># read entity dataframe to knowledge model objects</span>
|
||||
entities <span class="token operator">=</span> read_entities<span class="token punctuation">(</span>
|
||||
@ -373,7 +109,7 @@ entities <span class="token operator">=</span> read_entities<span class="token p
|
||||
description_col<span class="token operator">=</span><span class="token string">"description"</span><span class="token punctuation">,</span>
|
||||
community_col<span class="token operator">=</span><span class="token string">"community"</span><span class="token punctuation">,</span>
|
||||
rank_col<span class="token operator">=</span><span class="token string">"rank"</span><span class="token punctuation">,</span>
|
||||
name_embedding_col<span class="token operator">=</span><span class="token string">"name_embedding"</span><span class="token punctuation">,</span>
|
||||
name_embedding_col<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
|
||||
description_embedding_col<span class="token operator">=</span><span class="token string">"description_embedding"</span><span class="token punctuation">,</span>
|
||||
graph_embedding_col<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
|
||||
text_unit_ids_col<span class="token operator">=</span><span class="token string">"text_unit_ids"</span><span class="token punctuation">,</span>
|
||||
@ -387,13 +123,11 @@ description_embedding_store <span class="token operator">=</span> Qdrant<span cl
|
||||
<span class="token punctuation">)</span>
|
||||
description_embedding_store<span class="token punctuation">.</span>connect<span class="token punctuation">(</span><span class="token punctuation">)</span>
|
||||
entity_description_embeddings <span class="token operator">=</span> store_entity_semantic_embeddings<span class="token punctuation">(</span>
|
||||
entities<span class="token operator">=</span>entities<span class="token punctuation">,</span>
|
||||
vectorstore<span class="token operator">=</span>description_embedding_store
|
||||
entities<span class="token operator">=</span>entities<span class="token punctuation">,</span> vectorstore<span class="token operator">=</span>description_embedding_store
|
||||
<span class="token punctuation">)</span>
|
||||
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f'Entity count: </span><span class="token interpolation"><span class="token punctuation">{</span><span class="token builtin">len</span><span class="token punctuation">(</span>entity_df<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string">'</span></span><span class="token punctuation">)</span>
|
||||
entity_df<span class="token punctuation">.</span>head<span class="token punctuation">(</span><span class="token punctuation">)</span>
|
||||
</code></pre>
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"Entity count: </span><span class="token interpolation"><span class="token punctuation">{</span><span class="token builtin">len</span><span class="token punctuation">(</span>entity_df<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">)</span>
|
||||
entity_df<span class="token punctuation">.</span>head<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-25" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
@ -403,20 +137,23 @@ entity_df<span class="token punctuation">.</span>head<span class="token punctuat
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-29" class="language-python">relationship_df <span class="token operator">=</span> pd<span class="token punctuation">.</span>read_parquet<span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"</span><span class="token interpolation"><span class="token punctuation">{</span>INPUT_DIR<span class="token punctuation">}</span></span><span class="token string">/</span><span class="token interpolation"><span class="token punctuation">{</span>RELATIONSHIP_TABLE<span class="token punctuation">}</span></span><span class="token string">.parquet"</span></span><span class="token punctuation">)</span>
|
||||
<span class="token comment">#relationship_df = relationship_df[relationship_df.raw_level_ == "level_0"]</span>
|
||||
relationship_df <span class="token operator">=</span> relationship_df<span class="token punctuation">[</span><span class="token punctuation">[</span>
|
||||
<span class="token string">"id"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"human_readable_id"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"source"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"target"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"description"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"weight"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"text_unit_ids"</span>
|
||||
<span class="token punctuation">]</span><span class="token punctuation">]</span>
|
||||
relationship_df <span class="token operator">=</span> relationship_df<span class="token punctuation">[</span>
|
||||
<span class="token punctuation">[</span>
|
||||
<span class="token string">"id"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"human_readable_id"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"source"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"target"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"description"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"weight"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"text_unit_ids"</span><span class="token punctuation">,</span>
|
||||
<span class="token punctuation">]</span>
|
||||
<span class="token punctuation">]</span>
|
||||
relationship_df<span class="token punctuation">[</span><span class="token string">"id"</span><span class="token punctuation">]</span> <span class="token operator">=</span> relationship_df<span class="token punctuation">[</span><span class="token string">"id"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>astype<span class="token punctuation">(</span><span class="token builtin">str</span><span class="token punctuation">)</span>
|
||||
relationship_df<span class="token punctuation">[</span><span class="token string">"human_readable_id"</span><span class="token punctuation">]</span> <span class="token operator">=</span> relationship_df<span class="token punctuation">[</span><span class="token string">"human_readable_id"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>astype<span class="token punctuation">(</span><span class="token builtin">str</span><span class="token punctuation">)</span>
|
||||
relationship_df<span class="token punctuation">[</span><span class="token string">"weight"</span><span class="token punctuation">]</span> <span class="token operator">=</span> relationship_df<span class="token punctuation">[</span><span class="token string">"weight"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>astype<span class="token punctuation">(</span><span class="token builtin">float</span><span class="token punctuation">)</span>
|
||||
relationship_df<span class="token punctuation">[</span><span class="token string">"text_unit_ids"</span><span class="token punctuation">]</span> <span class="token operator">=</span> relationship_df<span class="token punctuation">[</span><span class="token string">"text_unit_ids"</span><span class="token punctuation">]</span><span class="token punctuation">.</span><span class="token builtin">apply</span><span class="token punctuation">(</span><span class="token keyword">lambda</span> x<span class="token punctuation">:</span> x<span class="token punctuation">.</span>split<span class="token punctuation">(</span><span class="token string">","</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
|
||||
relationship_df<span class="token punctuation">[</span><span class="token string">"text_unit_ids"</span><span class="token punctuation">]</span> <span class="token operator">=</span> relationship_df<span class="token punctuation">[</span><span class="token string">"text_unit_ids"</span><span class="token punctuation">]</span><span class="token punctuation">.</span><span class="token builtin">apply</span><span class="token punctuation">(</span>
|
||||
<span class="token keyword">lambda</span> x<span class="token punctuation">:</span> x<span class="token punctuation">.</span>split<span class="token punctuation">(</span><span class="token string">","</span><span class="token punctuation">)</span>
|
||||
<span class="token punctuation">)</span>
|
||||
|
||||
relationships <span class="token operator">=</span> read_relationships<span class="token punctuation">(</span>
|
||||
df<span class="token operator">=</span>relationship_df<span class="token punctuation">,</span>
|
||||
@ -430,11 +167,12 @@ relationships <span class="token operator">=</span> read_relationships<span clas
|
||||
text_unit_ids_col<span class="token operator">=</span><span class="token string">"text_unit_ids"</span><span class="token punctuation">,</span>
|
||||
document_ids_col<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
|
||||
<span class="token punctuation">)</span>
|
||||
relationships <span class="token operator">=</span> calculate_relationship_combined_rank<span class="token punctuation">(</span>relationships<span class="token operator">=</span>relationships<span class="token punctuation">,</span> entities<span class="token operator">=</span>entities<span class="token punctuation">,</span> ranking_attribute<span class="token operator">=</span><span class="token string">"rank"</span><span class="token punctuation">)</span>
|
||||
relationships <span class="token operator">=</span> calculate_relationship_combined_rank<span class="token punctuation">(</span>
|
||||
relationships<span class="token operator">=</span>relationships<span class="token punctuation">,</span> entities<span class="token operator">=</span>entities<span class="token punctuation">,</span> ranking_attribute<span class="token operator">=</span><span class="token string">"rank"</span>
|
||||
<span class="token punctuation">)</span>
|
||||
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"Relationship count: </span><span class="token interpolation"><span class="token punctuation">{</span><span class="token builtin">len</span><span class="token punctuation">(</span>relationship_df<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">)</span>
|
||||
relationship_df<span class="token punctuation">.</span>head<span class="token punctuation">(</span><span class="token punctuation">)</span>
|
||||
</code></pre>
|
||||
relationship_df<span class="token punctuation">.</span>head<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-29" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
@ -443,42 +181,60 @@ relationship_df<span class="token punctuation">.</span>head<span class="token pu
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-30" class="language-python"><span class="token keyword">try</span><span class="token punctuation">:</span>
|
||||
covariate_df <span class="token operator">=</span> pd<span class="token punctuation">.</span>read_parquet<span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"</span><span class="token interpolation"><span class="token punctuation">{</span>INPUT_DIR<span class="token punctuation">}</span></span><span class="token string">/</span><span class="token interpolation"><span class="token punctuation">{</span>COVARIATE_TABLE<span class="token punctuation">}</span></span><span class="token string">.parquet"</span></span><span class="token punctuation">)</span>
|
||||
covariate_df <span class="token operator">=</span> covariate_df<span class="token punctuation">[</span>
|
||||
<span class="token punctuation">[</span>
|
||||
<span class="token string">"id"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"human_readable_id"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"type"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"subject_id"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"subject_type"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"object_id"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"status"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"start_date"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"end_date"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"description"</span>
|
||||
<span class="token punctuation">]</span>
|
||||
<span class="token punctuation">]</span>
|
||||
covariate_df <span class="token operator">=</span> pd<span class="token punctuation">.</span>read_parquet<span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"</span><span class="token interpolation"><span class="token punctuation">{</span>INPUT_DIR<span class="token punctuation">}</span></span><span class="token string">/</span><span class="token interpolation"><span class="token punctuation">{</span>COVARIATE_TABLE<span class="token punctuation">}</span></span><span class="token string">.parquet"</span></span><span class="token punctuation">)</span>
|
||||
covariate_df <span class="token operator">=</span> <span class="token punctuation">(</span>
|
||||
covariate_df<span class="token punctuation">[</span>
|
||||
<span class="token punctuation">[</span>
|
||||
<span class="token string">"id"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"human_readable_id"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"type"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"subject_id"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"subject_type"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"object_id"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"status"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"start_date"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"end_date"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"description"</span><span class="token punctuation">,</span>
|
||||
<span class="token punctuation">]</span>
|
||||
<span class="token punctuation">]</span><span class="token punctuation">,</span>
|
||||
<span class="token punctuation">)</span>
|
||||
|
||||
<span class="token keyword">except</span><span class="token punctuation">:</span> <span class="token comment"># noqa: E722</span>
|
||||
columns <span class="token operator">=</span> <span class="token punctuation">[</span>
|
||||
<span class="token string">"id"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"human_readable_id"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"type"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"subject_id"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"object_id"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"status"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"start_date"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"end_date"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"description"</span><span class="token punctuation">,</span>
|
||||
<span class="token punctuation">]</span>
|
||||
covariate_df <span class="token operator">=</span> pd<span class="token punctuation">.</span>DataFrame<span class="token punctuation">(</span><span class="token punctuation">{</span>column<span class="token punctuation">:</span> <span class="token punctuation">[</span><span class="token punctuation">]</span> <span class="token keyword">for</span> column <span class="token keyword">in</span> columns<span class="token punctuation">}</span><span class="token punctuation">)</span>
|
||||
|
||||
<span class="token keyword">except</span><span class="token punctuation">:</span>
|
||||
columns <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token string">'id'</span><span class="token punctuation">,</span> <span class="token string">'human_readable_id'</span><span class="token punctuation">,</span> <span class="token string">'type'</span><span class="token punctuation">,</span> <span class="token string">'subject_id'</span><span class="token punctuation">,</span> <span class="token string">'object_id'</span><span class="token punctuation">,</span> <span class="token string">'status'</span><span class="token punctuation">,</span> <span class="token string">'start_date'</span><span class="token punctuation">,</span> <span class="token string">'end_date'</span><span class="token punctuation">,</span> <span class="token string">'description'</span><span class="token punctuation">]</span>
|
||||
covariate_df <span class="token operator">=</span> pd<span class="token punctuation">.</span>DataFrame<span class="token punctuation">(</span>data<span class="token operator">=</span><span class="token punctuation">[</span><span class="token punctuation">]</span><span class="token punctuation">,</span> columns<span class="token operator">=</span>columns<span class="token punctuation">)</span>
|
||||
covariate_df<span class="token punctuation">[</span><span class="token string">"id"</span><span class="token punctuation">]</span> <span class="token operator">=</span> covariate_df<span class="token punctuation">[</span><span class="token string">"id"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>astype<span class="token punctuation">(</span><span class="token builtin">str</span><span class="token punctuation">)</span>
|
||||
covariate_df<span class="token punctuation">[</span><span class="token string">"human_readable_id"</span><span class="token punctuation">]</span> <span class="token operator">=</span> covariate_df<span class="token punctuation">[</span><span class="token string">"human_readable_id"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>astype<span class="token punctuation">(</span><span class="token builtin">str</span><span class="token punctuation">)</span>
|
||||
|
||||
claims <span class="token operator">=</span> read_covariates<span class="token punctuation">(</span>
|
||||
df<span class="token operator">=</span>covariate_df<span class="token punctuation">,</span>
|
||||
id_col<span class="token operator">=</span><span class="token string">"id"</span><span class="token punctuation">,</span>
|
||||
short_id_col<span class="token operator">=</span><span class="token string">"human_readable_id"</span><span class="token punctuation">,</span>
|
||||
subject_col<span class="token operator">=</span><span class="token string">"subject_id"</span><span class="token punctuation">,</span>
|
||||
subject_type_col<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
|
||||
covariate_type_col<span class="token operator">=</span><span class="token string">"type"</span><span class="token punctuation">,</span>
|
||||
attributes_cols<span class="token operator">=</span><span class="token punctuation">[</span><span class="token string">"object_id"</span><span class="token punctuation">,</span><span class="token string">"status"</span><span class="token punctuation">,</span> <span class="token string">"start_date"</span><span class="token punctuation">,</span> <span class="token string">"end_date"</span><span class="token punctuation">,</span> <span class="token string">"description"</span><span class="token punctuation">]</span><span class="token punctuation">,</span>
|
||||
text_unit_ids_col<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
|
||||
document_ids_col<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
|
||||
<span class="token punctuation">)</span>
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f'Claim records: </span><span class="token interpolation"><span class="token punctuation">{</span><span class="token builtin">len</span><span class="token punctuation">(</span>claims<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string">'</span></span><span class="token punctuation">)</span>
|
||||
covariates <span class="token operator">=</span> <span class="token punctuation">{</span><span class="token string">'claims'</span><span class="token punctuation">:</span> claims<span class="token punctuation">}</span>
|
||||
</code></pre>
|
||||
df<span class="token operator">=</span>covariate_df<span class="token punctuation">,</span>
|
||||
id_col<span class="token operator">=</span><span class="token string">"id"</span><span class="token punctuation">,</span>
|
||||
short_id_col<span class="token operator">=</span><span class="token string">"human_readable_id"</span><span class="token punctuation">,</span>
|
||||
subject_col<span class="token operator">=</span><span class="token string">"subject_id"</span><span class="token punctuation">,</span>
|
||||
subject_type_col<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
|
||||
covariate_type_col<span class="token operator">=</span><span class="token string">"type"</span><span class="token punctuation">,</span>
|
||||
attributes_cols<span class="token operator">=</span><span class="token punctuation">[</span>
|
||||
<span class="token string">"object_id"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"status"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"start_date"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"end_date"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"description"</span><span class="token punctuation">,</span>
|
||||
<span class="token punctuation">]</span><span class="token punctuation">,</span>
|
||||
text_unit_ids_col<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
|
||||
document_ids_col<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
|
||||
<span class="token punctuation">)</span>
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"Claim records: </span><span class="token interpolation"><span class="token punctuation">{</span><span class="token builtin">len</span><span class="token punctuation">(</span>claims<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">)</span>
|
||||
covariates <span class="token operator">=</span> <span class="token punctuation">{</span><span class="token string">"claims"</span><span class="token punctuation">:</span> claims<span class="token punctuation">}</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-30" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
@ -490,8 +246,8 @@ covariates <span class="token operator">=</span> <span class="token punctuation"
|
||||
<pre class="language-python"><code id="code-34" class="language-python"><span class="token comment"># get a list of communities from entity table</span>
|
||||
community_df <span class="token operator">=</span> entity_df<span class="token punctuation">[</span><span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span><span class="token punctuation">]</span><span class="token punctuation">.</span>copy<span class="token punctuation">(</span><span class="token punctuation">)</span>
|
||||
community_df<span class="token punctuation">[</span><span class="token string">"community_id"</span><span class="token punctuation">]</span> <span class="token operator">=</span> community_df<span class="token punctuation">[</span><span class="token string">"community"</span><span class="token punctuation">]</span><span class="token punctuation">.</span><span class="token builtin">apply</span><span class="token punctuation">(</span><span class="token keyword">lambda</span> x<span class="token punctuation">:</span> <span class="token builtin">str</span><span class="token punctuation">(</span>x<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
|
||||
community_df <span class="token operator">=</span> community_df<span class="token punctuation">[</span><span class="token punctuation">[</span><span class="token string">"community_id"</span><span class="token punctuation">]</span><span class="token punctuation">]</span><span class="token punctuation">.</span>drop_duplicates<span class="token punctuation">(</span>subset<span class="token operator">=</span><span class="token punctuation">[</span><span class="token string">'community_id'</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f'Community records: </span><span class="token interpolation"><span class="token punctuation">{</span><span class="token builtin">len</span><span class="token punctuation">(</span>community_df<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string">'</span></span><span class="token punctuation">)</span></code></pre>
|
||||
community_df <span class="token operator">=</span> community_df<span class="token punctuation">[</span><span class="token punctuation">[</span><span class="token string">"community_id"</span><span class="token punctuation">]</span><span class="token punctuation">]</span><span class="token punctuation">.</span>drop_duplicates<span class="token punctuation">(</span>subset<span class="token operator">=</span><span class="token punctuation">[</span><span class="token string">"community_id"</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"Community records: </span><span class="token interpolation"><span class="token punctuation">{</span><span class="token builtin">len</span><span class="token punctuation">(</span>community_df<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-34" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
@ -501,9 +257,12 @@ community_df <span class="token operator">=</span> community_df<span class="toke
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-35" class="language-python">report_df <span class="token operator">=</span> pd<span class="token punctuation">.</span>read_parquet<span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"</span><span class="token interpolation"><span class="token punctuation">{</span>INPUT_DIR<span class="token punctuation">}</span></span><span class="token string">/</span><span class="token interpolation"><span class="token punctuation">{</span>COMMUNITY_REPORT_TABLE<span class="token punctuation">}</span></span><span class="token string">.parquet"</span></span><span class="token punctuation">)</span>
|
||||
report_df <span class="token operator">=</span> report_df<span class="token punctuation">[</span>report_df<span class="token punctuation">.</span>level <span class="token operator">&lt;=</span> <span class="token string-interpolation"><span class="token string">f"level_</span><span class="token interpolation"><span class="token punctuation">{</span>COMMUNITY_LEVEL<span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">]</span>
|
||||
report_df <span class="token operator">=</span> report_df<span class="token punctuation">.</span>merge<span class="token punctuation">(</span>community_df<span class="token punctuation">,</span> on<span class="token operator">=</span><span class="token string">'community_id'</span><span class="token punctuation">,</span> how<span class="token operator">=</span><span class="token string">'inner'</span><span class="token punctuation">)</span>
|
||||
|
||||
report_df<span class="token punctuation">[</span><span class="token string">"rank"</span><span class="token punctuation">]</span> <span class="token operator">=</span> report_df<span class="token punctuation">[</span><span class="token string">"rank"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>fillna<span class="token punctuation">(</span><span class="token operator">-</span><span class="token number">1</span><span class="token punctuation">)</span>
|
||||
report_df<span class="token punctuation">[</span><span class="token string">"rank"</span><span class="token punctuation">]</span> <span class="token operator">=</span> report_df<span class="token punctuation">[</span><span class="token string">"rank"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>astype<span class="token punctuation">(</span><span class="token builtin">int</span><span class="token punctuation">)</span>
|
||||
|
||||
report_df <span class="token operator">=</span> report_df<span class="token punctuation">.</span>merge<span class="token punctuation">(</span>community_df<span class="token punctuation">,</span> on<span class="token operator">=</span><span class="token string">"community_id"</span><span class="token punctuation">,</span> how<span class="token operator">=</span><span class="token string">"inner"</span><span class="token punctuation">)</span>
|
||||
|
||||
reports <span class="token operator">=</span> read_community_reports<span class="token punctuation">(</span>
|
||||
df<span class="token operator">=</span>report_df<span class="token punctuation">,</span>
|
||||
id_col<span class="token operator">=</span><span class="token string">"community_id"</span><span class="token punctuation">,</span>
|
||||
@ -517,7 +276,7 @@ reports <span class="token operator">=</span> read_community_reports<span class=
|
||||
content_embedding_col<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
|
||||
<span class="token punctuation">)</span>
|
||||
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f'Report records: </span><span class="token interpolation"><span class="token punctuation">{</span><span class="token builtin">len</span><span class="token punctuation">(</span>report_df<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string">'</span></span><span class="token punctuation">)</span>
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"Report records: </span><span class="token interpolation"><span class="token punctuation">{</span><span class="token builtin">len</span><span class="token punctuation">(</span>report_df<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">)</span>
|
||||
report_df<span class="token punctuation">.</span>head<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-35" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
@ -538,9 +297,8 @@ text_units <span class="token operator">=</span> read_text_units<span class="tok
|
||||
entities_col<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
|
||||
relationships_col<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
|
||||
covariates_col<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
|
||||
|
||||
<span class="token punctuation">)</span>
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f'Text unit records: </span><span class="token interpolation"><span class="token punctuation">{</span><span class="token builtin">len</span><span class="token punctuation">(</span>text_unit_df<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string">'</span></span><span class="token punctuation">)</span>
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string-interpolation"><span class="token string">f"Text unit records: </span><span class="token interpolation"><span class="token punctuation">{</span><span class="token builtin">len</span><span class="token punctuation">(</span>text_unit_df<span class="token punctuation">)</span><span class="token punctuation">}</span></span><span class="token string">"</span></span><span class="token punctuation">)</span>
|
||||
text_unit_df<span class="token punctuation">.</span>head<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-39" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
@ -549,17 +307,15 @@ text_unit_df<span class="token punctuation">.</span>head<span class="token punct
|
||||
</div>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-40" class="language-python">api_key <span class="token operator">=</span> <span class="token string">"&lt;api_key&gt;"</span>
|
||||
api_version <span class="token operator">=</span> <span class="token string">"api_version"</span>
|
||||
llm_model <span class="token operator">=</span> <span class="token string">"model or deployment id"</span>
|
||||
embedding_model <span class="token operator">=</span> <span class="token string">"model or deployment id"</span>
|
||||
<pre class="language-python"><code id="code-40" class="language-python">api_key <span class="token operator">=</span> os<span class="token punctuation">.</span>environ<span class="token punctuation">[</span><span class="token string">"GRAPHRAG_API_KEY"</span><span class="token punctuation">]</span>
|
||||
llm_model <span class="token operator">=</span> os<span class="token punctuation">.</span>environ<span class="token punctuation">[</span><span class="token string">"GRAPHRAG_LLM_MODEL"</span><span class="token punctuation">]</span>
|
||||
embedding_model <span class="token operator">=</span> os<span class="token punctuation">.</span>environ<span class="token punctuation">[</span><span class="token string">"GRAPHRAG_EMBEDDING_MODEL"</span><span class="token punctuation">]</span>
|
||||
|
||||
llm <span class="token operator">=</span> ChatOpenAI<span class="token punctuation">(</span>
|
||||
api_key<span class="token operator">=</span>api_key<span class="token punctuation">,</span>
|
||||
model<span class="token operator">=</span>llm_model<span class="token punctuation">,</span>
|
||||
api_type<span class="token operator">=</span>OpenaiApiType<span class="token punctuation">.</span>OpenAI<span class="token punctuation">,</span> <span class="token comment"># OpenaiApiType.OpenAI or OpenaiApiType.AzureOpenAI</span>
|
||||
api_version<span class="token operator">=</span>api_version<span class="token punctuation">,</span>
|
||||
max_retries<span class="token operator">=</span><span class="token number">20</span>
|
||||
api_type<span class="token operator">=</span>OpenaiApiType<span class="token punctuation">.</span>OpenAI<span class="token punctuation">,</span> <span class="token comment"># OpenaiApiType.OpenAI or OpenaiApiType.AzureOpenAI</span>
|
||||
max_retries<span class="token operator">=</span><span class="token number">20</span><span class="token punctuation">,</span>
|
||||
<span class="token punctuation">)</span>
|
||||
|
||||
token_encoder <span class="token operator">=</span> tiktoken<span class="token punctuation">.</span>get_encoding<span class="token punctuation">(</span><span class="token string">"cl100k_base"</span><span class="token punctuation">)</span>
|
||||
@ -570,7 +326,6 @@ text_embedder <span class="token operator">=</span> OpenAIEmbedding<span class="
|
||||
api_type<span class="token operator">=</span>OpenaiApiType<span class="token punctuation">.</span>OpenAI<span class="token punctuation">,</span>
|
||||
model<span class="token operator">=</span>embedding_model<span class="token punctuation">,</span>
|
||||
deployment_name<span class="token operator">=</span>embedding_model<span class="token punctuation">,</span>
|
||||
api_version<span class="token operator">=</span>api_version<span class="token punctuation">,</span>
|
||||
max_retries<span class="token operator">=</span><span class="token number">20</span><span class="token punctuation">,</span>
|
||||
<span class="token punctuation">)</span></code></pre>
|
||||
|
||||
@ -588,7 +343,7 @@ text_embedder <span class="token operator">=</span> OpenAIEmbedding<span class="
|
||||
relationships<span class="token operator">=</span>relationships<span class="token punctuation">,</span>
|
||||
covariates<span class="token operator">=</span>covariates<span class="token punctuation">,</span>
|
||||
entity_text_embeddings<span class="token operator">=</span>description_embedding_store<span class="token punctuation">,</span>
|
||||
embedding_vectorstore_key<span class="token operator">=</span>EntityVectorStoreKey<span class="token punctuation">.</span>ID<span class="token punctuation">,</span> <span class="token comment"># if the vectorstore uses entity title as ids, set this to EntityVectorStoreKey.TITLE</span>
|
||||
embedding_vectorstore_key<span class="token operator">=</span>EntityVectorStoreKey<span class="token punctuation">.</span>ID<span class="token punctuation">,</span> <span class="token comment"># if the vectorstore uses entity title as ids, set this to EntityVectorStoreKey.TITLE</span>
|
||||
text_embedder<span class="token operator">=</span>text_embedder<span class="token punctuation">,</span>
|
||||
token_encoder<span class="token operator">=</span>token_encoder<span class="token punctuation">,</span>
|
||||
<span class="token punctuation">)</span></code></pre>
|
||||
@ -600,8 +355,7 @@ text_embedder <span class="token operator">=</span> OpenAIEmbedding<span class="
|
||||
<h3>Create local search engine</h3>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-48" class="language-python">
|
||||
<span class="token comment"># text_unit_prop: proportion of context window dedicated to related text units</span>
|
||||
<pre class="language-python"><code id="code-48" class="language-python"><span class="token comment"># text_unit_prop: proportion of context window dedicated to related text units</span>
|
||||
<span class="token comment"># community_prop: proportion of context window dedicated to community reports.</span>
|
||||
<span class="token comment"># The remaining proportion is dedicated to entities and relationships. Sum of text_unit_prop and community_prop should be &lt;= 1</span>
|
||||
<span class="token comment"># conversation_history_max_turns: maximum number of turns to include in the conversation history.</span>
|
||||
@ -628,12 +382,12 @@ local_context_params <span class="token operator">=</span> <span class="token pu
|
||||
<span class="token string">"include_relationship_weight"</span><span class="token punctuation">:</span> <span class="token boolean">True</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"include_community_rank"</span><span class="token punctuation">:</span> <span class="token boolean">False</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"return_candidate_context"</span><span class="token punctuation">:</span> <span class="token boolean">False</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"embedding_vectorstore_key"</span><span class="token punctuation">:</span> EntityVectorStoreKey<span class="token punctuation">.</span>ID<span class="token punctuation">,</span> <span class="token comment"># set this to EntityVectorStoreKey.TITLE if the vectorstore uses entity title as ids</span>
|
||||
<span class="token string">"max_tokens"</span><span class="token punctuation">:</span> <span class="token number">16000</span> <span class="token comment"># change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)</span>
|
||||
<span class="token string">"embedding_vectorstore_key"</span><span class="token punctuation">:</span> EntityVectorStoreKey<span class="token punctuation">.</span>ID<span class="token punctuation">,</span> <span class="token comment"># set this to EntityVectorStoreKey.TITLE if the vectorstore uses entity title as ids</span>
|
||||
<span class="token string">"max_tokens"</span><span class="token punctuation">:</span> <span class="token number">12_000</span><span class="token punctuation">,</span> <span class="token comment"># change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)</span>
|
||||
<span class="token punctuation">}</span>
|
||||
|
||||
llm_params <span class="token operator">=</span> <span class="token punctuation">{</span>
|
||||
<span class="token string">"max_tokens"</span><span class="token punctuation">:</span> <span class="token number">2000</span><span class="token punctuation">,</span> <span class="token comment"># change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 1000-1500)</span>
|
||||
<span class="token string">"max_tokens"</span><span class="token punctuation">:</span> <span class="token number">2_000</span><span class="token punctuation">,</span> <span class="token comment"># change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 1000-1500)</span>
|
||||
<span class="token string">"temperature"</span><span class="token punctuation">:</span> <span class="token number">0.0</span><span class="token punctuation">,</span>
|
||||
<span class="token punctuation">}</span></code></pre>
|
||||
|
||||
@ -649,7 +403,7 @@ llm_params <span class="token operator">=</span> <span class="token punctuation"
|
||||
token_encoder<span class="token operator">=</span>token_encoder<span class="token punctuation">,</span>
|
||||
llm_params<span class="token operator">=</span>llm_params<span class="token punctuation">,</span>
|
||||
context_builder_params<span class="token operator">=</span>local_context_params<span class="token punctuation">,</span>
|
||||
response_type<span class="token operator">=</span><span class="token string">"multiple paragraphs"</span> <span class="token comment"># free form text describing the response type and format, can be anything, e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report</span>
|
||||
response_type<span class="token operator">=</span><span class="token string">"multiple paragraphs"</span><span class="token punctuation">,</span> <span class="token comment"># free form text describing the response type and format, can be anything, e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report</span>
|
||||
<span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-49" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
@ -659,7 +413,7 @@ llm_params <span class="token operator">=</span> <span class="token punctuation"
|
||||
<h3>Run local search on sample queries</h3>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-53" class="language-python">result <span class="token operator">=</span> <span class="token keyword">await</span> search_engine<span class="token punctuation">.</span>asearch<span class="token punctuation">(</span><span class="token string">'What are the healing properties of chamomile?'</span><span class="token punctuation">)</span>
|
||||
<pre class="language-python"><code id="code-53" class="language-python">result <span class="token operator">=</span> <span class="token keyword">await</span> search_engine<span class="token punctuation">.</span>asearch<span class="token punctuation">(</span><span class="token string">"Tell me about Agent Mercer"</span><span class="token punctuation">)</span>
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span>result<span class="token punctuation">.</span>response<span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-53" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
@ -668,10 +422,9 @@ llm_params <span class="token operator">=</span> <span class="token punctuation"
|
||||
</div>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-54" class="language-python">question <span class="token operator">=</span> <span class="token string">"When is it better to harvest chamomile?"</span>
|
||||
<pre class="language-python"><code id="code-54" class="language-python">question <span class="token operator">=</span> <span class="token string">"Tell me about Dr. Jordan Hayes"</span>
|
||||
result <span class="token operator">=</span> <span class="token keyword">await</span> search_engine<span class="token punctuation">.</span>asearch<span class="token punctuation">(</span>question<span class="token punctuation">)</span>
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span>result<span class="token punctuation">.</span>response<span class="token punctuation">)</span>
|
||||
</code></pre>
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span>result<span class="token punctuation">.</span>response<span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-54" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
@ -680,7 +433,7 @@ result <span class="token operator">=</span> <span class="token keyword">await</
|
||||
<h4>Inspecting the context data used to generate the response</h4>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-58" class="language-python">result<span class="token punctuation">.</span>context_data<span class="token punctuation">[</span><span class="token string">'entities'</span><span class="token punctuation">]</span><span class="token punctuation">.</span>head<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre>
|
||||
<pre class="language-python"><code id="code-58" class="language-python">result<span class="token punctuation">.</span>context_data<span class="token punctuation">[</span><span class="token string">"entities"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>head<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-58" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
@ -688,7 +441,7 @@ result <span class="token operator">=</span> <span class="token keyword">await</
|
||||
</div>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-59" class="language-python">result<span class="token punctuation">.</span>context_data<span class="token punctuation">[</span><span class="token string">'relationships'</span><span class="token punctuation">]</span><span class="token punctuation">.</span>head<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre>
|
||||
<pre class="language-python"><code id="code-59" class="language-python">result<span class="token punctuation">.</span>context_data<span class="token punctuation">[</span><span class="token string">"relationships"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>head<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-59" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
@ -696,7 +449,7 @@ result <span class="token operator">=</span> <span class="token keyword">await</
|
||||
</div>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-60" class="language-python">result<span class="token punctuation">.</span>context_data<span class="token punctuation">[</span><span class="token string">'reports'</span><span class="token punctuation">]</span><span class="token punctuation">.</span>head<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre>
|
||||
<pre class="language-python"><code id="code-60" class="language-python">result<span class="token punctuation">.</span>context_data<span class="token punctuation">[</span><span class="token string">"reports"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>head<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-60" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
@ -704,7 +457,7 @@ result <span class="token operator">=</span> <span class="token keyword">await</
|
||||
</div>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-61" class="language-python">result<span class="token punctuation">.</span>context_data<span class="token punctuation">[</span><span class="token string">'sources'</span><span class="token punctuation">]</span><span class="token punctuation">.</span>head<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre>
|
||||
<pre class="language-python"><code id="code-61" class="language-python">result<span class="token punctuation">.</span>context_data<span class="token punctuation">[</span><span class="token string">"sources"</span><span class="token punctuation">]</span><span class="token punctuation">.</span>head<span class="token punctuation">(</span><span class="token punctuation">)</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-61" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
@ -729,13 +482,11 @@ result <span class="token operator">=</span> <span class="token keyword">await</
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-python"><code id="code-69" class="language-python">question_history <span class="token operator">=</span> <span class="token punctuation">[</span>
|
||||
<span class="token string">"Tell me about chamomile"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"what is its role in herbal medicine?"</span>
|
||||
<span class="token string">"Tell me about Agent Mercer"</span><span class="token punctuation">,</span>
|
||||
<span class="token string">"What happens in Dulce military base?"</span><span class="token punctuation">,</span>
|
||||
<span class="token punctuation">]</span>
|
||||
candidate_questions <span class="token operator">=</span> <span class="token keyword">await</span> question_generator<span class="token punctuation">.</span>agenerate<span class="token punctuation">(</span>
|
||||
question_history<span class="token operator">=</span>question_history<span class="token punctuation">,</span>
|
||||
context_data<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
|
||||
question_count<span class="token operator">=</span><span class="token number">5</span>
|
||||
question_history<span class="token operator">=</span>question_history<span class="token punctuation">,</span> context_data<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span> question_count<span class="token operator">=</span><span class="token number">5</span>
|
||||
<span class="token punctuation">)</span>
|
||||
<span class="token keyword">print</span><span class="token punctuation">(</span>candidate_questions<span class="token punctuation">.</span>response<span class="token punctuation">)</span></code></pre>
|
||||
|
||||
@ -743,23 +494,3 @@ candidate_questions <span class="token operator">=</span> <span class="token key
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
</main>
|
||||
</div>
|
||||
<footer>
|
||||
<a href="https://go.microsoft.com/fwlink/?LinkId=521839">Privacy</a>
|
||||
|
|
||||
<a href="https://go.microsoft.com/fwlink/?LinkId=2259814">Consumer Health Privacy</a>
|
||||
|
|
||||
<span id="cookiesManager" onClick="manageConsent();">Cookies</span>
|
||||
<span id="divider">|</span>
|
||||
<a href="https://go.microsoft.com/fwlink/?LinkID=206977">Terms of Use</a>
|
||||
|
|
||||
<a href="https://www.microsoft.com/trademarks">Trademarks</a>
|
||||
|
|
||||
<a href="https://www.microsoft.com" id="copyright"></a>
|
||||
|
|
||||
<a href="https://github.com/microsoft/graphrag">GitHub</a>
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
Loading…
x
Reference in New Issue
Block a user