mirror of
https://github.com/microsoft/graphrag.git
synced 2025-09-17 20:24:20 +00:00
326 lines
13 KiB
HTML
326 lines
13 KiB
HTML
|
|
|
|
|
|
|
|
<!doctype html>
|
|
<html lang="en">
|
|
<head>
|
|
<meta charset="utf-8">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
<title>Local Search 🔎</title>
|
|
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
|
|
<link href="https://unpkg.com/prismjs@1.20.0/themes/prism-okaidia.css" rel="stylesheet">
|
|
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/Primer/19.1.1/tooltips.min.css" crossorigin="anonymous" referrerpolicy="no-referrer">
|
|
<style>
  /* Page-level stylesheet. Bulma, the Prism theme and Primer tooltips are loaded
     before this block, so the rules here act as overrides on top of them. */

  /* Remove any default spacing around the document. */
  html {
    padding: 0;
    margin: 0;
  }

  body {
    font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
    padding: 0;
    margin: 0;
  }

  /* Bottom bar: a single centred row of links with a fixed gap between items. */
  footer {
    width: 100%;
    height: 32px;
    font-size: 12px;
    display: flex;
    flex-direction: row;
    justify-content: center;
    gap: 18px;
    align-items: center;
    color: #5d5d5d;
    background: #e9eaeb;
    border-top: 1px solid #c4c5c6;
  }

  /* The "Cookies" footer entry is not an <a>, so it gets the link colour and a pointer cursor here. */
  #cookiesManager {
    cursor: pointer;
    color: #485fc7;
  }

  /* Row that holds the sidebar menu and the main content side by side.
     (margin/padding were previously declared twice; one copy of each is enough.) */
  .page-content {
    display: flex;
    flex-direction: row;
    margin: 0;
    padding: 0;
    overflow: scroll;
  }

  header {
    background-color: lightgrey;
    height: 2%;
    padding: 10px;
  }

  /* NOTE(review): no <nav> element appears in this page's markup (the sidebar is an <aside>);
     rule kept unchanged in case other templates rely on it. */
  nav {
    padding: 1em;
    min-width: 200px;
  }

  /* Main content takes all remaining width next to the sidebar. */
  main {
    flex: 1;
    padding: 0 5em 0 5em;
  }

  .logotitle {
    font-size: 1.5em;
    font-weight: bold;
    margin: 5px;
  }

  /* Presumably here because Bulma and Prism both use the class names "number" and "tag":
     resetting them keeps highlighted code tokens from picking up Bulma component styling. */
  .number {
    all: unset;
  }

  .tag.token {
    all: unset;
  }

  /* Restore bullets and spacing for lists inside the article. */
  main ul {
    list-style-type: disc;
    padding-left: 30px;
    margin-top: 10px;
  }

  h1 {
    font-size: 2rem;
    margin-top: 10px;
  }

  h2 {
    font-size: 1.5rem;
    margin-top: 10px;
    font-weight: 500;
  }

  h3 {
    font-size: 1rem;
    margin-top: 10px;
    font-weight: 500;
  }

  p {
    margin-top: 10px;
  }

  /* Accessibility styling */

  /* Links are underlined so they are not distinguished by colour alone. */
  a {
    color: #485fc7;
    text-decoration: underline;
  }

  /* Sidebar menu entries are exempt from the underline. */
  .menu-list a {
    text-decoration: none;
  }

  /* Colour overrides for Prism syntax-highlighting tokens. */
  .token.comment, .token.prolog, .token.doctype, .token.cdata {
    color: #8093a5;
  }

  .token.property, .token.tag, .token.constant, .token.symbol, .token.deleted {
    color: #ff36ab;
  }
</style>
|
|
<!-- Loads Mermaid as an ES module; diagrams live in <pre class="mermaid"> elements. -->
<script type="module" async>
  import mermaid from "https://unpkg.com/mermaid@10/dist/mermaid.esm.min.mjs";

  // initialize() only stores configuration, so it is called directly rather than
  // being invoked inline and having its return value registered as an event
  // listener. `startOnLoad` is the documented option that makes Mermaid render
  // the diagrams itself once the page has loaded.
  mermaid.initialize({ startOnLoad: true });
</script>
|
|
<script>
  /**
   * Turns the first child of the element that triggered a ClipboardJS event into
   * a Primer tooltip showing `e`. Does nothing when the trigger's own class name
   * already contains "tooltipped".
   *
   * @param {object} o ClipboardJS event; only `o.trigger` is read.
   * @param {string} e Text assigned to the tooltip's aria-label.
   */
  function showTooltip(o, e) {
    if (o.trigger.className.includes("tooltipped")) {
      return;
    }
    o.trigger.children[0].className = "tooltipped tooltipped-s";
    o.trigger.children[0].ariaLabel = e;
  }

  // Wire up the copy buttons once the page, including the ClipboardJS script, has loaded.
  window.addEventListener("load", () => {
    const clipboard = new ClipboardJS(".code-copy");
    clipboard.on("success", (event) => showTooltip(event, "Copied!"));
    clipboard.on("error", (event) => showTooltip(event, "Failed..."));
  });
</script>
|
|
<script async="" src="https://cdn.jsdelivr.net/npm/clipboard@2.0.11/dist/clipboard.min.js"></script>
|
|
|
|
|
|
<script src="https://wcpstatic.microsoft.com/mscc/lib/v2/wcp-consent.js" type="text/javascript"></script>
|
|
<script>
|
|
/**
 * Callback handed to WcpConsent.init as its consent-changed handler.
 * It only logs the value it receives.
 *
 * @param {*} categoryPreferences Value supplied by the consent library.
 */
function onConsentChanged(categoryPreferences) {
    console.log("onConsentChanged", categoryPreferences);
}
|
|
|
|
// Consent object delivered to the WcpConsent.init callback; undefined until that callback succeeds.
var siteConsent;
|
|
|
|
/**
 * Page start-up hook: writes the copyright notice into the #copyright element
 * and, when the WcpConsent library is present, initialises the cookie banner.
 */
function initialize() {
    const year = new Date().getFullYear();
    document.getElementById("copyright").innerHTML = `©️ ${year} Microsoft`;

    // Without the consent library there is nothing more to set up.
    if (!window.WcpConsent) {
        return;
    }

    // Keeps the consent object on success; logs the error otherwise.
    const onInitialized = function (err, _siteConsent) {
        if (err) {
            console.log("Error initializing WcpConsent: " + err);
            return;
        }
        siteConsent = _siteConsent; // siteConsent is used to get the current consent
    };

    WcpConsent.init("en-US", "cookie-banner", onInitialized, onConsentChanged, WcpConsent.themes.light);
}
|
|
|
|
// Start-up hooks for DOMContentLoaded, registered in this order: initialize, then checkCookieManager.
[initialize, checkCookieManager].forEach(function (hook) {
    addEventListener("DOMContentLoaded", hook);
});
|
|
|
|
/**
 * Shows or hides the footer's "Cookies" entry and the divider next to it.
 *
 * Both elements are shown only when a consent object exists and reports that
 * consent is required. `siteConsent` is still undefined when the consent
 * library did not load, failed to initialise, or has not called back yet; that
 * case is treated as "not required" instead of throwing a TypeError.
 */
function checkCookieManager() {
    const consentRequired = Boolean(siteConsent && siteConsent.isConsentRequired);
    const display = consentRequired ? 'block' : 'none';

    document.getElementById("cookiesManager").style.display = display;
    document.getElementById("divider").style.display = display;
}
|
|
|
|
/**
 * Activation handler for the footer's "Cookies" control: opens the consent
 * manager when consent is required.
 *
 * Does nothing while `siteConsent` is undefined (consent library missing or
 * not initialised), so activating the control can no longer throw a TypeError.
 */
function manageConsent() {
    if (siteConsent && siteConsent.isConsentRequired) {
        siteConsent.manageConsent();
    }
}
|
|
</script>
|
|
|
|
</head>
|
|
<body>
|
|
<header>
|
|
<div id="cookie-banner"></div>
|
|
<a href="/"><span class="logotitle">GraphRAG</span></a>
|
|
</header>
|
|
<div class="page-content">
|
|
<!-- Sidebar -->
|
|
<!-- Site navigation tree. The landmark is labelled so assistive technology can
     announce it by name, and every link sits in its own list item. -->
<aside class="menu" aria-label="Documentation">
  <ul class="menu-list">
    <li>
      <a href="/">Welcome</a>
    </li>

    <!-- Get Started Links -->
    <li>
      <a href="/posts/get_started/">Get Started</a>
    </li>
    <li>
      <a href="/posts/developing/">Developing</a>
    </li>

    <!-- Indexing Links -->
    <li>
      <a href="/posts/index/overview/">Indexing</a>
      <ul>
        <li>
          <a href="/posts/index/0-architecture/">Architecture</a>
        </li>
        <li>
          <a href="/posts/index/1-default_dataflow/">Dataflow</a>
        </li>
        <li>
          <a href="/posts/index/2-cli/">CLI</a>
        </li>
        <li>
          <a href="/posts/config/overview/">Configuration</a>
          <ul>
            <li>
              <a href="/posts/config/env_vars">Using Env Vars</a>
            </li>
            <li>
              <a href="/posts/config/json_yaml">Using JSON or YAML</a>
            </li>
            <li>
              <a href="/posts/config/custom">Fully Custom</a>
            </li>
            <li>
              <a href="/posts/config/template">Template</a>
            </li>
          </ul>
        </li>
        <li>
          <a href="/posts/prompt_tuning/overview/">Prompt Tuning</a>
          <ul>
            <li>
              <a href="/posts/prompt_tuning/auto_prompt_tuning/">Automatic Templating</a>
            </li>
            <li>
              <a href="/posts/prompt_tuning/manual_prompt_tuning/">Manual Prompt Tuning</a>
            </li>
          </ul>
        </li>
      </ul>
    </li>

    <!-- Query Links -->
    <li>
      <a href="/posts/query/overview/">Query</a>
      <ul>
        <li>
          <!-- Current page: marked both visually (is-active) and for assistive technology (aria-current). -->
          <a href="/posts/query/1-local_search/" class="is-active" aria-current="page">Local Search</a>
        </li>
        <li>
          <a href="/posts/query/2-question_generation/">Question Generation</a>
        </li>
        <li>
          <a href="/posts/query/0-global_search/">Global Search</a>
        </li>
        <li>
          <a href="/posts/query/3-cli/">CLI</a>
        </li>
        <li>
          <a href="/posts/query/notebooks/overview/">Notebooks</a>
          <ul>
            <li>
              <a href="/posts/query/notebooks/global_search_nb">Global Search</a>
            </li>
            <li>
              <a href="/posts/query/notebooks/local_search_nb">Local Search</a>
            </li>
          </ul>
        </li>
      </ul>
    </li>
  </ul>
</aside>
|
|
|
|
<!-- Main Content -->
|
|
<main>
|
|
<h1>Local Search 🔎</h1>
|
|
<h2>Entity-based Reasoning</h2>
|
|
<p>The <a href="https://github.com/microsoft/graphrag/blob/main/graphrag/query/structured_search/local_search/">local search</a> method combines structured data from the knowledge graph with unstructured data from the input documents to augment the LLM context with relevant entity information at query time. It is well-suited for answering questions that require an understanding of specific entities mentioned in the input documents (e.g., “What are the healing properties of chamomile?”).</p>
|
|
<h2>Methodology</h2>
|
|
<pre class="mermaid">--- title: Local Search Dataflow --- %%{ init: { 'flowchart': { 'curve': 'step' } } }%% flowchart LR uq[User Query] ---.1 ch1[Conversation<br/>History]---.1 .1--Entity<br/>Description<br/>Embedding--> ee[Extracted Entities] ee[Extracted Entities] ---.2--Entity-Text<br/>Unit Mapping--> ctu[Candidate<br/>Text Units]--Ranking + <br/>Filtering -->ptu[Prioritized<br/>Text Units]---.3 .2--Entity-Report<br/>Mapping--> ccr[Candidate<br/>Community Reports]--Ranking + <br/>Filtering -->pcr[Prioritized<br/>Community Reports]---.3 .2--Entity-Entity<br/>Relationships--> ce[Candidate<br/>Entities]--Ranking + <br/>Filtering -->pe[Prioritized<br/>Entities]---.3 .2--Entity-Entity<br/>Relationships--> cr[Candidate<br/>Relationships]--Ranking + <br/>Filtering -->pr[Prioritized<br/>Relationships]---.3 .2--Entity-Covariate<br/>Mappings--> cc[Candidate<br/>Covariates]--Ranking + <br/>Filtering -->pc[Prioritized<br/>Covariates]---.3 ch1 -->ch2[Conversation History]---.3 .3-->res[Response] classDef green fill:#26B653,stroke:#333,stroke-width:2px,color:#fff; classDef turquoise fill:#19CCD3,stroke:#333,stroke-width:2px,color:#fff; classDef rose fill:#DD8694,stroke:#333,stroke-width:2px,color:#fff; classDef orange fill:#F19914,stroke:#333,stroke-width:2px,color:#fff; classDef purple fill:#B356CD,stroke:#333,stroke-width:2px,color:#fff; classDef invisible fill:#fff,stroke:#fff,stroke-width:0px,color:#fff, width:0px; class uq,ch1 turquoise class ee green class ctu,ccr,ce,cr,cc rose class ptu,pcr,pe,pr,pc,ch2 orange class res purple class .1,.2,.3 invisible </pre>
|
|
<p>Given a user query and, optionally, the conversation history, the local search method identifies a set of entities from the knowledge graph that are semantically related to the user input. These entities serve as access points into the knowledge graph, enabling the extraction of further relevant details such as connected entities, relationships, entity covariates, and community reports. It also extracts relevant text chunks from the raw input documents that are associated with the identified entities. These candidate data sources are then prioritized and filtered to fit within a single context window of pre-defined size, which is used to generate a response to the user query.</p>
|
|
<h2>Configuration</h2>
|
|
<p>Below are the key parameters of the <a href="https://github.com/microsoft/graphrag/blob/main/graphrag/query/structured_search/local_search/search.py">LocalSearch class</a>:</p>
|
|
<ul>
|
|
<li><code>llm</code>: OpenAI model object to be used for response generation</li>
|
|
<li><code>context_builder</code>: <a href="https://github.com/microsoft/graphrag/blob/main/graphrag/query/structured_search/local_search/mixed_context.py">context builder</a> object to be used for preparing context data from collections of knowledge model objects</li>
|
|
<li><code>system_prompt</code>: prompt template used to generate the search response. Default template can be found at <a href="https://github.com/microsoft/graphrag/blob/main/graphrag/query/structured_search/local_search/system_prompt.py">system_prompt</a></li>
|
|
<li><code>response_type</code>: free-form text describing the desired response type and format (e.g., <code>Multiple Paragraphs</code>, <code>Multi-Page Report</code>)</li>
|
|
<li><code>llm_params</code>: a dictionary of additional parameters (e.g., temperature, max_tokens) to be passed to the LLM call</li>
|
|
<li><code>context_builder_params</code>: a dictionary of additional parameters to be passed to the <a href="https://github.com/microsoft/graphrag/blob/main/graphrag/query/structured_search/local_search/mixed_context.py"><code>context_builder</code></a> object when building context for the search prompt</li>
|
|
<li><code>callbacks</code>: optional callback functions, can be used to provide custom event handlers for LLM's completion streaming events</li>
|
|
</ul>
|
|
<h2>How to Use</h2>
|
|
<p>An example of a local search scenario can be found in the following <a href="../notebooks/local_search_nb">notebook</a>.</p>
|
|
|
|
</main>
|
|
</div>
|
|
<!-- Legal links bar. Entries are separated by literal "|" characters. -->
<footer>
  <a href="https://go.microsoft.com/fwlink/?LinkId=521839">Privacy</a>
  |
  <a href="https://go.microsoft.com/fwlink/?LinkId=2259814">Consumer Health Privacy</a>
  |
  <!-- Script-driven control rather than a navigation link: exposed as a button,
       reachable with Tab, and operable with Enter or Space as well as the mouse.
       checkCookieManager() toggles this element and #divider together. -->
  <span id="cookiesManager" role="button" tabindex="0" onclick="manageConsent();" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); manageConsent(); }">Cookies</span>
  <span id="divider">|</span>
  <a href="https://go.microsoft.com/fwlink/?LinkID=206977">Terms of Use</a>
  |
  <a href="https://www.microsoft.com/trademarks">Trademarks</a>
  |
  <!-- Link text is filled in by initialize() with the current year. -->
  <a href="https://www.microsoft.com" id="copyright"></a>
  |
  <a href="https://github.com/microsoft/graphrag">GitHub</a>
</footer>
|
|
</body>
|
|
</html> |