# LightRAG/examples/insert_custom_kg.py
import os

from lightrag import LightRAG
from lightrag.llm.openai import gpt_4o_mini_complete

#########
# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
# import nest_asyncio
# nest_asyncio.apply()
#########

# Directory passed to LightRAG as its working directory.
WORKING_DIR = "./custom_kg"

# Create the working directory up front. `exist_ok=True` makes this a no-op
# when the directory is already there, without a separate existence check
# that could race with another process creating it.
os.makedirs(WORKING_DIR, exist_ok=True)

# LightRAG instance that the custom knowledge graph is inserted into.
rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=gpt_4o_mini_complete,  # Use gpt_4o_mini_complete LLM model
    # llm_model_func=gpt_4o_complete  # Optionally, use a stronger model
)
# Hand-written knowledge graph used as the payload for `insert_custom_kg`.
#
# Layout of the dict:
#   "entities"      - nodes: name, type, description and the source they come from.
#   "relationships" - edges between entity names (`src_id` -> `tgt_id`) with a
#                     description, comma-separated keywords, a numeric weight and
#                     the originating source.
#   "chunks"        - raw text passages, each tied to a `source_id` shared with
#                     the entities/relationships above.
#
# Every chunk uses the same `chunk_order_index` key for its position within
# its source (previously one chunk used `chunk_order_index` while the others
# used `source_chunk_index`).
# NOTE(review): `chunk_order_index` is presumed to be the key LightRAG reads;
# confirm against the installed version's `insert_custom_kg` schema.
custom_kg = {
    "entities": [
        {
            "entity_name": "CompanyA",
            "entity_type": "Organization",
            "description": "A major technology company",
            "source_id": "Source1",
        },
        {
            "entity_name": "ProductX",
            "entity_type": "Product",
            "description": "A popular product developed by CompanyA",
            "source_id": "Source1",
        },
        {
            "entity_name": "PersonA",
            "entity_type": "Person",
            "description": "A renowned researcher in AI",
            "source_id": "Source2",
        },
        {
            "entity_name": "UniversityB",
            "entity_type": "Organization",
            "description": "A leading university specializing in technology and sciences",
            "source_id": "Source2",
        },
        {
            "entity_name": "CityC",
            "entity_type": "Location",
            "description": "A large metropolitan city known for its culture and economy",
            "source_id": "Source3",
        },
        {
            "entity_name": "EventY",
            "entity_type": "Event",
            "description": "An annual technology conference held in CityC",
            "source_id": "Source3",
        },
    ],
    "relationships": [
        {
            "src_id": "CompanyA",
            "tgt_id": "ProductX",
            "description": "CompanyA develops ProductX",
            "keywords": "develop, produce",
            "weight": 1.0,
            "source_id": "Source1",
        },
        {
            "src_id": "PersonA",
            "tgt_id": "UniversityB",
            "description": "PersonA works at UniversityB",
            "keywords": "employment, affiliation",
            "weight": 0.9,
            "source_id": "Source2",
        },
        {
            "src_id": "CityC",
            "tgt_id": "EventY",
            "description": "EventY is hosted in CityC",
            "keywords": "host, location",
            "weight": 0.8,
            "source_id": "Source3",
        },
    ],
    "chunks": [
        {
            "content": "ProductX, developed by CompanyA, has revolutionized the market with its cutting-edge features.",
            "source_id": "Source1",
            "chunk_order_index": 0,
        },
        {
            "content": "One outstanding feature of ProductX is its advanced AI capabilities.",
            "source_id": "Source1",
            "chunk_order_index": 1,
        },
        {
            "content": "PersonA is a prominent researcher at UniversityB, focusing on artificial intelligence and machine learning.",
            "source_id": "Source2",
            "chunk_order_index": 0,
        },
        {
            "content": "EventY, held in CityC, attracts technology enthusiasts and companies from around the globe.",
            "source_id": "Source3",
            "chunk_order_index": 0,
        },
        {
            # Placeholder chunk with no real content or known source.
            "content": "None",
            "source_id": "UNKNOWN",
            "chunk_order_index": 0,
        },
    ],
}
# Hand the custom knowledge-graph dict (entities, relationships, chunks) to
# the LightRAG instance for insertion.
rag.insert_custom_kg(custom_kg)