| 
									
										
										
										
											2024-10-15 19:40:08 +08:00
										 |  |  | import os | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from lightrag import LightRAG, QueryParam | 
					
						
							| 
									
										
										
										
											2025-01-25 00:11:00 +01:00
										 |  |  | from lightrag.llm.hf import hf_model_complete, hf_embed | 
					
						
							| 
									
										
										
										
											2024-10-15 21:11:12 +08:00
										 |  |  | from lightrag.utils import EmbeddingFunc | 
					
						
							| 
									
										
										
										
											2024-10-19 09:43:17 +05:30
										 |  |  | from transformers import AutoModel, AutoTokenizer | 
					
						
							| 
									
										
										
										
											2025-03-03 18:33:42 +08:00
										 |  |  | from lightrag.kg.shared_storage import initialize_pipeline_status | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import asyncio | 
					
						
							|  |  |  | import nest_asyncio | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | nest_asyncio.apply() | 
					
						
							| 
									
										
										
										
											2024-10-15 19:40:08 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | WORKING_DIR = "./dickens" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | if not os.path.exists(WORKING_DIR): | 
					
						
							|  |  |  |     os.mkdir(WORKING_DIR) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-03-03 18:40:03 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-03-03 18:33:42 +08:00
										 |  |  | async def initialize_rag(): | 
					
						
							|  |  |  |     rag = LightRAG( | 
					
						
							|  |  |  |         working_dir=WORKING_DIR, | 
					
						
							|  |  |  |         llm_model_func=hf_model_complete, | 
					
						
							|  |  |  |         llm_model_name="meta-llama/Llama-3.1-8B-Instruct", | 
					
						
							|  |  |  |         embedding_func=EmbeddingFunc( | 
					
						
							|  |  |  |             embedding_dim=384, | 
					
						
							|  |  |  |             max_token_size=5000, | 
					
						
							|  |  |  |             func=lambda texts: hf_embed( | 
					
						
							|  |  |  |                 texts, | 
					
						
							|  |  |  |                 tokenizer=AutoTokenizer.from_pretrained( | 
					
						
							|  |  |  |                     "sentence-transformers/all-MiniLM-L6-v2" | 
					
						
							|  |  |  |                 ), | 
					
						
							|  |  |  |                 embed_model=AutoModel.from_pretrained( | 
					
						
							|  |  |  |                     "sentence-transformers/all-MiniLM-L6-v2" | 
					
						
							|  |  |  |                 ), | 
					
						
							| 
									
										
										
										
											2024-10-19 09:43:17 +05:30
										 |  |  |             ), | 
					
						
							|  |  |  |         ), | 
					
						
							| 
									
										
										
										
											2025-03-03 18:33:42 +08:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     await rag.initialize_storages() | 
					
						
							|  |  |  |     await initialize_pipeline_status() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return rag | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-03-03 18:40:03 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-03-03 18:33:42 +08:00
										 |  |  | def main(): | 
					
						
							|  |  |  |     rag = asyncio.run(initialize_rag()) | 
					
						
							| 
									
										
										
										
											2024-10-15 19:40:08 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-03-03 18:33:42 +08:00
										 |  |  |     with open("./book.txt", "r", encoding="utf-8") as f: | 
					
						
							|  |  |  |         rag.insert(f.read()) | 
					
						
							| 
									
										
										
										
											2024-10-15 19:40:08 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-03-03 18:33:42 +08:00
										 |  |  |     # Perform naive search | 
					
						
							|  |  |  |     print( | 
					
						
							| 
									
										
										
										
											2025-03-03 18:40:03 +08:00
										 |  |  |         rag.query( | 
					
						
							|  |  |  |             "What are the top themes in this story?", param=QueryParam(mode="naive") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2025-03-03 18:33:42 +08:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2024-10-15 19:40:08 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-03-03 18:33:42 +08:00
										 |  |  |     # Perform local search | 
					
						
							|  |  |  |     print( | 
					
						
							| 
									
										
										
										
											2025-03-03 18:40:03 +08:00
										 |  |  |         rag.query( | 
					
						
							|  |  |  |             "What are the top themes in this story?", param=QueryParam(mode="local") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2025-03-03 18:33:42 +08:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2024-10-15 19:40:08 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-03-03 18:33:42 +08:00
										 |  |  |     # Perform global search | 
					
						
							|  |  |  |     print( | 
					
						
							| 
									
										
										
										
											2025-03-03 18:40:03 +08:00
										 |  |  |         rag.query( | 
					
						
							|  |  |  |             "What are the top themes in this story?", param=QueryParam(mode="global") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2025-03-03 18:33:42 +08:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2024-10-15 19:40:08 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-03-03 18:33:42 +08:00
										 |  |  |     # Perform hybrid search | 
					
						
							|  |  |  |     print( | 
					
						
							| 
									
										
										
										
											2025-03-03 18:40:03 +08:00
										 |  |  |         rag.query( | 
					
						
							|  |  |  |             "What are the top themes in this story?", param=QueryParam(mode="hybrid") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2025-03-03 18:33:42 +08:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2024-10-15 19:40:08 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-03-03 18:40:03 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-03-03 18:33:42 +08:00
										 |  |  | if __name__ == "__main__": | 
					
						
							|  |  |  |     main() |