"""Build and submit an OpenAI Batch API job that compares two answer sets.

Reads a list of questions plus two answer files (e.g. produced by different
RAG configurations), builds one LLM-judged evaluation request per question,
and submits them all as a single batch.
"""

import json
import re

import jsonlines
from openai import OpenAI


def batch_eval(query_file, result1_file, result2_file, output_file_path):
    client = OpenAI()

    # Extract the questions from lines of the form "- Question 1: ...".
    with open(query_file, "r") as f:
        data = f.read()

    queries = re.findall(r"- Question \d+: (.+)", data)

    # Each result file is a JSON list of objects with a "result" field.
    with open(result1_file, "r") as f:
        answers1 = json.load(f)
    answers1 = [i["result"] for i in answers1]

    with open(result2_file, "r") as f:
        answers2 = json.load(f)
    answers2 = [i["result"] for i in answers2]

    # Build one Batch API request per (question, answer 1, answer 2) triple.
    requests = []
    for i, (query, answer1, answer2) in enumerate(zip(queries, answers1, answers2)):
        sys_prompt = """
        ---Role---
        You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
        """

        prompt = f"""
        You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.

        - **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?
        - **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?
        - **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic?

        For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories.

        Here is the question:
        {query}

        Here are the two answers:

        **Answer 1:**
        {answer1}

        **Answer 2:**
        {answer2}

        Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion.

        Output your evaluation in the following JSON format:

        {{
            "Comprehensiveness": {{
                "Winner": "[Answer 1 or Answer 2]",
                "Explanation": "[Provide explanation here]"
            }},
            "Diversity": {{
                "Winner": "[Answer 1 or Answer 2]",
                "Explanation": "[Provide explanation here]"
            }},
            "Empowerment": {{
                "Winner": "[Answer 1 or Answer 2]",
                "Explanation": "[Provide explanation here]"
            }},
            "Overall Winner": {{
                "Winner": "[Answer 1 or Answer 2]",
                "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]"
            }}
        }}
        """

        request_data = {
            "custom_id": f"request-{i+1}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": "gpt-4o-mini",
                "messages": [
                    {"role": "system", "content": sys_prompt},
                    {"role": "user", "content": prompt},
                ],
            },
        }

        requests.append(request_data)

    # The Batch API expects a JSONL file with one request object per line.
    with jsonlines.open(output_file_path, mode="w") as writer:
        for request in requests:
            writer.write(request)

    print(f"Batch API requests written to {output_file_path}")

    # Upload the request file, then create the batch job against the chat
    # completions endpoint.
    with open(output_file_path, "rb") as f:
        batch_input_file = client.files.create(file=f, purpose="batch")
    batch_input_file_id = batch_input_file.id

    batch = client.batches.create(
        input_file_id=batch_input_file_id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"description": "nightly eval job"},
    )

    print(f"Batch {batch.id} has been created.")

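
# The helper below is an optional sketch, not part of the original script: it
# shows one way the submitted batch could be polled and its output downloaded,
# assuming the same OpenAI Python SDK used above (client.batches.retrieve and
# client.files.content). The function name and the default results path are
# illustrative placeholders.
def fetch_batch_results(batch_id, results_file_path="batch_eval_results.jsonl"):
    client = OpenAI()

    batch = client.batches.retrieve(batch_id)
    if batch.status != "completed":
        print(f"Batch {batch_id} is still '{batch.status}'; try again later.")
        return None

    # Each line of the output file is a JSON object whose response body holds
    # the evaluator's verdict for one question.
    content = client.files.content(batch.output_file_id)
    with open(results_file_path, "w") as f:
        f.write(content.text)

    print(f"Batch results written to {results_file_path}")
    return results_file_path
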

if __name__ == "__main__":
    # Placeholder paths for illustration; point these at your own question
    # list, the two result files to compare, and the desired JSONL output.
    batch_eval(
        "queries.txt",
        "result1.json",
        "result2.json",
        "batch_eval_requests.jsonl",
    )
