| 
									
										
										
										
											2023-05-31 22:03:15 +08:00
										 |  |  | import json | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-11 16:40:52 +08:00
										 |  |  | from core.llm_generator.output_parser.errors import OutputParserError | 
					
						
							| 
									
										
										
										
											2023-05-31 22:03:15 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def parse_json_markdown(json_string: str) -> dict: | 
					
						
							| 
									
										
										
										
											2024-10-03 10:20:56 +08:00
										 |  |  |     # Get json from the backticks/braces | 
					
						
							| 
									
										
										
										
											2023-05-31 22:03:15 +08:00
										 |  |  |     json_string = json_string.strip() | 
					
						
							| 
									
										
										
										
											2024-10-03 10:20:56 +08:00
										 |  |  |     starts = ["```json", "```", "``", "`", "{"] | 
					
						
							|  |  |  |     ends = ["```", "``", "`", "}"] | 
					
						
							|  |  |  |     end_index = -1 | 
					
						
							| 
									
										
										
										
											2024-11-07 14:02:30 +08:00
										 |  |  |     start_index = 0 | 
					
						
							| 
									
										
										
										
											2024-12-24 18:38:51 +08:00
										 |  |  |     parsed: dict = {} | 
					
						
							| 
									
										
										
										
											2024-10-03 10:20:56 +08:00
										 |  |  |     for s in starts: | 
					
						
							|  |  |  |         start_index = json_string.find(s) | 
					
						
							|  |  |  |         if start_index != -1: | 
					
						
							|  |  |  |             if json_string[start_index] != "{": | 
					
						
							|  |  |  |                 start_index += len(s) | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |     if start_index != -1: | 
					
						
							|  |  |  |         for e in ends: | 
					
						
							|  |  |  |             end_index = json_string.rfind(e, start_index) | 
					
						
							|  |  |  |             if end_index != -1: | 
					
						
							|  |  |  |                 if json_string[end_index] == "}": | 
					
						
							|  |  |  |                     end_index += 1 | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |     if start_index != -1 and end_index != -1 and start_index < end_index: | 
					
						
							|  |  |  |         extracted_content = json_string[start_index:end_index].strip() | 
					
						
							| 
									
										
										
										
											2023-05-31 22:03:15 +08:00
										 |  |  |         parsed = json.loads(extracted_content) | 
					
						
							|  |  |  |     else: | 
					
						
							| 
									
										
										
										
											2024-12-22 10:40:56 +08:00
										 |  |  |         raise ValueError("could not find json block in the output.") | 
					
						
							| 
									
										
										
										
											2023-05-31 22:03:15 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return parsed | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-09 15:21:33 +08:00
										 |  |  | def parse_and_check_json_markdown(text: str, expected_keys: list[str]) -> dict: | 
					
						
							| 
									
										
										
										
											2023-05-31 22:03:15 +08:00
										 |  |  |     try: | 
					
						
							|  |  |  |         json_obj = parse_json_markdown(text) | 
					
						
							|  |  |  |     except json.JSONDecodeError as e: | 
					
						
							| 
									
										
										
										
											2024-12-22 10:40:56 +08:00
										 |  |  |         raise OutputParserError(f"got invalid json object. error: {e}") | 
					
						
							| 
									
										
										
										
											2023-05-31 22:03:15 +08:00
										 |  |  |     for key in expected_keys: | 
					
						
							|  |  |  |         if key not in json_obj: | 
					
						
							| 
									
										
										
										
											2024-09-11 16:40:52 +08:00
										 |  |  |             raise OutputParserError( | 
					
						
							| 
									
										
										
										
											2024-12-22 10:40:56 +08:00
										 |  |  |                 f"got invalid return object. expected key `{key}` to be present, but got {json_obj}" | 
					
						
							| 
									
										
										
										
											2023-05-31 22:03:15 +08:00
										 |  |  |             ) | 
					
						
							|  |  |  |     return json_obj |