Prompt utils

This commit is contained in:
Jake Poznanski 2024-10-01 16:02:24 +00:00
parent b6543a4f65
commit f99f6a6729

View File

@ -1,3 +1,5 @@
import re
# This is the prompt we use to get GPT-4o to convert documents into our silver training data
def build_openai_silver_data_prompt(base_text: str) -> str:
return (
@ -21,4 +23,15 @@ def build_finetuning_prompt(base_text: str) -> str:
f"Just return the plain text representation of this document as if you were reading it naturally.\n"
f"Do not hallucinate.\n"
f"RAW_TEXT_START\n{base_text}\nRAW_TEXT_END"
)
)
def extract_raw_text(prompt: str) -> str:
    """Return the raw document text embedded in a prompt.

    The text is expected to sit between a ``RAW_TEXT_START`` line and a
    ``RAW_TEXT_END`` line. Matching is non-greedy, so extraction stops at
    the first ``RAW_TEXT_END`` marker that begins a line after the start
    marker.

    Args:
        prompt: Full prompt string containing the delimited raw text.

    Returns:
        The text between the two markers, with leading and trailing
        whitespace stripped.

    Raises:
        ValueError: If the prompt does not contain both markers in the
            expected layout.
    """
    pattern = r"RAW_TEXT_START\s*\n(.*?)\nRAW_TEXT_END"

    # re.DOTALL lets "." match newlines so multi-line documents are captured.
    match = re.search(pattern, prompt, re.DOTALL)
    if match is None:
        raise ValueError("Prompt does not contain raw text")

    return match.group(1).strip()