mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2025-06-26 23:50:03 +00:00
127 lines
3.9 KiB
Python
127 lines
3.9 KiB
Python
# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
|
|
# Source for "Build a Large Language Model From Scratch"
|
|
# - https://www.manning.com/books/build-a-large-language-model-from-scratch
|
|
# Code: https://github.com/rasbt/LLMs-from-scratch
|
|
#
|
|
# A minimal script for evaluating (scoring) model responses with Ollama, based on the code in chapter 7
|
|
|
|
import json
|
|
import psutil
|
|
from tqdm import tqdm
|
|
import urllib.request
|
|
|
|
|
|
def query_model(prompt, model="llama3", url="http://localhost:11434/api/chat"):
    """Send one user message to a chat endpoint and return the reply text.

    The response body is read line by line; every non-blank line is parsed
    as a JSON object and its ``["message"]["content"]`` value is appended to
    the result.

    Args:
        prompt: Text sent as the content of a single ``"user"`` message.
        model: Model name placed in the request payload.
        url: Endpoint that receives the JSON payload via POST.

    Returns:
        The concatenation of the ``message.content`` strings of all response
        lines, in the order they were received.

    Raises:
        json.JSONDecodeError: If a non-blank response line is not valid JSON.
        KeyError: If a response object has no ``message``/``content`` entry.
    """
    # Create the data payload as a dictionary
    data = {
        "model": model,
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "options": {  # Settings below are required for deterministic responses
            "seed": 123,
            "temperature": 0,
            "num_ctx": 2048
        }
    }

    # Convert the dictionary to a JSON formatted string and encode it to bytes
    payload = json.dumps(data).encode("utf-8")

    # Create a request object, setting the method to POST and adding necessary headers
    request = urllib.request.Request(url, data=payload, method="POST")
    request.add_header("Content-Type", "application/json")

    # Collect the pieces in a list and join once instead of repeated `+=`
    chunks = []
    with urllib.request.urlopen(request) as response:
        # The response object is iterable and yields one bytes line at a time
        for raw_line in response:
            line = raw_line.decode("utf-8").strip()
            if not line:
                # A blank separator line carries no JSON; skipping it avoids
                # a JSONDecodeError in the middle of an otherwise valid stream
                continue
            chunks.append(json.loads(line)["message"]["content"])

    return "".join(chunks)
|
|
|
|
|
|
def check_if_running(process_name):
    """Return whether any running process has ``process_name`` in its name.

    Args:
        process_name: Substring to look for in the names of the processes
            reported by ``psutil.process_iter``.

    Returns:
        ``True`` as soon as one process name contains ``process_name``,
        otherwise ``False``.
    """
    return any(
        # psutil fills in None when a process name cannot be retrieved;
        # such entries are skipped instead of raising a TypeError on `in`.
        proc.info["name"] is not None and process_name in proc.info["name"]
        for proc in psutil.process_iter(["name"])
    )
|
|
|
|
|
|
def format_input(entry):
    """Build the instruction prompt text for one dataset entry.

    Args:
        entry: Mapping with an ``"instruction"`` key and an ``"input"`` key.

    Returns:
        The fixed preamble followed by the ``### Instruction:`` section and,
        only when ``entry["input"]`` is truthy, an ``### Input:`` section.
    """
    sections = [
        f"Below is an instruction that describes a task. "
        f"Write a response that appropriately completes the request."
        f"\n\n### Instruction:\n{entry['instruction']}"
    ]

    # The input section is optional and omitted for empty inputs
    if entry["input"]:
        sections.append(f"\n\n### Input:\n{entry['input']}")

    return "".join(sections)
|
|
|
|
|
|
def main(file_path):
    """Score the model responses stored in a JSON file and print a summary.

    Args:
        file_path: Path to a JSON file that is loaded and passed to
            ``generate_model_scores`` with the ``"model_response"`` key.

    Raises:
        RuntimeError: If no process named like "ollama" is running.
    """
    ollama_running = check_if_running("ollama")

    if not ollama_running:
        raise RuntimeError("Ollama not running. Launch ollama before proceeding.")
    # Reuse the result instead of scanning the process list a second time
    print("Ollama running:", ollama_running)

    # JSON text is UTF-8; do not depend on the platform's default encoding
    with open(file_path, "r", encoding="utf-8") as file:
        test_data = json.load(file)

    model = "llama3"
    scores = generate_model_scores(test_data, "model_response", model)
    print(f"Number of scores: {len(scores)} of {len(test_data)}")

    # The score list is empty for an empty dataset or when no reply could be
    # converted to an integer; avoid dividing by zero in that case
    if scores:
        print(f"Average score: {sum(scores)/len(scores):.2f}\n")
    else:
        print("Average score: n/a (no scores could be computed)\n")
|
|
|
|
|
|
def generate_model_scores(json_data, json_key, model="llama3"):
    """Ask the model to grade each entry's response and collect the scores.

    Args:
        json_data: Iterable of entries; each entry provides ``json_key``,
            ``"output"`` and the keys read by ``format_input``.
        json_key: Key of the response text to be graded in each entry.
        model: Model name forwarded to ``query_model``.

    Returns:
        A list of integer scores. Entries with an empty response contribute
        0 without a model query; entries whose reply cannot be converted with
        ``int`` are reported on stdout and contribute nothing, so the list
        may be shorter than ``json_data``.
    """
    collected = []
    for item in tqdm(json_data, desc="Scoring entries"):
        # Empty responses are scored 0 directly, no query needed
        if entry_is_empty := (item[json_key] == ""):
            collected.append(0)
            continue

        grading_prompt = (
            f"Given the input `{format_input(item)}` "
            f"and correct output `{item['output']}`, "
            f"score the model response `{item[json_key]}`"
            f" on a scale from 0 to 100, where 100 is the best score. "
            f"Respond with the integer number only."
        )
        reply = query_model(grading_prompt, model)

        try:
            value = int(reply)
        except ValueError:
            print(f"Could not convert score: {reply}")
        else:
            collected.append(value)

    return collected
|
|
|
|
|
|
# Command-line entry point: parse --file_path and run the evaluation.
if __name__ == "__main__":

    import argparse

    cli = argparse.ArgumentParser(description="Evaluate model responses with ollama")
    cli.add_argument(
        "--file_path",
        required=True,
        help=(
            "The path to the test dataset `.json` file with the"
            " `'output'` and `'model_response'` keys"
        ),
    )
    cli_args = cli.parse_args()

    main(file_path=cli_args.file_path)
|