LLMs-from-scratch/ch07/01_main-chapter-code/ollama_evaluate.py

# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
# Source for "Build a Large Language Model From Scratch"
#   - https://www.manning.com/books/build-a-large-language-model-from-scratch
# Code: https://github.com/rasbt/LLMs-from-scratch
#
# A minimal instruction finetuning file based on the code in chapter 7

import json
import psutil
from tqdm import tqdm
import urllib.request


def query_model(prompt, model="llama3", url="http://localhost:11434/api/chat"):
    """Send a single-turn chat prompt to an Ollama endpoint and return the reply.

    Args:
        prompt: Text sent as the content of one "user" message.
        model: Model name placed in the request payload.
        url: Chat endpoint that receives the POST request.

    Returns:
        The concatenation of the ``["message"]["content"]`` value of every
        JSON line found in the response body.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        json.JSONDecodeError: If a non-blank response line is not valid JSON.
        KeyError: If a response line has no ``message``/``content`` entry.
    """
    # Create the data payload as a dictionary
    data = {
        "model": model,
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "options": {     # Settings below are required for deterministic responses
            "seed": 123,
            "temperature": 0,
            "num_ctx": 2048
        }
    }

    # Convert the dictionary to a JSON formatted string and encode it to bytes
    payload = json.dumps(data).encode("utf-8")

    # Create a request object, setting the method to POST and adding necessary headers
    request = urllib.request.Request(url, data=payload, method="POST")
    request.add_header("Content-Type", "application/json")

    # The body is read line by line, one JSON document per line; the pieces
    # are collected in a list and joined once instead of growing a string.
    chunks = []
    with urllib.request.urlopen(request) as response:
        for raw_line in response:
            line = raw_line.decode("utf-8").strip()
            if not line:
                # A blank line carries no JSON document; json.loads would
                # raise on it, so skip it rather than abort the whole reply.
                continue
            chunks.append(json.loads(line)["message"]["content"])

    return "".join(chunks)


def check_if_running(process_name):
    """Report whether any running process name contains `process_name`.

    Args:
        process_name: Substring to look for in the names of running processes.

    Returns:
        True as soon as one process whose name contains `process_name` is
        found, otherwise False.
    """
    for proc in psutil.process_iter(["name"]):
        name = proc.info["name"]
        # psutil stores None for an attribute it could not retrieve; the
        # substring test would raise TypeError on None, so skip such entries.
        if name is not None and process_name in name:
            return True
    return False


def format_input(entry):
    """Build the instruction-style prompt text for one dataset entry.

    Args:
        entry: Mapping with an ``"instruction"`` key and an ``"input"`` key.

    Returns:
        The fixed preamble followed by the "### Instruction:" section and,
        only when ``entry["input"]`` is truthy, an "### Input:" section.
    """
    sections = [
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.",
        f"\n\n### Instruction:\n{entry['instruction']}",
    ]

    # The input section is optional and omitted entirely when empty
    extra = entry["input"]
    if extra:
        sections.append(f"\n\n### Input:\n{extra}")

    return "".join(sections)


def main(file_path):
    """Score the model responses stored in a JSON file and print a summary.

    Args:
        file_path: Path to a JSON file that is loaded and handed to
            `generate_model_scores` together with the "model_response" key.

    Raises:
        RuntimeError: If no running process named like "ollama" is found.
    """
    ollama_running = check_if_running("ollama")

    if not ollama_running:
        raise RuntimeError("Ollama not running. Launch ollama before proceeding.")
    # Reuse the result of the check above instead of scanning processes again
    print("Ollama running:", ollama_running)

    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default text encoding
    with open(file_path, "r", encoding="utf-8") as file:
        test_data = json.load(file)

    model = "llama3"
    scores = generate_model_scores(test_data, "model_response", model)
    print(f"Number of scores: {len(scores)} of {len(test_data)}")
    if scores:
        print(f"Average score: {sum(scores)/len(scores):.2f}\n")
    else:
        # No score was collected (empty dataset or no reply could be
        # converted); avoid dividing by zero
        print("Average score: n/a (no scores could be computed)\n")


def generate_model_scores(json_data, json_key, model="llama3"):
    """Ask the judge model for an integer score for each entry's response.

    Args:
        json_data: Iterable of entries; each is indexed with `json_key` and
            "output" and is passed to `format_input`.
        json_key: Key under which the response to be scored is stored.
        model: Model name forwarded to `query_model`.

    Returns:
        A list of integer scores. An entry whose response is the empty string
        contributes 0 without querying the model; an entry whose reply cannot
        be converted with `int` is reported on stdout and contributes nothing.
    """
    collected = []
    for record in tqdm(json_data, desc="Scoring entries"):
        candidate = record[json_key]

        # Empty responses are scored 0 directly, no model call needed
        if candidate == "":
            collected.append(0)
            continue

        prompt = (
            f"Given the input `{format_input(record)}` "
            f"and correct output `{record['output']}`, "
            f"score the model response `{candidate}`"
            f" on a scale from 0 to 100, where 100 is the best score. "
            f"Respond with the integer number only."
        )
        reply = query_model(prompt, model)

        try:
            parsed = int(reply)
        except ValueError:
            # Replies that are not a bare integer are reported and dropped
            print(f"Could not convert score: {reply}")
        else:
            collected.append(parsed)

    return collected


if __name__ == "__main__":
    # Command-line entry point: read the required --file_path option and
    # hand it to main().
    import argparse

    cli = argparse.ArgumentParser(description="Evaluate model responses with ollama")
    cli.add_argument(
        "--file_path",
        required=True,
        help=(
            "The path to the test dataset `.json` file "
            "with the `'output'` and `'model_response'` keys"
        ),
    )

    main(file_path=cli.parse_args().file_path)
Add standalone finetuning and evaluation scripts for chapter 7 (#234) * add finetuning and eval scripts * update link * update links * fix link 2024-06-21 05:23:24 -05:00			`# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).`
			`# Source for "Build a Large Language Model From Scratch"`
			`# - https://www.manning.com/books/build-a-large-language-model-from-scratch`
			`# Code: https://github.com/rasbt/LLMs-from-scratch`
			`#`
			`# A minimal instruction finetuning file based on the code in chapter 7`

			`import json`
			`import psutil`
			`from tqdm import tqdm`
			`import urllib.request`


			`def query_model(prompt, model="llama3", url="http://localhost:11434/api/chat"):`
			`# Create the data payload as a dictionary`
			`data = {`
			`"model": model,`
			`"messages": [`
			`{"role": "user", "content": prompt}`
Use deterministic ollama settings (#250) * deterministic ollama settings * add missing file 2024-06-27 07:16:48 -05:00			`],`
			`"options": { # Settings below are required for deterministic responses`
			`"seed": 123,`
			`"temperature": 0,`
			`"num_ctx": 2048`
			`}`
Add standalone finetuning and evaluation scripts for chapter 7 (#234) * add finetuning and eval scripts * update link * update links * fix link 2024-06-21 05:23:24 -05:00			`}`

			`# Convert the dictionary to a JSON formatted string and encode it to bytes`
			`payload = json.dumps(data).encode("utf-8")`

			`# Create a request object, setting the method to POST and adding necessary headers`
			`request = urllib.request.Request(url, data=payload, method="POST")`
			`request.add_header("Content-Type", "application/json")`

			`# Send the request and capture the response`
			`response_data = ""`
			`with urllib.request.urlopen(request) as response:`
			`# Read and decode the response`
			`while True:`
			`line = response.readline().decode("utf-8")`
			`if not line:`
			`break`
			`response_json = json.loads(line)`
			`response_data += response_json["message"]["content"]`

			`return response_data`


			`def check_if_running(process_name):`
			`running = False`
			`for proc in psutil.process_iter(["name"]):`
			`if process_name in proc.info["name"]:`
			`running = True`
			`break`
			`return running`


			`def format_input(entry):`
			`instruction_text = (`
			`f"Below is an instruction that describes a task. "`
			`f"Write a response that appropriately completes the request."`
			`f"\n\n### Instruction:\n{entry['instruction']}"`
			`)`

			`input_text = f"\n\n### Input:\n{entry['input']}" if entry["input"] else ""`

			`return instruction_text + input_text`


			`def main(file_path):`
			`ollama_running = check_if_running("ollama")`

			`if not ollama_running:`
			`raise RuntimeError("Ollama not running. Launch ollama before proceeding.")`
			`print("Ollama running:", check_if_running("ollama"))`

			`with open(file_path, "r") as file:`
			`test_data = json.load(file)`

			`model = "llama3"`
			`scores = generate_model_scores(test_data, "model_response", model)`
			`print(f"Number of scores: {len(scores)} of {len(test_data)}")`
			`print(f"Average score: {sum(scores)/len(scores):.2f}\n")`


			`def generate_model_scores(json_data, json_key, model="llama3"):`
			`scores = []`
			`for entry in tqdm(json_data, desc="Scoring entries"):`
Exercise solutions (#237) 2024-06-22 08:30:45 -05:00			`if entry[json_key] == "":`
			`scores.append(0)`
			`else:`
			`prompt = (`
			f"Given the input `{format_input(entry)}` "
			f"and correct output `{entry['output']}`, "
			f"score the model response `{entry[json_key]}`"
			`f" on a scale from 0 to 100, where 100 is the best score. "`
			`f"Respond with the integer number only."`
			`)`
			`score = query_model(prompt, model)`
			`try:`
			`scores.append(int(score))`
			`except ValueError:`
			`print(f"Could not convert score: {score}")`
			`continue`
Add standalone finetuning and evaluation scripts for chapter 7 (#234) * add finetuning and eval scripts * update link * update links * fix link 2024-06-21 05:23:24 -05:00
			`return scores`


			`if __name__ == "__main__":`

			`import argparse`

			`parser = argparse.ArgumentParser(`
Exercise solutions (#237) 2024-06-22 08:30:45 -05:00			`description="Evaluate model responses with ollama"`
Add standalone finetuning and evaluation scripts for chapter 7 (#234) * add finetuning and eval scripts * update link * update links * fix link 2024-06-21 05:23:24 -05:00			`)`
			`parser.add_argument(`
			`"--file_path",`
			`required=True,`
			`help=(`
			"The path to the test dataset `.json` file with the"
			" `'output'` and `'model_response'` keys"
			`)`
			`)`
			`args = parser.parse_args()`

			`main(file_path=args.file_path)`