Add linter functionality with CLI integration for log analysis

This commit is contained in:
gagb 2025-03-19 14:02:04 -07:00
parent 69292e6ff4
commit 98a2827265
6 changed files with 389 additions and 0 deletions

View File

@ -7,6 +7,7 @@ from .remove_missing_cmd import remove_missing_cli
from .run_cmd import run_cli
from .tabulate_cmd import tabulate_cli
from .version import __version__
from .linter.cli import lint_cli
class CommandSpec(TypedDict):
@ -33,6 +34,11 @@ def main(args: Optional[List[str]] = None) -> None:
"description": "tabulate the results of a previous run",
"function": tabulate_cli,
},
{
"command": "lint",
"description": "lint the benchmark configuration",
"function": lint_cli,
},
{
"command": "remove_missing",
"description": "remove folders with missing results",

View File

@ -0,0 +1,4 @@
# __init__.py
# Re-exports the linter's data models (Code, Document, CodedDocument) and the
# BaseQualitativeCoder protocol from ._base; __all__ lists exactly these names.
from ._base import Code, Document, CodedDocument, BaseQualitativeCoder
__all__ = ["Code", "Document", "CodedDocument", "BaseQualitativeCoder"]

View File

@ -0,0 +1,81 @@
import json
import hashlib
import re
from typing import Protocol, List, Set, Optional
from pydantic import BaseModel
class Document(BaseModel):
    """
    A piece of text to be coded, optionally tagged with a name.

    The hash is computed from ``text`` only, so two documents with the same
    text hash identically regardless of ``name``.
    """

    text: str
    name: Optional[str] = None

    def __hash__(self) -> int:
        """Return the MD5 digest of the UTF-8 encoded text as an integer."""
        digest = hashlib.md5(self.text.encode("utf-8")).digest()
        # Big-endian interpretation of the raw digest equals int(hexdigest, 16).
        return int.from_bytes(digest, "big")
# NOTE(review): this model is nested in Code, which is nested in the CodeList
# response format used by the OpenAI coder. Pydantic presumably surfaces the
# class docstring as the schema description, so the wording below may reach
# the model -- confirm before rewording it.
class CodeExample(BaseModel):
    """
    Represents an example associated with a code.
    Attributes:
        line (int): The line number in the file where the code example starts.
        line_end (int): The line number in the file where the code example ends.
        reason (str): A description explaining the purpose or context of the
            code example.
    """

    line: int
    line_end: int
    reason: str
# A qualitative code: a normalized hyphenated name, its definition, and the
# examples that support it. Documented with comments rather than a class
# docstring so the model's generated schema is left exactly as it was.
class Code(BaseModel):
    name: str
    definition: str
    examples: List[CodeExample]  # changed from List[str]
    id: Optional[int] = None
    merged_from: Optional[List[int]] = None

    def __init__(
        self,
        name: str,
        definition: str,
        examples: List[CodeExample],
        id: Optional[int] = None,
        merged_from: Optional[List[int]] = None,
    ):
        """
        Build a code, normalizing its name and deriving its id.

        Args:
            name: Raw name; lower-cased, spaces turned into hyphens, and every
                character other than ``a-z`` and ``-`` removed.
            definition: Text describing the code.
            examples: Examples attached to the code.
            id: Accepted but not used; the id is always recomputed from the
                normalized name and the definition.
            merged_from: Accepted but not used; the attribute always starts
                as ``None``.
        """
        super().__init__(name=name, definition=definition, examples=examples)

        hyphenated = self.name.lower().replace(" ", "-")
        self.name = re.sub(r"[^a-z-]", "", hyphenated)

        # Content-derived identifier: MD5 over normalized name + definition.
        fingerprint = hashlib.md5((self.name + self.definition).encode("utf-8"))
        self.id = int.from_bytes(fingerprint.digest(), "big")

        self.merged_from = None

    def __hash__(self) -> int:
        """Return the content-derived id; raise ValueError if it is unset."""
        if self.id is not None:
            return self.id
        raise ValueError("Code ID is not set.")

    def add_merged_from(self, code_id: int) -> None:
        """Record ``code_id`` in ``merged_from`` unless it is already there."""
        if self.merged_from is None:
            self.merged_from = [code_id]
            return
        if code_id not in self.merged_from:
            self.merged_from.append(code_id)
class CodedDocument(BaseModel):
    """A document together with the set of codes assigned to it."""

    doc: Document
    codes: Set[Code]

    @classmethod
    def from_json(cls, json_str: str) -> "CodedDocument":
        """
        Rebuild a coded document from a JSON string.

        The JSON object must carry a ``"doc"`` mapping (keyword arguments for
        ``Document``) and a ``"codes"`` list of mappings (keyword arguments
        for ``Code``).
        """
        payload = json.loads(json_str)
        document = Document(**payload["doc"])
        code_set = {Code(**entry) for entry in payload["codes"]}
        return cls(doc=document, codes=code_set)
class BaseQualitativeCoder(Protocol):
    """Structural interface for objects that assign codes to a document."""

    def code_document(
        self, doc: Document, code_set: Optional[Set[Code]] = None
    ) -> Optional[CodedDocument]:
        """
        Code ``doc`` and return the result.

        Args:
            doc: The document to code.
            code_set: Optional set of existing codes. Defaults to ``None`` so
                the protocol accepts the single-argument call form
                ``coder.code_document(doc)``.

        Returns:
            The coded document, or ``None``.
        """
        ...

View File

@ -0,0 +1,86 @@
import os
import argparse
from typing import List, Sequence, Optional
from ._base import Document, CodedDocument
from .coders.oai_coder import OAIQualitativeCoder
def prepend_line_numbers(lines: List[str]) -> List[str]:
    """
    Prefix every line with its 1-based number followed by ``": "``.

    Numbers are right-justified to the width of the largest line number so
    the prefixes line up. The input list is not modified.
    """
    pad = len(str(len(lines)))
    numbered: List[str] = []
    for number, content in enumerate(lines, start=1):
        numbered.append(f"{str(number).rjust(pad)}: {content}")
    return numbered
def load_log_file(path: str, prepend_numbers: bool = False) -> Document:
    """
    Read a log file into a ``Document``.

    Args:
        path: Path of the log file to read.
        prepend_numbers: When true, each line is prefixed with its line
            number via ``prepend_line_numbers``.

    Returns:
        A ``Document`` whose text is the (optionally numbered) file content
        and whose name is the absolute path of the file.
    """
    # Decode as UTF-8 regardless of the platform default, and substitute
    # undecodable bytes instead of raising, so a log containing stray binary
    # output can still be linted.
    with open(path, "r", encoding="utf-8", errors="replace") as f:
        lines = f.readlines()
    if prepend_numbers:
        lines = prepend_line_numbers(lines)
    text = "".join(lines)
    return Document(text=text, name=os.path.abspath(path))
def code_log(path: str) -> Optional[CodedDocument]:
    """
    Code a single log file with ``OAIQualitativeCoder``.

    The file is loaded with line numbers prepended before it is coded.

    Raises:
        FileNotFoundError: If ``path`` is not an existing file.
    """
    coder = OAIQualitativeCoder()
    if not os.path.isfile(path):
        raise FileNotFoundError(f"File {path} does not exist.")
    return coder.code_document(load_log_file(path, prepend_numbers=True))
def print_coded_results(input_path: str, coded_doc: CodedDocument) -> None:
    """
    Print every code of ``coded_doc`` with its examples, then a summary.

    Each code is printed as a red "Category" header with its definition,
    followed by one ``path:line:line_end<TAB>reason`` row per example. The
    final count is the total number of example rows printed.
    """
    # NOTE(review): loop nesting was reconstructed from de-indented source --
    # confirm the counter is per example and the blank-line prints follow the loop.
    example_count: int = 0
    for code in coded_doc.codes:
        print(f"\033[31mCategory: {code.name}\033[0m: {code.definition}")
        for example in code.examples:
            location = f"\033[1m{input_path}\033[0m:{example.line}:{example.line_end}"
            print(f"{location}\t{example.reason}")
            example_count += 1
    print("\n")
    print(f"Found {example_count} errors in {input_path}.")
    print("\n")
def code_command(input_path: str) -> None:
    """
    Code the log file at ``input_path`` and print the findings.

    Prints "Invalid input path." and returns when the path is not a file.

    Raises:
        ValueError: If coding the document yields ``None``.
    """
    if not os.path.isfile(input_path):
        print("Invalid input path.")
        return

    result = code_log(input_path)
    if result is None:
        raise ValueError("Failed to code the document.")
    print_coded_results(input_path, result)
def lint_cli(args: Sequence[str]) -> None:
    """
    Entry point of the ``lint`` sub-command.

    Args:
        args: Argument vector; the first element is the invocation name used
            as the program name in help output, the rest is parsed for a
            single positional ``logfile``.
    """
    invocation_cmd, remaining = args[0], args[1:]

    description = (
        f"{invocation_cmd} will analyze a console log."
        " And detect errors/inefficiencies in the log files."
    )
    parser = argparse.ArgumentParser(prog=invocation_cmd, description=description)
    parser.add_argument("logfile", type=str, help="Path to a log file.")

    namespace = parser.parse_args(remaining)
    code_command(namespace.logfile)

View File

@ -0,0 +1,212 @@
import os
import re
from typing import List, Set, Optional
from pydantic import BaseModel
from openai import OpenAI
from .._base import CodedDocument, Document, Code
from .._base import BaseQualitativeCoder
# Wrapper model holding a list of Code objects; it is passed as the
# response_format of the OpenAI completion calls in this module.
# Documented with comments rather than a class docstring so the schema
# generated from this model is left untouched.
class CodeList(BaseModel):
    code_list: List[Code]
def remove_control_characters(text: str) -> str:
    """
    Remove control characters from the text while keeping its line layout.

    All C0 control characters and DEL are stripped except newline and tab:
    removing those would merge every log line into one and destroy the
    line structure that line-numbered logs rely on. A carriage return is
    removed, so ``"\\r\\n"`` becomes ``"\\n"``.

    Args:
        text: The text to sanitize.

    Returns:
        The sanitized text.
    """
    # \x09 (tab) and \x0A (newline) are deliberately left out of the class.
    return re.sub(r"[\x00-\x08\x0B-\x1F\x7F]", "", text)
class OAIQualitativeCoder(BaseQualitativeCoder):
    """
    Qualitative coder backed by an OpenAI chat model.

    The model is asked for structured output parsed into ``CodeList``.
    Results can optionally be cached on disk as JSON files named after the
    hash of the document.
    """

    DEFAULT_MODEL = "gpt-4o"

    def __init__(
        self,
        cache_dir: str = ".cache",
        model: str = DEFAULT_MODEL,
        cache_enabled: bool = False,
    ) -> None:
        """
        Create the OpenAI client and store the settings.

        Args:
            cache_dir: Directory in which cached results are stored.
            model: Model name passed to the completion call.
            cache_enabled: Whether results are read from and written to the
                cache directory.
        """
        self.client = OpenAI()
        self.cache_dir = cache_dir
        self.model = model
        self.cache_enabled = cache_enabled

    def code_document(
        self,
        doc: Document,
        code_set: Optional[Set[Code]] = None,
    ) -> Optional[CodedDocument]:
        """
        Code a document, either generating new codes or applying given ones.

        Args:
            doc: The document to code. It is not modified; a sanitized copy
                is sent to the model and stored in the result.
            code_set: When ``None`` the model generates a fresh list of
                codes. Otherwise the model is asked to apply only these
                codes, and returned codes whose names are not in the set
                are dropped.

        Returns:
            The coded document (possibly loaded from the cache). With a
            ``code_set`` the set of codes may be empty after filtering.

        Raises:
            ValueError: If the model returns no parsed codes.
        """
        # get hash of the document
        # NOTE(review): the cache key depends on the document only, so a
        # cached result is returned regardless of the code_set passed in.
        doc_hash = hash(doc)
        cache_file = os.path.join(self.cache_dir, f"{doc_hash}.json") if self.cache_enabled else None

        if self.cache_enabled:
            # exist_ok avoids a failure when the directory appears between an
            # existence check and its creation.
            os.makedirs(self.cache_dir, exist_ok=True)
            if cache_file and os.path.exists(cache_file):
                with open(cache_file, "r", encoding="utf-8") as f:
                    cached_coded_doc_json = f.read()
                return CodedDocument.from_json(cached_coded_doc_json)

        # sanitize the doc before passing it to openai; work on a copy so the
        # caller's document (and therefore its hash / cache key) is unchanged
        doc = doc.model_copy(update={"text": remove_control_characters(doc.text)})

        coded_document: Optional[CodedDocument] = None

        if code_set is None:
            completion = self.client.beta.chat.completions.parse(
                model=self.model,
                messages=[
                    {
                        "role": "system",
                        "content": """You are an expert qualitative researcher.
Given a list of dcocuments containing errors below, generate a list of (error) codes.
Each code should contains:
- at least 3 words, max 4 word, hyphenated.
For example, the name could be of the format "lack-of-word2",
"failed-to-bar", "excessive-use-of-magenta". Name should adhere to
Joseph M. Williams' writing principles of clarity, conciseness, and coherence.
Ensure each code name is lower-case, hyphenated, and directly reflects the
concept it represents. Avoid ambiguous or overly complex terms, and prioritize
simplicity, precision, and readability in the naming.
The code names should pass the 'clarity and grace' test by being easy to
understand, descriptive, and reflective of the content they categorize.
- suggest codes that are similar to good code names. avoid code names that are
similar to bad code names.
- The definition should be simple worded and practical. At least 2 sentences,
max 3. It should be written in past tense.
It should convey how a labeller could apply this code to future logs, without
mentioning the word "labeller". The definition should be specific enough to be
useful in debugging. It should be very concrete. And should be well thought and
make sense. Bull shitting will not earn you any points.
- The examples should be a list. Each example should be descriptive between
2-3 sentences. Examples should be concrete, informative and not vague. Provide
at max 20 salient examples. Examples should contain a lot of detail about what
happened and should refer to incidents in the log.
- The list of codes must mutually exclusive.
# GOOD EXAMPLES OF FINAL CODE NAMES/CLUSTERS
* looped-without-progress
* repeated-unsuccessful-actions
* repeated-syntax-errors
* exceeded-context-window-limits
* encountered-security-risks
* failure-to-switch-strategy
* exceeded-resource-limits
* attempted-to-handle-excessive-data
* no-errors-detected
These names are high-level but also concrete. They exactly mention the type of
error, issue, gap that has been identified.
## BAD EXAMPLES OF FINAL CODE NAMES/CLUSTERS
* mismanaged-data-utilization -- too high level
* incomplete-or-misguided-execution -- too high level
* misaligned-agent-interactions -- too high level
* mismanaged-task-strategies -- too high level
* resource-inefficiencies -- vague
* communication-issues -- vague
* coordination-issues -- too high level and vague
* operational-failures
* execution-errors -- too high level
* navigation-issues -- too concise
* adaptive-failures -- too concise
* successful-processes -- I dont like the word processes
* system-constraints
* configuration-issues
* information-inaccuracies -- too high level
* process-improvements -- vague, not an error
* inadequate-error-response -- too high-level, unclear what kind of errors
* specific-access-issues -- makes no sense
* strategy-inefficiency -- strategy is too high level
* error-management-gaps -- unclear what error management means
* error-handling-deficiency -- unclear what kind of errors
* coordination-breakdown -- unclear what coordination means
* muddled-task-execution -- unclear what kind of tasks were muddled
* task-completion-gaps -- too high level
The above names are too high level and unclear. Please DO NOT use such names.
""",
                    },
                    {
                        "role": "user",
                        "content": doc.text,
                    },
                ],
                response_format=CodeList,
            )
            message = completion.choices[0].message
            if message.parsed and len(message.parsed.code_list) > 0:
                coded_document = CodedDocument(
                    doc=doc, codes=set(message.parsed.code_list)
                )
            else:
                # Surface the refusal text (if any) before failing.
                print(message.refusal)
                raise ValueError("Error in coding document with OpenAI")
        else:
            # Render the allowed codes as text blocks for the user prompt.
            code_to_str = "\n".join(
                [
                    (
                        f"\n---\nCode Name: {code.name}\n"
                        f"Definition: {code.definition}\n"
                        f"Examples: {code.examples}\n---\n"
                    )
                    for code in code_set
                ]
            )
            completion = self.client.beta.chat.completions.parse(
                model=self.model,
                messages=[
                    {
                        "role": "system",
                        "content": """You are an expert qualitative researcher.
You can answer any questions about coding logs.""",
                    },
                    {
                        "role": "user",
                        "content": f"""
## Context
The text below shows a log containing errors. Your task is to code the log with
the following codes. Generate a list of codes for the log below.
Only use the codes from the list below. Do not create new codes.
Modify the examples of the codes to fit the context of the log.
Your example should be informative to narrow down the details of the error in
the context of the example.
## Codes
{code_to_str}
## Log
{doc.text}
""",
                    },
                ],
                response_format=CodeList,
            )
            message = completion.choices[0].message
            if message.parsed and len(message.parsed.code_list) > 0:
                code_list = message.parsed.code_list
                # filter out codes whose names are not in the code_set
                code_set_names = {code.name for code in code_set}
                code_list = [code for code in code_list if code.name in code_set_names]
                coded_document = CodedDocument(doc=doc, codes=set(code_list))

        if coded_document is None:
            raise ValueError("Error in coding document with OpenAI")

        if self.cache_enabled and cache_file:
            # Write JSON as UTF-8 so non-ASCII log content does not depend on
            # the platform's default encoding.
            with open(cache_file, "w", encoding="utf-8") as f:
                f.write(coded_document.model_dump_json(indent=4))

        return coded_document