mirror of
https://github.com/microsoft/autogen.git
synced 2025-12-26 14:38:50 +00:00
Add linter functionality with CLI integration for log analysis
This commit is contained in:
parent
69292e6ff4
commit
98a2827265
@ -7,6 +7,7 @@ from .remove_missing_cmd import remove_missing_cli
|
||||
from .run_cmd import run_cli
|
||||
from .tabulate_cmd import tabulate_cli
|
||||
from .version import __version__
|
||||
from .linter.cli import lint_cli
|
||||
|
||||
|
||||
class CommandSpec(TypedDict):
|
||||
@ -33,6 +34,11 @@ def main(args: Optional[List[str]] = None) -> None:
|
||||
"description": "tabulate the results of a previous run",
|
||||
"function": tabulate_cli,
|
||||
},
|
||||
{
|
||||
"command": "lint",
|
||||
"description": "lint the benchmark configuration",
|
||||
"function": lint_cli,
|
||||
},
|
||||
{
|
||||
"command": "remove_missing",
|
||||
"description": "remove folders with missing results",
|
||||
|
||||
4
python/packages/agbench/src/agbench/linter/__init__.py
Normal file
4
python/packages/agbench/src/agbench/linter/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
# __init__.py
|
||||
from ._base import Code, Document, CodedDocument, BaseQualitativeCoder
|
||||
|
||||
__all__ = ["Code", "Document", "CodedDocument", "BaseQualitativeCoder"]
|
||||
81
python/packages/agbench/src/agbench/linter/_base.py
Normal file
81
python/packages/agbench/src/agbench/linter/_base.py
Normal file
@ -0,0 +1,81 @@
|
||||
import json
|
||||
import hashlib
|
||||
import re
|
||||
from typing import Protocol, List, Set, Optional
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class Document(BaseModel):
    """A piece of text to be coded, optionally labelled with a name.

    Hashing is content-based: the hash is derived solely from ``text``, so
    ``name`` does not influence it.
    """

    text: str
    name: Optional[str] = None

    def __hash__(self) -> int:
        """Return the MD5 digest of the UTF-8 encoded text as an integer."""
        encoded_text = self.text.encode("utf-8")
        hex_digest = hashlib.md5(encoded_text).hexdigest()
        return int(hex_digest, 16)
|
||||
|
||||
|
||||
class CodeExample(BaseModel):
    """
    Represents an example associated with a code.

    Attributes:
        line (int): The line number in the file where the code example starts.
        line_end (int): The line number in the file where the code example ends.
        reason (str): A description explaining the purpose or context of the
            code example.
    """

    # NOTE(review): this model is the element type of ``Code.examples``;
    # pydantic may surface the docstring above as the schema description of
    # the model -- confirm before rewording it.

    # Line number where the example starts.
    line: int
    # Line number where the example ends.
    line_end: int
    # Free-text explanation attached to the example.
    reason: str
|
||||
|
||||
|
||||
class Code(BaseModel):
    # A named qualitative code: a normalized name, a definition, and the
    # examples that illustrate it. Documented with comments rather than a
    # class docstring so the schema pydantic derives from this model is
    # left untouched.

    name: str
    definition: str
    examples: List[CodeExample]  # changed from List[str]
    # Derived in __init__ from the normalized name and the definition.
    id: Optional[int] = None
    # IDs of codes that were merged into this one, if any.
    merged_from: Optional[List[int]] = None

    def __init__(
        self,
        name: str,
        definition: str,
        examples: List[CodeExample],
        id: Optional[int] = None,
        merged_from: Optional[List[int]] = None,
    ):
        """Build a code, normalizing its name and deriving its ID.

        Args:
            name: Human-readable name; lower-cased, spaces turned into
                hyphens, and every character other than ``a-z`` and ``-``
                removed.
            definition: Description of what the code means.
            examples: Occurrences that illustrate the code.
            id: Accepted for round-tripping serialized codes but ignored;
                the ID is always recomputed from name and definition.
            merged_from: IDs of codes previously merged into this one.
                Preserved so that serialized codes keep their merge history
                when they are reloaded.
        """
        super().__init__(
            name=name,
            definition=definition,
            examples=examples,
            merged_from=merged_from,
        )
        self.name = re.sub(r"[^a-z-]", "", self.name.lower().replace(" ", "-"))
        # The ID is a pure function of (normalized name, definition), so two
        # codes with the same name and definition hash identically.
        self.id = int(
            hashlib.md5((self.name + self.definition).encode("utf-8")).hexdigest(), 16
        )

    def __hash__(self) -> int:
        """Return the derived ID; raise ``ValueError`` if it is unset."""
        if self.id is None:
            raise ValueError("Code ID is not set.")
        return self.id

    def add_merged_from(self, code_id: int) -> None:
        """Record ``code_id`` as merged into this code, ignoring duplicates."""
        if self.merged_from is None:
            self.merged_from = []
        if code_id not in self.merged_from:
            self.merged_from.append(code_id)
|
||||
|
||||
|
||||
class CodedDocument(BaseModel):
    """Pairs a document with the set of codes assigned to it."""

    doc: Document
    codes: Set[Code]

    @classmethod
    def from_json(cls, json_str: str) -> "CodedDocument":
        """Rebuild a coded document from a JSON string.

        The JSON object must carry a ``doc`` mapping and a ``codes`` list of
        mappings; each is expanded as keyword arguments into ``Document``
        and ``Code`` respectively.
        """
        payload = json.loads(json_str)
        document = Document(**payload["doc"])
        code_set = set()
        for raw_code in payload["codes"]:
            code_set.add(Code(**raw_code))
        return cls(doc=document, codes=code_set)
|
||||
|
||||
|
||||
class BaseQualitativeCoder(Protocol):
    """Structural interface for objects that assign codes to a document."""

    def code_document(
        self, doc: Document, code_set: Optional[Set[Code]] = None
    ) -> Optional[CodedDocument]:
        """Code ``doc`` and return the result.

        Args:
            doc: The document to code.
            code_set: Codes to choose from. Defaults to ``None`` so that
                callers may invoke ``code_document(doc)`` with a single
                argument, matching how implementations are called.

        Returns:
            The coded document, or ``None`` if no coding was produced.
        """
        ...
|
||||
86
python/packages/agbench/src/agbench/linter/cli.py
Normal file
86
python/packages/agbench/src/agbench/linter/cli.py
Normal file
@ -0,0 +1,86 @@
|
||||
import os
|
||||
import argparse
|
||||
from typing import List, Sequence, Optional
|
||||
from ._base import Document, CodedDocument
|
||||
from .coders.oai_coder import OAIQualitativeCoder
|
||||
|
||||
|
||||
def prepend_line_numbers(lines: List[str]) -> List[str]:
    """
    Prefix every line with its 1-based line number followed by ``": "``.

    Numbers are right-justified to the width of the largest line number so
    that the prefixes line up. The input list is not modified.
    """
    digits = len(str(len(lines)))
    numbered: List[str] = []
    for number, content in enumerate(lines, start=1):
        numbered.append(f"{number:>{digits}}: {content}")
    return numbered
|
||||
|
||||
|
||||
def load_log_file(path: str, prepend_numbers: bool = False) -> Document:
    """Read a log file into a ``Document`` named after its absolute path.

    Args:
        path: Path of the log file to read.
        prepend_numbers: When true, each line is prefixed with its line
            number via ``prepend_line_numbers``.

    Returns:
        A ``Document`` whose ``text`` is the (optionally numbered) file
        content and whose ``name`` is the absolute path of the file.
    """
    # Decode explicitly as UTF-8 instead of the locale default, and replace
    # undecodable bytes: console logs often contain stray bytes, and a
    # decoding error here would abort linting before any analysis happens.
    with open(path, "r", encoding="utf-8", errors="replace") as f:
        lines = f.readlines()

    if prepend_numbers:
        lines = prepend_line_numbers(lines)

    return Document(text="".join(lines), name=os.path.abspath(path))
|
||||
|
||||
|
||||
def code_log(path: str) -> Optional[CodedDocument]:
    """Code the log file at ``path`` with an ``OAIQualitativeCoder``.

    The log is loaded with line numbers prepended before it is coded.

    Raises:
        FileNotFoundError: If ``path`` is not an existing regular file.
    """
    # The coder is constructed first, before the path is validated.
    coder = OAIQualitativeCoder()

    if not os.path.isfile(path):
        raise FileNotFoundError(f"File {path} does not exist.")

    document = load_log_file(path, prepend_numbers=True)
    return coder.code_document(document)
|
||||
|
||||
|
||||
def print_coded_results(input_path: str, coded_doc: CodedDocument) -> None:
    """Print every code and its examples, then a summary count.

    Each code is printed as a red ``Category`` header followed by one line
    per example in ``path:line:line_end<TAB>reason`` form, with the path in
    bold. The final summary reports the total number of examples printed.
    """
    total: int = 0

    for category in coded_doc.codes:
        print(f"\033[31mCategory: {category.name}\033[0m: {category.definition}")
        for occurrence in category.examples:
            location = (
                f"\033[1m{input_path}\033[0m:{occurrence.line}"
                f":{occurrence.line_end}"
            )
            print(f"{location}\t{occurrence.reason}")
            total += 1
    # NOTE(review): nesting reconstructed from a listing without indentation;
    # the blank-line print is taken to follow the loop -- confirm.
    print("\n")
    print(f"Found {total} errors in {input_path}.")
    print("\n")
|
||||
|
||||
|
||||
def code_command(input_path: str) -> None:
    """
    Code the log file at ``input_path`` and print the findings.

    Prints ``Invalid input path.`` and returns when the path is not an
    existing regular file. Raises ``ValueError`` when coding yields nothing.
    """
    if not os.path.isfile(input_path):
        print("Invalid input path.")
        return

    result = code_log(input_path)
    if result is None:
        raise ValueError("Failed to code the document.")
    print_coded_results(input_path, result)
|
||||
|
||||
|
||||
def lint_cli(args: Sequence[str]) -> None:
    """Entry point of the ``lint`` sub-command.

    ``args[0]`` is the invocation name shown in usage/help output; the
    remaining items are parsed as the command line, which must contain a
    single positional ``logfile`` path that is handed to ``code_command``.
    """
    program = args[0]
    cli_args = args[1:]

    description = (
        f"{program} will analyze a console log."
        " And detect errors/inefficiencies in the log files."
    )
    parser = argparse.ArgumentParser(prog=program, description=description)
    parser.add_argument("logfile", type=str, help="Path to a log file.")

    namespace = parser.parse_args(cli_args)
    code_command(namespace.logfile)
|
||||
212
python/packages/agbench/src/agbench/linter/coders/oai_coder.py
Normal file
212
python/packages/agbench/src/agbench/linter/coders/oai_coder.py
Normal file
@ -0,0 +1,212 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
from typing import List, Set, Optional
|
||||
from pydantic import BaseModel
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
from .._base import CodedDocument, Document, Code
|
||||
from .._base import BaseQualitativeCoder
|
||||
|
||||
|
||||
class CodeList(BaseModel):
    # Wrapper model around a list of codes. It is passed as ``response_format``
    # to the OpenAI parse calls in this file, and the parsed reply is read back
    # through ``message.parsed.code_list``.
    code_list: List[Code]
|
||||
|
||||
|
||||
def remove_control_characters(text: str) -> str:
    """
    Remove control characters from the text, keeping line structure.

    Strips the ASCII control characters (0x00-0x1F and 0x7F) except tab,
    line feed and carriage return. Those three are preserved because the
    text being sanitized is a line-numbered log: deleting newlines would
    fuse every line into one and make the line numbers meaningless.

    Args:
        text: The text to sanitize.

    Returns:
        The text without control characters other than ``\\t``, ``\\n``
        and ``\\r``.
    """
    # Ranges deliberately skip \x09 (tab), \x0A (LF) and \x0D (CR).
    return re.sub(r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]", "", text)
|
||||
|
||||
|
||||
class OAIQualitativeCoder(BaseQualitativeCoder):
    """Qualitative coder that asks an OpenAI chat model for structured codes.

    Without a code set, the model is asked to invent codes for the document;
    with a code set, it is asked to apply only those codes. Results can
    optionally be cached on disk as JSON files inside ``cache_dir``.
    """

    DEFAULT_MODEL = "gpt-4o"

    def __init__(self, cache_dir: str = ".cache", model: str = DEFAULT_MODEL, cache_enabled: bool = False) -> None:
        """Create the coder.

        Args:
            cache_dir: Directory holding cached results.
            model: Name of the OpenAI model to query.
            cache_enabled: Whether results are read from / written to the cache.
        """
        self.client = OpenAI()
        self.cache_dir = cache_dir
        self.model = model
        self.cache_enabled = cache_enabled

    def _cache_path(self, doc: Document, code_set: Optional[Set[Code]]) -> Optional[str]:
        """Return the cache file for this request, or ``None`` if caching is off."""
        if not self.cache_enabled:
            return None

        key = str(hash(doc))
        if code_set is not None:
            # The result depends on which codes were offered, so the code set
            # must be part of the key; otherwise a result cached for one code
            # set (or for none) would be returned for a different request.
            # Requests without a code set keep the plain document-hash key.
            import hashlib  # local: the module-level imports lie outside this block

            code_ids = ",".join(sorted(str(hash(code)) for code in code_set))
            key = f"{key}-{hashlib.md5(code_ids.encode('utf-8')).hexdigest()}"
        return os.path.join(self.cache_dir, f"{key}.json")

    def _generate_codes(self, doc: Document) -> CodedDocument:
        """Ask the model to invent codes for ``doc`` (no predefined code set).

        Raises:
            ValueError: If the reply could not be parsed or holds no codes;
                the model's refusal text is printed first.
        """
        completion = self.client.beta.chat.completions.parse(
            model=self.model,
            messages=[
                {
                    "role": "system",
                    "content": """You are an expert qualitative researcher.

Given a list of dcocuments containing errors below, generate a list of (error) codes.
Each code should contains:
- at least 3 words, max 4 word, hyphenated.

For example, the name could be of the format "lack-of-word2",
"failed-to-bar", "excessive-use-of-magenta". Name should adhere to
Joseph M. Williams' writing principles of clarity, conciseness, and coherence.

Ensure each code name is lower-case, hyphenated, and directly reflects the
concept it represents. Avoid ambiguous or overly complex terms, and prioritize
simplicity, precision, and readability in the naming.

The code names should pass the 'clarity and grace' test by being easy to
understand, descriptive, and reflective of the content they categorize.
- suggest codes that are similar to good code names. avoid code names that are
similar to bad code names.
- The definition should be simple worded and practical. At least 2 sentences,
max 3. It should be written in past tense.

It should convey how a labeller could apply this code to future logs, without
mentioning the word "labeller". The definition should be specific enough to be
useful in debugging. It should be very concrete. And should be well thought and
make sense. Bull shitting will not earn you any points.

- The examples should be a list. Each example should be descriptive between
2-3 sentences. Examples should be concrete, informative and not vague. Provide
at max 20 salient examples. Examples should contain a lot of detail about what
happened and should refer to incidents in the log.

- The list of codes must mutually exclusive.

# GOOD EXAMPLES OF FINAL CODE NAMES/CLUSTERS
* looped-without-progress
* repeated-unsuccessful-actions
* repeated-syntax-errors
* exceeded-context-window-limits
* encountered-security-risks
* failure-to-switch-strategy
* exceeded-resource-limits
* attempted-to-handle-excessive-data
* no-errors-detected
These names are high-level but also concrete. They exactly mention the type of
error, issue, gap that has been identified.

## BAD EXAMPLES OF FINAL CODE NAMES/CLUSTERS
* mismanaged-data-utilization -- too high level
* incomplete-or-misguided-execution -- too high level
* misaligned-agent-interactions -- too high level
* mismanaged-task-strategies -- too high level
* resource-inefficiencies -- vague
* communication-issues -- vague
* coordination-issues -- too high level and vague
* operational-failures
* execution-errors -- too high level
* navigation-issues -- too concise
* adaptive-failures -- too concise
* successful-processes -- I dont like the word processes
* system-constraints
* configuration-issues
* information-inaccuracies -- too high level
* process-improvements -- vague, not an error
* inadequate-error-response -- too high-level, unclear what kind of errors
* specific-access-issues -- makes no sense
* strategy-inefficiency -- strategy is too high level
* error-management-gaps -- unclear what error management means
* error-handling-deficiency -- unclear what kind of errors
* coordination-breakdown -- unclear what coordination means
* muddled-task-execution -- unclear what kind of tasks were muddled
* task-completion-gaps -- too high level
The above names are too high level and unclear. Please DO NOT use such names.
""",
                },
                {
                    "role": "user",
                    "content": doc.text,
                },
            ],
            response_format=CodeList,
        )

        message = completion.choices[0].message
        if message.parsed and len(message.parsed.code_list) > 0:
            return CodedDocument(doc=doc, codes=set(message.parsed.code_list))

        print(message.refusal)
        raise ValueError("Error in coding document with OpenAI")

    def _apply_codes(self, doc: Document, code_set: Set[Code]) -> Optional[CodedDocument]:
        """Ask the model to code ``doc`` using only the codes in ``code_set``.

        Returns ``None`` when the reply could not be parsed or holds no codes.
        """
        code_to_str = "\n".join(
            [
                (
                    f"\n---\nCode Name: {code.name}\n"
                    f"Definition: {code.definition}\n"
                    f"Examples: {code.examples}\n---\n"
                )
                for code in code_set
            ]
        )

        completion = self.client.beta.chat.completions.parse(
            model=self.model,
            messages=[
                {
                    "role": "system",
                    "content": """You are an expert qualitative researcher.
You can answer any questions about coding logs.""",
                },
                {
                    "role": "user",
                    "content": f"""
## Context
The text below shows a log containing errors. Your task is to code the log with
the following codes. Generate a list of codes for the log below.

Only use the codes from the list below. Do not create new codes.
Modify the examples of the codes to fit the context of the log.

Your example should be informative to narrow down the details of the error in
the context of the example.

## Codes

{code_to_str}

## Log

{doc.text}
""",
                },
            ],
            response_format=CodeList,
        )

        message = completion.choices[0].message
        if not (message.parsed and len(message.parsed.code_list) > 0):
            return None

        # filter out codes whose names are not in the code_set
        code_set_names = {code.name for code in code_set}
        code_list = [code for code in message.parsed.code_list if code.name in code_set_names]
        return CodedDocument(doc=doc, codes=set(code_list))

    def code_document(
        self,
        doc: Document,
        code_set: Optional[Set[Code]] = None,
    ) -> Optional[CodedDocument]:
        """Code ``doc``, consulting and updating the on-disk cache if enabled.

        Note that ``doc.text`` is sanitized in place before it is sent to the
        model, so the caller's document is modified.

        Args:
            doc: The document to code.
            code_set: Codes to apply. When ``None`` the model invents codes.

        Returns:
            The coded document (possibly loaded from the cache).

        Raises:
            ValueError: If the model's reply yields no coded document.
        """
        # The cache key is derived from the raw text, before sanitizing.
        cache_file = self._cache_path(doc, code_set)

        if cache_file is not None:
            # exist_ok avoids the check-then-create race of exists() + makedirs().
            os.makedirs(self.cache_dir, exist_ok=True)
            if os.path.exists(cache_file):
                with open(cache_file, "r", encoding="utf-8") as f:
                    return CodedDocument.from_json(f.read())

        # sanitize the doc before passing it to openai
        doc.text = remove_control_characters(doc.text)

        coded_document: Optional[CodedDocument]
        if code_set is None:
            coded_document = self._generate_codes(doc)
        else:
            coded_document = self._apply_codes(doc, code_set)

        if coded_document is None:
            raise ValueError("Error in coding document with OpenAI")

        if cache_file is not None:
            # Explicit UTF-8 so non-ASCII log content round-trips regardless
            # of the platform's default encoding.
            with open(cache_file, "w", encoding="utf-8") as f:
                f.write(coded_document.model_dump_json(indent=4))
        return coded_document
|
||||
Loading…
x
Reference in New Issue
Block a user