mirror of
https://github.com/unclecode/crawl4ai.git
synced 2026-01-08 00:18:11 +00:00
refactor(logger): Apply the Enumeration for color
This commit is contained in:
parent
b2f3cb0dfa
commit
cd2b490b40
@ -18,6 +18,24 @@ class LogLevel(Enum):
|
||||
def __str__(self):
|
||||
return self.name.lower()
|
||||
|
||||
class LogColor(str, Enum):
|
||||
"""Enum for log colors."""
|
||||
|
||||
DEBUG = "lightblack"
|
||||
INFO = "cyan"
|
||||
SUCCESS = "green"
|
||||
WARNING = "yellow"
|
||||
ERROR = "red"
|
||||
CYAN = "cyan"
|
||||
GREEN = "green"
|
||||
YELLOW = "yellow"
|
||||
MAGENTA = "magenta"
|
||||
DIM_MAGENTA = "dim magenta"
|
||||
|
||||
def __str__(self):
|
||||
"""Automatically convert rich color to string."""
|
||||
return self.value
|
||||
|
||||
|
||||
class AsyncLoggerBase(ABC):
|
||||
@abstractmethod
|
||||
@ -48,6 +66,7 @@ class AsyncLoggerBase(ABC):
|
||||
def error_status(self, url: str, error: str, tag: str = "ERROR", url_length: int = 50):
|
||||
pass
|
||||
|
||||
|
||||
class AsyncLogger(AsyncLoggerBase):
|
||||
"""
|
||||
Asynchronous logger with support for colored console output and file logging.
|
||||
@ -68,11 +87,11 @@ class AsyncLogger(AsyncLoggerBase):
|
||||
}
|
||||
|
||||
DEFAULT_COLORS = {
|
||||
LogLevel.DEBUG: "lightblack",
|
||||
LogLevel.INFO: "cyan",
|
||||
LogLevel.SUCCESS: "green",
|
||||
LogLevel.WARNING: "yellow",
|
||||
LogLevel.ERROR: "red",
|
||||
LogLevel.DEBUG: LogColor.DEBUG,
|
||||
LogLevel.INFO: LogColor.INFO,
|
||||
LogLevel.SUCCESS: LogColor.SUCCESS,
|
||||
LogLevel.WARNING: LogColor.WARNING,
|
||||
LogLevel.ERROR: LogColor.ERROR,
|
||||
}
|
||||
|
||||
def __init__(
|
||||
@ -81,7 +100,7 @@ class AsyncLogger(AsyncLoggerBase):
|
||||
log_level: LogLevel = LogLevel.DEBUG,
|
||||
tag_width: int = 10,
|
||||
icons: Optional[Dict[str, str]] = None,
|
||||
colors: Optional[Dict[LogLevel, str]] = None,
|
||||
colors: Optional[Dict[LogLevel, LogColor]] = None,
|
||||
verbose: bool = True,
|
||||
):
|
||||
"""
|
||||
@ -130,9 +149,9 @@ class AsyncLogger(AsyncLoggerBase):
|
||||
message: str,
|
||||
tag: str,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
colors: Optional[Dict[str, str]] = None,
|
||||
colors: Optional[Dict[str, LogColor]] = None,
|
||||
boxes: Optional[List[str]] = None,
|
||||
base_color: Optional[str] = None,
|
||||
base_color: Optional[LogColor] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
@ -152,8 +171,11 @@ class AsyncLogger(AsyncLoggerBase):
|
||||
|
||||
# avoid conflict with rich formatting
|
||||
parsed_message = message.replace("[", "[[").replace("]", "]]")
|
||||
raw_message = message.format(**params) if params else message
|
||||
if params:
|
||||
# FIXME: If there are formatting strings in floating point format,
|
||||
# this may result in colors and boxes not being applied properly.
|
||||
# such as {value:.2f}, the value is 0.23333 format it to 0.23,
|
||||
# but we replace("0.23333", "[color]0.23333[/color]")
|
||||
formatted_message = parsed_message.format(**params)
|
||||
for key, value in params.items():
|
||||
# value_str may discard `[` and `]`, so we need to replace it.
|
||||
@ -163,17 +185,17 @@ class AsyncLogger(AsyncLoggerBase):
|
||||
color_str = f"[{colors[key]}]{value_str}[/{colors[key]}]"
|
||||
formatted_message = formatted_message.replace(value_str, color_str)
|
||||
value_str = color_str
|
||||
|
||||
|
||||
# check is need apply box
|
||||
if boxes and key in boxes:
|
||||
formatted_message = formatted_message.replace(value_str,
|
||||
formatted_message = formatted_message.replace(value_str,
|
||||
create_box_message(value_str, type=str(level)))
|
||||
|
||||
|
||||
else:
|
||||
formatted_message = parsed_message
|
||||
|
||||
# Construct the full log line
|
||||
color = base_color or self.colors[level]
|
||||
color: LogColor = base_color or self.colors[level]
|
||||
log_line = f"[{color}]{self._format_tag(tag)} {self._get_icon(tag)} {formatted_message} [/{color}]"
|
||||
|
||||
# Output to console if verbose
|
||||
@ -223,17 +245,17 @@ class AsyncLogger(AsyncLoggerBase):
|
||||
"""
|
||||
self._log(
|
||||
level=LogLevel.SUCCESS if success else LogLevel.ERROR,
|
||||
message="{url:.{url_length}}... | Status: {status} | Time: {timing:.2f}s",
|
||||
message="{url:.{url_length}}... | Status: {status} | Time: {timing}s",
|
||||
tag=tag,
|
||||
params={
|
||||
"url": url,
|
||||
"url_length": url_length,
|
||||
"status": success,
|
||||
"timing": timing,
|
||||
"timing": f"{timing:.2f}", # aviod a format string
|
||||
},
|
||||
colors={
|
||||
"status": "green" if success else "red",
|
||||
"timing": "yellow",
|
||||
"status": LogColor.SUCCESS if success else LogColor.ERROR,
|
||||
"timing": LogColor.WARNING,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@ -34,7 +34,7 @@ from .markdown_generation_strategy import (
|
||||
MarkdownGenerationStrategy,
|
||||
)
|
||||
from .deep_crawling import DeepCrawlDecorator
|
||||
from .async_logger import AsyncLogger, AsyncLoggerBase
|
||||
from .async_logger import AsyncLogger, AsyncLoggerBase, LogColor
|
||||
from .async_configs import BrowserConfig, CrawlerRunConfig
|
||||
from .async_dispatcher import * # noqa: F403
|
||||
from .async_dispatcher import BaseDispatcher, MemoryAdaptiveDispatcher, RateLimiter
|
||||
@ -43,7 +43,6 @@ from .utils import (
|
||||
sanitize_input_encode,
|
||||
InvalidCSSSelectorError,
|
||||
fast_format_html,
|
||||
create_box_message,
|
||||
get_error_context,
|
||||
RobotsParser,
|
||||
)
|
||||
@ -381,8 +380,8 @@ class AsyncWebCrawler:
|
||||
"timing": f"{time.perf_counter() - start_time:.2f}s",
|
||||
},
|
||||
colors={
|
||||
"status": "green" if crawl_result.success else "red",
|
||||
"timing": "yellow",
|
||||
"status": LogColor.SUCCESS if crawl_result.success else LogColor.ERROR,
|
||||
"timing": LogColor.WARNING,
|
||||
},
|
||||
)
|
||||
|
||||
@ -401,7 +400,10 @@ class AsyncWebCrawler:
|
||||
"status": True,
|
||||
"timing": f"{time.perf_counter() - start_time:.2f}s",
|
||||
},
|
||||
colors={"status": "green", "timing": "yellow"},
|
||||
colors={
|
||||
"status": LogColor.SUCCESS if crawl_result.success else LogColor.ERROR,
|
||||
"timing": LogColor.WARNING,
|
||||
},
|
||||
)
|
||||
|
||||
cached_result.success = bool(html)
|
||||
|
||||
@ -20,7 +20,7 @@ from rich.console import Console
|
||||
|
||||
from .async_configs import BrowserConfig
|
||||
from .browser_manager import ManagedBrowser
|
||||
from .async_logger import AsyncLogger, AsyncLoggerBase
|
||||
from .async_logger import AsyncLogger, AsyncLoggerBase, LogColor
|
||||
from .utils import get_home_folder
|
||||
|
||||
|
||||
@ -129,16 +129,16 @@ class BrowserProfiler:
|
||||
|
||||
# Print instructions for the user with rich formatting
|
||||
border = "{'='*80}"
|
||||
self.logger.info("{border}", tag="PROFILE", params={"border": f"\n{border}"}, colors={"border": "cyan"})
|
||||
self.logger.info("Creating browser profile: {profile_name}", tag="PROFILE", params={"profile_name": profile_name}, colors={"profile_name": "green"})
|
||||
self.logger.info("Profile directory: {profile_path}", tag="PROFILE", params={"profile_path": profile_path}, colors={"profile_path": "yellow"})
|
||||
self.logger.info("{border}", tag="PROFILE", params={"border": f"\n{border}"}, colors={"border": LogColor.CYAN})
|
||||
self.logger.info("Creating browser profile: {profile_name}", tag="PROFILE", params={"profile_name": profile_name}, colors={"profile_name": LogColor.GREEN})
|
||||
self.logger.info("Profile directory: {profile_path}", tag="PROFILE", params={"profile_path": profile_path}, colors={"profile_path": LogColor.YELLOW})
|
||||
|
||||
self.logger.info("\nInstructions:", tag="PROFILE")
|
||||
self.logger.info("1. A browser window will open for you to set up your profile.", tag="PROFILE")
|
||||
self.logger.info("{segment}, configure settings, etc. as needed.", tag="PROFILE", params={"segment": "2. Log in to websites"}, colors={"segment": "cyan"})
|
||||
self.logger.info("3. When you're done, {segment} to close the browser.", tag="PROFILE", params={"segment": "press 'q' in this terminal"}, colors={"segment": "yellow"})
|
||||
self.logger.info("{segment}, configure settings, etc. as needed.", tag="PROFILE", params={"segment": "2. Log in to websites"}, colors={"segment": LogColor.CYAN})
|
||||
self.logger.info("3. When you're done, {segment} to close the browser.", tag="PROFILE", params={"segment": "press 'q' in this terminal"}, colors={"segment": LogColor.YELLOW})
|
||||
self.logger.info("4. The profile will be saved and ready to use with Crawl4AI.", tag="PROFILE")
|
||||
self.logger.info("{border}", tag="PROFILE", params={"border": f"{border}\n"}, colors={"border": "cyan"})
|
||||
self.logger.info("{border}", tag="PROFILE", params={"border": f"{border}\n"}, colors={"border": LogColor.CYAN})
|
||||
|
||||
# Create managed browser instance
|
||||
managed_browser = ManagedBrowser(
|
||||
@ -197,7 +197,7 @@ class BrowserProfiler:
|
||||
if readable:
|
||||
key = sys.stdin.read(1)
|
||||
if key.lower() == 'q':
|
||||
self.logger.info("Closing browser and saving profile...", tag="PROFILE", base_color="green")
|
||||
self.logger.info("Closing browser and saving profile...", tag="PROFILE", base_color=LogColor.GREEN)
|
||||
user_done_event.set()
|
||||
return
|
||||
|
||||
@ -223,7 +223,7 @@ class BrowserProfiler:
|
||||
self.logger.error("Failed to start browser process.", tag="PROFILE")
|
||||
return None
|
||||
|
||||
self.logger.info(f"Browser launched. Waiting for you to finish...", tag="PROFILE")
|
||||
self.logger.info("Browser launched. Waiting for you to finish...", tag="PROFILE")
|
||||
|
||||
# Start listening for keyboard input
|
||||
listener_task = asyncio.create_task(listen_for_quit_command())
|
||||
@ -440,18 +440,18 @@ class BrowserProfiler:
|
||||
```
|
||||
"""
|
||||
while True:
|
||||
self.logger.info(f"\nProfile Management Options:", tag="MENU")
|
||||
self.logger.info(f"1. Create a new profile", tag="MENU", base_color="green")
|
||||
self.logger.info(f"2. List available profiles", tag="MENU", base_color="yellow")
|
||||
self.logger.info(f"3. Delete a profile", tag="MENU", base_color="red")
|
||||
self.logger.info("\nProfile Management Options:", tag="MENU")
|
||||
self.logger.info("1. Create a new profile", tag="MENU", base_color=LogColor.GREEN)
|
||||
self.logger.info("2. List available profiles", tag="MENU", base_color=LogColor.YELLOW)
|
||||
self.logger.info("3. Delete a profile", tag="MENU", base_color=LogColor.RED)
|
||||
|
||||
# Only show crawl option if callback provided
|
||||
if crawl_callback:
|
||||
self.logger.info(f"4. Use a profile to crawl a website", tag="MENU", base_color="cyan")
|
||||
self.logger.info(f"5. Exit", tag="MENU", base_color="magenta")
|
||||
self.logger.info("4. Use a profile to crawl a website", tag="MENU", base_color=LogColor.CYAN)
|
||||
self.logger.info("5. Exit", tag="MENU", base_color=LogColor.MAGENTA)
|
||||
exit_option = "5"
|
||||
else:
|
||||
self.logger.info(f"4. Exit", tag="MENU", base_color="magenta")
|
||||
self.logger.info("4. Exit", tag="MENU", base_color=LogColor.MAGENTA)
|
||||
exit_option = "4"
|
||||
|
||||
self.logger.print(f"\n[cyan]Enter your choice (1-{exit_option}): [/cyan]", end="")
|
||||
@ -475,7 +475,7 @@ class BrowserProfiler:
|
||||
self.logger.info("\nAvailable profiles:", tag="PROFILES")
|
||||
for i, profile in enumerate(profiles):
|
||||
self.logger.info(f"[{i+1}] {profile['name']}", tag="PROFILES")
|
||||
self.logger.info(f" Path: {profile['path']}", tag="PROFILES", base_color="yellow")
|
||||
self.logger.info(f" Path: {profile['path']}", tag="PROFILES", base_color=LogColor.YELLOW)
|
||||
self.logger.info(f" Created: {profile['created'].strftime('%Y-%m-%d %H:%M:%S')}", tag="PROFILES")
|
||||
self.logger.info(f" Browser type: {profile['type']}", tag="PROFILES")
|
||||
self.logger.info("", tag="PROFILES") # Empty line for spacing
|
||||
@ -488,7 +488,7 @@ class BrowserProfiler:
|
||||
continue
|
||||
|
||||
# Display numbered list
|
||||
self.logger.info(f"\nAvailable profiles:", tag="PROFILES", base_color="yellow")
|
||||
self.logger.info("\nAvailable profiles:", tag="PROFILES", base_color=LogColor.YELLOW)
|
||||
for i, profile in enumerate(profiles):
|
||||
self.logger.info(f"[{i+1}] {profile['name']}", tag="PROFILES")
|
||||
|
||||
@ -527,7 +527,7 @@ class BrowserProfiler:
|
||||
continue
|
||||
|
||||
# Display numbered list
|
||||
self.logger.info(f"\nAvailable profiles:", tag="PROFILES", base_color="yellow")
|
||||
self.logger.info("\nAvailable profiles:", tag="PROFILES", base_color=LogColor.YELLOW)
|
||||
for i, profile in enumerate(profiles):
|
||||
self.logger.info(f"[{i+1}] {profile['name']}", tag="PROFILES")
|
||||
|
||||
@ -605,9 +605,9 @@ class BrowserProfiler:
|
||||
# Print initial information
|
||||
border = f"{Fore.CYAN}{'='*80}{Style.RESET_ALL}"
|
||||
self.logger.info(f"\n{border}", tag="CDP")
|
||||
self.logger.info(f"Launching standalone browser with CDP debugging", tag="CDP")
|
||||
self.logger.info("Browser type: {browser_type}", tag="CDP", params={"browser_type": browser_type}, colors={"browser_type": "cyan"})
|
||||
self.logger.info("Profile path: {profile_path}", tag="CDP", params={"profile_path": profile_path}, colors={"profile_path": "yellow"})
|
||||
self.logger.info("Launching standalone browser with CDP debugging", tag="CDP")
|
||||
self.logger.info("Browser type: {browser_type}", tag="CDP", params={"browser_type": browser_type}, colors={"browser_type": LogColor.CYAN})
|
||||
self.logger.info("Profile path: {profile_path}", tag="CDP", params={"profile_path": profile_path}, colors={"profile_path": LogColor.YELLOW})
|
||||
self.logger.info(f"Debugging port: {debugging_port}", tag="CDP")
|
||||
self.logger.info(f"Headless mode: {headless}", tag="CDP")
|
||||
|
||||
@ -722,7 +722,7 @@ class BrowserProfiler:
|
||||
self.logger.error("Failed to start browser process.", tag="CDP")
|
||||
return None
|
||||
|
||||
self.logger.info(f"Browser launched successfully. Retrieving CDP information...", tag="CDP")
|
||||
self.logger.info("Browser launched successfully. Retrieving CDP information...", tag="CDP")
|
||||
|
||||
# Get CDP URL and JSON config
|
||||
cdp_url, config_json = await get_cdp_json(debugging_port)
|
||||
@ -732,10 +732,10 @@ class BrowserProfiler:
|
||||
|
||||
if config_json:
|
||||
# Display relevant CDP information
|
||||
self.logger.info(f"Browser: {config_json.get('Browser', 'Unknown')}", tag="CDP", colors={"Browser": "cyan"})
|
||||
self.logger.info(f"Protocol Version: {config_json.get('Protocol-Version', 'Unknown')}", tag="CDP", colors={"Protocol-Version": "cyan"})
|
||||
self.logger.info(f"Browser: {config_json.get('Browser', 'Unknown')}", tag="CDP", colors={"Browser": LogColor.CYAN})
|
||||
self.logger.info(f"Protocol Version: {config_json.get('Protocol-Version', 'Unknown')}", tag="CDP", colors={"Protocol-Version": LogColor.CYAN})
|
||||
if 'webSocketDebuggerUrl' in config_json:
|
||||
self.logger.info("WebSocket URL: {webSocketDebuggerUrl}", tag="CDP", params={"webSocketDebuggerUrl": config_json['webSocketDebuggerUrl']}, colors={"webSocketDebuggerUrl": "green"})
|
||||
self.logger.info("WebSocket URL: {webSocketDebuggerUrl}", tag="CDP", params={"webSocketDebuggerUrl": config_json['webSocketDebuggerUrl']}, colors={"webSocketDebuggerUrl": LogColor.GREEN})
|
||||
else:
|
||||
self.logger.warning("Could not retrieve CDP configuration JSON", tag="CDP")
|
||||
else:
|
||||
|
||||
@ -27,9 +27,7 @@ import json
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from .async_logger import AsyncLogger, LogLevel
|
||||
from rich.console import Console
|
||||
from rich.text import Text
|
||||
from .async_logger import AsyncLogger, LogLevel, LogColor
|
||||
|
||||
|
||||
class RelevantContentFilter(ABC):
|
||||
@ -847,7 +845,7 @@ class LLMContentFilter(RelevantContentFilter):
|
||||
},
|
||||
colors={
|
||||
**AsyncLogger.DEFAULT_COLORS,
|
||||
LogLevel.INFO: "dim magenta" # Dimmed purple for LLM ops
|
||||
LogLevel.INFO: LogColor.DIM_MAGENTA # Dimmed purple for LLM ops
|
||||
},
|
||||
)
|
||||
else:
|
||||
@ -892,7 +890,7 @@ class LLMContentFilter(RelevantContentFilter):
|
||||
"Starting LLM markdown content filtering process",
|
||||
tag="LLM",
|
||||
params={"provider": self.llm_config.provider},
|
||||
colors={"provider": "cyan"},
|
||||
colors={"provider": LogColor.CYAN},
|
||||
)
|
||||
|
||||
# Cache handling
|
||||
@ -929,7 +927,7 @@ class LLMContentFilter(RelevantContentFilter):
|
||||
"LLM markdown: Split content into {chunk_count} chunks",
|
||||
tag="CHUNK",
|
||||
params={"chunk_count": len(html_chunks)},
|
||||
colors={"chunk_count": "yellow"},
|
||||
colors={"chunk_count": LogColor.YELLOW},
|
||||
)
|
||||
|
||||
start_time = time.time()
|
||||
@ -1038,7 +1036,7 @@ class LLMContentFilter(RelevantContentFilter):
|
||||
"LLM markdown: Completed processing in {time:.2f}s",
|
||||
tag="LLM",
|
||||
params={"time": end_time - start_time},
|
||||
colors={"time": "yellow"},
|
||||
colors={"time": LogColor.YELLOW},
|
||||
)
|
||||
|
||||
result = ordered_results if ordered_results else []
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user