crawl4ai/tests/loggers/test_logger.py

81 lines
3.0 KiB
Python
Raw Normal View History

import asyncio
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, AsyncLoggerBase
import os
from datetime import datetime
class AsyncFileLogger(AsyncLoggerBase):
"""
File-only asynchronous logger that writes logs to a specified file.
"""
def __init__(self, log_file: str):
"""
Initialize the file logger.
Args:
log_file: File path for logging
"""
self.log_file = log_file
os.makedirs(os.path.dirname(os.path.abspath(log_file)), exist_ok=True)
def _write_to_file(self, level: str, message: str, tag: str):
"""Write a message to the log file."""
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
with open(self.log_file, "a", encoding="utf-8") as f:
f.write(f"[{timestamp}] [{level}] [{tag}] {message}\n")
def debug(self, message: str, tag: str = "DEBUG", **kwargs):
"""Log a debug message to file."""
self._write_to_file("DEBUG", message, tag)
def info(self, message: str, tag: str = "INFO", **kwargs):
"""Log an info message to file."""
self._write_to_file("INFO", message, tag)
def success(self, message: str, tag: str = "SUCCESS", **kwargs):
"""Log a success message to file."""
self._write_to_file("SUCCESS", message, tag)
def warning(self, message: str, tag: str = "WARNING", **kwargs):
"""Log a warning message to file."""
self._write_to_file("WARNING", message, tag)
def error(self, message: str, tag: str = "ERROR", **kwargs):
"""Log an error message to file."""
self._write_to_file("ERROR", message, tag)
def url_status(self, url: str, success: bool, timing: float, tag: str = "FETCH", url_length: int = 50):
"""Log URL fetch status to file."""
status = "SUCCESS" if success else "FAILED"
message = f"{url[:url_length]}... | Status: {status} | Time: {timing:.2f}s"
self._write_to_file("URL_STATUS", message, tag)
def error_status(self, url: str, error: str, tag: str = "ERROR", url_length: int = 50):
"""Log error status to file."""
message = f"{url[:url_length]}... | Error: {error}"
self._write_to_file("ERROR", message, tag)
async def main():
browser_config = BrowserConfig(headless=True, verbose=True)
crawler = AsyncWebCrawler(config=browser_config, logger=AsyncFileLogger("/Users/unclecode/devs/crawl4ai/.private/tmp/crawl.log"))
await crawler.start()
try:
crawl_config = CrawlerRunConfig(
cache_mode=CacheMode.BYPASS,
)
# Use the crawler multiple times
result = await crawler.arun(
url='https://kidocode.com/',
config=crawl_config
)
if result.success:
print("First crawl - Raw Markdown Length:", len(result.markdown_v2.raw_markdown))
finally:
# Always ensure we close the crawler
await crawler.close()
if __name__ == "__main__":
asyncio.run(main())