# crawl4ai/build_hooks.py
#
# 48 lines
# 1.7 KiB
# Python

import os
import shutil
from pathlib import Path
import subprocess
import sys
from hatchling.builders.hooks.plugin.interface import BuildHookInterface
# Name of the build hook class defined in this module.
# NOTE(review): presumably consumed by the build tooling — confirm where it is read.
PLUGIN = "CustomBuildHook"
class CustomBuildHook(BuildHookInterface):
    """Build hook that prepares the local ``.crawl4ai`` working environment.

    When the hook is initialized it performs three steps in order:

    1. (Re)creates the ``.crawl4ai`` folder structure.
    2. Runs ``python -m playwright install`` with the current interpreter.
    3. Initializes the crawl4ai database via ``async_db_manager``.

    Steps 2 and 3 are best-effort: any failure is reported as a warning on
    stdout and does not abort the build. Filesystem errors raised in step 1
    are not caught and propagate to the caller.
    """

    # Sub-folders created directly under ``.crawl4ai`` (siblings of ``cache``).
    _CONTENT_FOLDERS = (
        "html_content",
        "cleaned_html",
        "markdown_content",
        "extracted_content",
        "screenshots",
    )

    def initialize(self, version, build_data):
        """Set up folders, Playwright browsers and the database.

        Args:
            version: Passed in by the build backend; not used by this hook.
            build_data: Passed in by the build backend; not used or modified
                by this hook.
        """
        self._create_folder_structure()
        self._install_playwright_browsers()
        self._initialize_database()

    def _create_folder_structure(self):
        """Create ``.crawl4ai`` and its sub-folders, discarding any old cache.

        The parent directory is taken from the ``CRAWL4_AI_BASE_DIRECTORY``
        environment variable, falling back to the user's home directory when
        the variable is unset or empty.
        """
        base_dir = os.getenv("CRAWL4_AI_BASE_DIRECTORY")
        root = Path(base_dir) if base_dir else Path.home()
        crawl4ai_folder = root / ".crawl4ai"
        cache_folder = crawl4ai_folder / "cache"

        # Clean up old cache if it exists so the build starts from a clean one.
        if cache_folder.exists():
            shutil.rmtree(cache_folder)

        # parents=True: a custom base directory may not exist yet; without it
        # mkdir() raises FileNotFoundError and the whole build fails.
        crawl4ai_folder.mkdir(parents=True, exist_ok=True)
        cache_folder.mkdir(exist_ok=True)
        for folder in self._CONTENT_FOLDERS:
            (crawl4ai_folder / folder).mkdir(exist_ok=True)

    def _install_playwright_browsers(self):
        """Run ``playwright install`` with the current interpreter (best-effort)."""
        try:
            subprocess.check_call([sys.executable, "-m", "playwright", "install"])
        except Exception as e:
            # Deliberately broad: a missing playwright module, a non-zero exit
            # code or an OS error must not break the build.
            print(f"Warning: Playwright installation failed: {e}")
            print("Please run 'python -m playwright install' manually after installation")

    def _initialize_database(self):
        """Initialize the crawl4ai database (best-effort)."""
        try:
            # Imported lazily and inside the try block so that an import
            # failure is reported as a warning instead of aborting the build.
            from crawl4ai.async_database import async_db_manager
            import asyncio

            asyncio.run(async_db_manager.initialize())
        except Exception as e:
            print(f"Warning: Database initialization failed: {e}")
            print("Database will be initialized on first use")