mirror of
https://github.com/unclecode/crawl4ai.git
synced 2025-11-02 21:12:54 +00:00
48 lines
1.7 KiB
Python
48 lines
1.7 KiB
Python
import os
|
|
import shutil
|
|
from pathlib import Path
|
|
import subprocess
|
|
import sys
|
|
from hatchling.builders.hooks.plugin.interface import BuildHookInterface
|
|
# Name of the build hook class defined in this file.
# NOTE(review): nothing in this file reads PLUGIN; presumably it is consumed
# by external tooling -- confirm before removing.
PLUGIN = "CustomBuildHook"
|
|
|
|
class CustomBuildHook(BuildHookInterface):
    """Build hook that prepares the local ``.crawl4ai`` working environment.

    When the hook is initialized it creates the ``.crawl4ai`` folder tree,
    resets the cache folder, and then makes best-effort attempts to install
    the Playwright browsers and to initialize the crawl4ai database.
    """

    def initialize(self, version, build_data):
        """Set up folders, Playwright browsers and the database.

        Args:
            version: Passed in by the build backend; not used here.
            build_data: Passed in by the build backend; not used here.

        Raises:
            OSError: If the folder structure cannot be removed or created.
                Playwright and database failures are *not* raised; they are
                reported as warnings on stdout so the build can continue.
        """
        # Resolve the root of the .crawl4ai tree: an explicit override from
        # the environment wins, otherwise fall back to the user's home.
        base_dir = os.getenv("CRAWL4_AI_BASE_DIRECTORY")
        root = Path(base_dir) if base_dir else Path.home()
        crawl4ai_folder = root / ".crawl4ai"
        cache_folder = crawl4ai_folder / "cache"
        content_folders = (
            "html_content",
            "cleaned_html",
            "markdown_content",
            "extracted_content",
            "screenshots",
        )

        # Clean up old cache if it exists so every build starts from an
        # empty cache directory.
        if cache_folder.exists():
            shutil.rmtree(cache_folder)

        # Create the new folder structure. ``parents=True`` matters when
        # CRAWL4_AI_BASE_DIRECTORY points at a directory that does not exist
        # yet; without it the build would abort with FileNotFoundError.
        crawl4ai_folder.mkdir(parents=True, exist_ok=True)
        cache_folder.mkdir(exist_ok=True)
        for folder in content_folders:
            (crawl4ai_folder / folder).mkdir(exist_ok=True)

        # Install Playwright browsers using the interpreter running the
        # build. This is deliberately best-effort: any failure is downgraded
        # to a warning with manual instructions.
        try:
            subprocess.check_call([sys.executable, "-m", "playwright", "install"])
        except Exception as e:
            print(f"Warning: Playwright installation failed: {e}")
            print("Please run 'python -m playwright install' manually after installation")

        # Initialize the database. The imports are kept local so that a
        # missing or broken crawl4ai package only produces a warning instead
        # of failing at module import time.
        try:
            from crawl4ai.async_database import async_db_manager
            import asyncio

            asyncio.run(async_db_manager.initialize())
        except Exception as e:
            print(f"Warning: Database initialization failed: {e}")
            print("Database will be initialized on first use")