import fs from "fs";
import puppeteer from "puppeteer-core";
import path from "path";
import os from "os";
import { execSync } from "child_process";
import * as ChromeLauncher from "chrome-launcher";

// ===== Configuration Types and Defaults =====

/**
 * Configuration interface for the Puppeteer service.
 *
 * Every field is optional; omitted fields fall back to `DEFAULT_CONFIG`.
 */
export interface PuppeteerServiceConfig {
  // --- Browser preferences ---

  /** Order of browser preference ("chrome", "edge", "brave", "firefox") */
  preferredBrowsers?: string[];
  /** Custom browser executable paths, keyed by browser name */
  customBrowserPaths?: { [key: string]: string };

  // --- Connection settings ---

  /** Ports to try when connecting to existing browsers */
  debugPorts?: number[];
  /** Timeout for connection attempts in ms */
  connectionTimeout?: number;
  /** Maximum number of retries for connections */
  maxRetries?: number;

  // --- Browser cleanup settings ---

  /** Timeout before closing inactive browsers (ms) */
  browserCleanupTimeout?: number;

  // --- Performance settings ---

  /** Resource types to block for performance */
  blockResourceTypes?: string[];
}

/** Default configuration values */
const DEFAULT_CONFIG: PuppeteerServiceConfig = {
  preferredBrowsers: ["chrome", "edge", "brave", "firefox"],
  debugPorts: [9222, 9223, 9224, 9225],
  connectionTimeout: 10000,
  maxRetries: 3,
  browserCleanupTimeout: 60000,
  blockResourceTypes: ["image", "font", "media"],
};

// Browser support notes:
// - Chrome/Chromium: Fully supported (primary target)
// - Edge: Fully supported (Chromium-based)
// - Brave: Fully supported (Chromium-based)
// - Firefox: Partially supported (some features may not work)
// - Safari: Not supported by Puppeteer

// ===== Global State =====

/** Current active configuration (starts as a shallow copy of the defaults) */
let currentConfig: PuppeteerServiceConfig = { ...DEFAULT_CONFIG };

// Browser instance management
/** The browser launched by this module, or null when none is tracked */
let headlessBrowserInstance: puppeteer.Browser | null = null;
/** WebSocket endpoint of the tracked browser, or null when none is tracked */
let launchedBrowserWSEndpoint: string | null = null;

// Cleanup management
/** Handle of the pending inactivity-cleanup timer, if any */
let browserCleanupTimeout: NodeJS.Timeout | null = null;
/**
 * Inactivity delay (ms) before the browser is closed. Initialised from the
 * default configuration so the two values cannot disagree (60 seconds).
 */
let BROWSER_CLEANUP_TIMEOUT = DEFAULT_CONFIG.browserCleanupTimeout ?? 60000;
// Cache for browser executable paths
let detectedBrowserPath: string | null = null;

/** Browser order used when the active configuration does not specify one */
const FALLBACK_PREFERRED_BROWSERS = ["chrome", "edge", "brave", "firefox"];

// ===== Configuration Functions =====

/**
 * Configure the Puppeteer service with custom settings.
 *
 * The supplied values are merged over `DEFAULT_CONFIG` (not over the
 * previously active configuration), so each call starts from the defaults.
 *
 * @param config Partial configuration to override defaults
 */
export function configurePuppeteerService(
  config: Partial<PuppeteerServiceConfig>
): void {
  currentConfig = { ...DEFAULT_CONFIG, ...config };

  // Take the timer value from the merged configuration so that it never
  // disagrees with what `currentConfig` reports. Non-positive or missing
  // values are ignored and the previous timer value is kept.
  const cleanupTimeout = currentConfig.browserCleanupTimeout;
  if (typeof cleanupTimeout === "number" && cleanupTimeout > 0) {
    BROWSER_CLEANUP_TIMEOUT = cleanupTimeout;
  }

  console.log("Puppeteer service configured:", currentConfig);
}

// ===== Browser Management =====

/**
 * Get or create a headless browser instance.
 *
 * Cancels any pending inactivity cleanup, then reuses the tracked browser if
 * it still answers a `pages()` call; otherwise launches a new one.
 *
 * @returns Promise resolving to a browser instance
 */
async function getHeadlessBrowserInstance(): Promise<puppeteer.Browser> {
  console.log("Browser instance request started");

  // Cancel any scheduled cleanup
  cancelScheduledCleanup();

  // Try to reuse existing browser
  const existing = headlessBrowserInstance;
  if (existing) {
    try {
      // A failing pages() call is treated as a sign that the instance is dead.
      const pages = await existing.pages();
      console.log(
        `Reusing existing headless browser with ${pages.length} pages`
      );
      return existing;
    } catch {
      console.log(
        "Existing browser instance is no longer valid, creating a new one"
      );
      headlessBrowserInstance = null;
      launchedBrowserWSEndpoint = null;
    }
  }

  // Create a new browser instance
  return launchNewBrowser();
}

/**
 * Launches a new browser instance with its own temporary profile directory
 * and records it in the module-level state.
 *
 * On failure the partially started browser (if any) is closed, the temporary
 * directory is removed, and the original error is rethrown.
 *
 * @returns Promise resolving to a browser instance
 */
async function launchNewBrowser(): Promise<puppeteer.Browser> {
  console.log("Creating new headless browser instance");

  // Setup temporary user data directory
  const userDataDir = createTempUserDataDir();
  let browser: puppeteer.Browser | null = null;

  try {
    // Configure launch options
    const launchOptions = configureLaunchOptions(userDataDir);

    // Set custom browser executable
    await setCustomBrowserExecutable(launchOptions);

    // Launch the browser
    console.log(
      "Launching browser with options:",
      JSON.stringify({
        headless: launchOptions.headless,
        executablePath: launchOptions.executablePath,
      })
    );
    browser = await puppeteer.launch(launchOptions);

    // Store references to the browser instance
    launchedBrowserWSEndpoint = browser.wsEndpoint();
    headlessBrowserInstance = browser;

    // Setup cleanup handlers
    setupBrowserCleanupHandlers(browser, userDataDir);

    console.log("Browser ready");
    return browser;
  } catch (error) {
    console.error("Failed to launch browser:", error);

    // Clean up resources
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        console.error("Error closing browser:", closeError);
      }
      headlessBrowserInstance = null;
      launchedBrowserWSEndpoint = null;
    }

    // Clean up the temporary directory
    try {
      fs.rmSync(userDataDir, { recursive: true, force: true });
    } catch (fsError) {
      console.error("Error removing temporary directory:", fsError);
    }

    throw error;
  }
}

/**
 * Creates a temporary user data directory for the browser.
 *
 * Uses `fs.mkdtempSync`, which generates the unique suffix and creates the
 * directory in a single step, under the OS temp directory.
 *
 * @returns Path to the created directory
 */
function createTempUserDataDir(): string {
  const userDataDir = fs.mkdtempSync(
    path.join(os.tmpdir(), "browser-debug-profile-")
  );
  console.log(`Using temporary user data directory: ${userDataDir}`);
  return userDataDir;
}

/**
 * Configures browser launch options.
 *
 * @param userDataDir Path to the user data directory
 * @returns Launch options object (`args` plus `headless: "new"`)
 */
// The options object is deliberately untyped: the original code relied on
// `any` to bypass type-checking differences around `headless` / `product`.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function configureLaunchOptions(userDataDir: string): any {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const launchOptions: any = {
    args: [
      "--remote-debugging-port=0", // Use dynamic port
      `--user-data-dir=${userDataDir}`,
      "--no-first-run",
      "--no-default-browser-check",
      "--disable-dev-shm-usage",
      "--disable-extensions",
      "--disable-component-extensions-with-background-pages",
      "--disable-background-networking",
      "--disable-backgrounding-occluded-windows",
      "--disable-default-apps",
      "--disable-sync",
      "--disable-translate",
      "--metrics-recording-only",
      "--no-pings",
      "--safebrowsing-disable-auto-update",
    ],
  };

  // Add headless mode (using any to bypass type checking issues)
  launchOptions.headless = "new";

  return launchOptions;
}

/**
 * Returns true when the executable path looks like a Firefox binary.
 * The comparison is case-insensitive.
 */
function isFirefoxExecutable(executablePath: string): boolean {
  return executablePath.toLowerCase().includes("firefox");
}

/**
 * Sets the browser executable on the launch options.
 *
 * Resolution order:
 *  1. the first configured custom path (in preferred-browser order) that exists,
 *  2. the cached detected path, if it still exists,
 *  3. a freshly detected path (which is then cached).
 *
 * `launchOptions.product` is set to "firefox" when a Firefox binary is chosen.
 *
 * @param launchOptions Launch options object to modify
 * @throws Error when no executable can be determined
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
async function setCustomBrowserExecutable(launchOptions: any): Promise<void> {
  // First, try to use a custom browser path from configuration
  const customPaths = currentConfig.customBrowserPaths;
  if (customPaths && Object.keys(customPaths).length > 0) {
    const preferredBrowsers =
      currentConfig.preferredBrowsers || FALLBACK_PREFERRED_BROWSERS;

    for (const browser of preferredBrowsers) {
      const customPath = customPaths[browser];
      if (customPath && fs.existsSync(customPath)) {
        launchOptions.executablePath = customPath;

        // Set product to firefox if using Firefox browser
        if (browser === "firefox") {
          launchOptions.product = "firefox";
        }

        console.log(
          `Using custom ${browser} path: ${launchOptions.executablePath}`
        );
        return;
      }
    }
  }

  // If no custom path is found, use cached path or detect a new one
  try {
    const cachedPath = detectedBrowserPath;
    const cacheIsUsable = cachedPath !== null && fs.existsSync(cachedPath);

    let executablePath: string;
    if (cachedPath !== null && cacheIsUsable) {
      console.log(`Using cached browser path: ${cachedPath}`);
      executablePath = cachedPath;
    } else {
      executablePath = await findBrowserExecutablePath();
      detectedBrowserPath = executablePath;
    }

    launchOptions.executablePath = executablePath;

    // Check if the detected browser is Firefox
    if (isFirefoxExecutable(executablePath)) {
      launchOptions.product = "firefox";
      console.log("Setting product to firefox for Firefox browser");
    }

    if (!cacheIsUsable) {
      console.log(
        `Using detected browser path: ${launchOptions.executablePath}`
      );
    }
  } catch (error) {
    console.error("Failed to detect browser executable path:", error);
    throw new Error(
      "No browser executable path found. Please specify a custom browser path in the configuration."
    );
  }
}

/**
 * Find a browser executable path on the current system.
 *
 * Tries chrome-launcher first, then falls back to platform-specific manual
 * detection in the configured preferred-browser order.
 *
 * @returns Path to a browser executable
 * @throws Error when no browser executable is found
 */
async function findBrowserExecutablePath(): Promise<string> {
  // Try to use chrome-launcher (most reliable method)
  const launcherPath = await findChromeViaLauncher();
  if (launcherPath) {
    return launcherPath;
  }

  // If chrome-launcher failed, use manual detection
  const platform = process.platform;
  const preferredBrowsers =
    currentConfig.preferredBrowsers || FALLBACK_PREFERRED_BROWSERS;

  console.log(`Attempting to detect browser executable path on ${platform}...`);

  // Platform-specific detection strategies
  let found: string | null = null;
  if (platform === "win32") {
    found = findBrowserOnWindows(preferredBrowsers);
  } else if (platform === "darwin") {
    found = findBrowserOnMac(preferredBrowsers);
  } else if (platform === "linux") {
    found = findBrowserOnLinux(preferredBrowsers);
  }

  if (found) {
    return found;
  }

  throw new Error(
    `No browser executable found for platform ${platform}. Please specify a custom browser path.`
  );
}

/**
 * Launches Chrome through chrome-launcher purely to learn its executable
 * path, then stops it again. Returns null when Chrome cannot be launched or
 * the path cannot be determined.
 */
async function findChromeViaLauncher(): Promise<string | null> {
  try {
    console.log("Attempting to find Chrome using chrome-launcher...");

    // Launch Chrome using chrome-launcher
    const chrome = await ChromeLauncher.launch({
      chromeFlags: ["--headless"],
      handleSIGINT: false,
    });

    let chromePath = "";
    try {
      if (chrome.process && chrome.process.spawnfile) {
        // The spawned child process records the binary that was executed.
        chromePath = chrome.process.spawnfile;
        console.log("Found Chrome path from process.spawnfile");
      } else {
        console.log("Trying to determine Chrome path using other methods");

        // Fall back to CHROME_PATH and the usual per-OS install locations.
        const candidates: string[] = [];
        if (process.env.CHROME_PATH) {
          candidates.push(process.env.CHROME_PATH);
        }
        if (process.platform === "darwin") {
          candidates.push(
            "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
          );
        } else if (process.platform === "win32") {
          const programFiles = process.env.PROGRAMFILES || "C:\\Program Files";
          const programFilesX86 =
            process.env["PROGRAMFILES(X86)"] || "C:\\Program Files (x86)";
          candidates.push(
            `${programFiles}\\Google\\Chrome\\Application\\chrome.exe`,
            `${programFilesX86}\\Google\\Chrome\\Application\\chrome.exe`
          );
        } else {
          candidates.push("/usr/bin/google-chrome");
        }

        // Use the first valid path
        const existing = candidates.find((p) => fs.existsSync(p));
        if (existing) {
          chromePath = existing;
          console.log("Found Chrome path from common locations");
        }
      }
    } finally {
      // Always stop the probe instance, even if path extraction failed; a
      // failure to stop it must not discard a path that was already found.
      try {
        await chrome.kill();
      } catch (killError) {
        console.error("Error stopping probe Chrome instance:", killError);
      }
    }

    if (chromePath) {
      console.log(`Chrome found via chrome-launcher: ${chromePath}`);
      return chromePath;
    }
    console.log("Chrome launched but couldn't determine executable path");
  } catch (error) {
    // Check if it's a ChromeNotInstalledError
    const errorMessage = error instanceof Error ? error.message : String(error);
    if (
      errorMessage.includes("No Chrome installations found") ||
      (error as { code?: unknown } | null)?.code === "ERR_LAUNCHER_NOT_INSTALLED"
    ) {
      console.log("Chrome not installed. Falling back to manual detection");
    } else {
      console.error("Failed to find Chrome using chrome-launcher:", error);
      console.log("Falling back to manual detection");
    }
  }

  return null;
}

/**
 * Extracts the data of the first REG_SZ / REG_EXPAND_SZ value from the text
 * printed by `reg query`.
 *
 * The whole remainder of the line is captured, so paths containing spaces
 * (e.g. under "Program Files") are returned intact. Surrounding quotes are
 * removed.
 *
 * @param regOutput Raw stdout of a `reg query` invocation
 * @returns The value data, or null when no string value is present
 */
function parseRegistryDefaultValue(regOutput: string): string | null {
  const match = regOutput.match(/REG_(?:SZ|EXPAND_SZ)[ \t]+([^\r\n]+)/i);
  const raw = match?.[1];
  if (!raw) {
    return null;
  }
  const value = raw.replace(/"/g, "").trim();
  return value.length > 0 ? value : null;
}

/**
 * Returns the first existing path for the first preferred browser that has
 * one in the given table, or null when none exists.
 */
function findFirstInstalledBrowser(
  preferredBrowsers: string[],
  pathsByBrowser: Record<string, string[]>
): string | null {
  for (const browser of preferredBrowsers) {
    for (const browserPath of pathsByBrowser[browser] ?? []) {
      if (fs.existsSync(browserPath)) {
        console.log(`Found ${browser} at ${browserPath}`);
        return browserPath;
      }
    }
  }
  return null;
}

/**
 * Windows detection: App Paths registry keys (HKLM, then HKCU), the Chrome
 * BLBeacon key as a hint for the default install location, and finally the
 * well-known install paths in preferred-browser order.
 */
function findBrowserOnWindows(preferredBrowsers: string[]): string | null {
  const programFiles = process.env.PROGRAMFILES || "C:\\Program Files";
  const programFilesX86 =
    process.env["PROGRAMFILES(X86)"] || "C:\\Program Files (x86)";
  const defaultChromePaths = [
    path.join(programFiles, "Google\\Chrome\\Application\\chrome.exe"),
    path.join(programFilesX86, "Google\\Chrome\\Application\\chrome.exe"),
  ];

  // Both hives are always consulted: HKCU is tried whenever HKLM does not
  // yield an existing path, not only when the HKLM query itself fails.
  const appPathKeys = [
    {
      hive: "HKLM",
      announce: "Checking Windows registry for Chrome...",
      key: "HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\App Paths\\chrome.exe",
    },
    {
      hive: "HKCU",
      announce: "Checking user registry for Chrome...",
      key: "HKEY_CURRENT_USER\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\App Paths\\chrome.exe",
    },
  ];

  for (const { hive, announce, key } of appPathKeys) {
    try {
      console.log(announce);
      const regOutput = execSync(`reg query "${key}" /ve`, {
        encoding: "utf8",
        stdio: ["ignore", "pipe", "ignore"],
      });
      const registryPath = parseRegistryDefaultValue(regOutput);
      if (registryPath && fs.existsSync(registryPath)) {
        console.log(`Found Chrome via ${hive} registry: ${registryPath}`);
        return registryPath;
      }
    } catch {
      // Key missing or `reg` unavailable: try the next hive.
    }
  }
  console.log(
    "Failed to find Chrome via registry, continuing with path checks"
  );

  // Try to find Chrome through BLBeacon registry key (version info)
  try {
    console.log("Checking Chrome BLBeacon registry...");
    const regOutput = execSync(
      'reg query "HKEY_CURRENT_USER\\Software\\Google\\Chrome\\BLBeacon" /v version',
      { encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] }
    );

    if (regOutput) {
      // If BLBeacon exists, Chrome is likely installed in the default location
      const hinted = defaultChromePaths.find((p) => fs.existsSync(p));
      if (hinted) {
        console.log(`Found Chrome via BLBeacon registry hint: ${hinted}`);
        return hinted;
      }
    }
  } catch {
    console.log("Failed to find Chrome via BLBeacon registry");
  }

  // Common Windows browser paths
  const winBrowserPaths: Record<string, string[]> = {
    chrome: defaultChromePaths,
    edge: [
      path.join(programFiles, "Microsoft\\Edge\\Application\\msedge.exe"),
      path.join(programFilesX86, "Microsoft\\Edge\\Application\\msedge.exe"),
    ],
    brave: [
      path.join(
        programFiles,
        "BraveSoftware\\Brave-Browser\\Application\\brave.exe"
      ),
      path.join(
        programFilesX86,
        "BraveSoftware\\Brave-Browser\\Application\\brave.exe"
      ),
    ],
    firefox: [
      path.join(programFiles, "Mozilla Firefox\\firefox.exe"),
      path.join(programFilesX86, "Mozilla Firefox\\firefox.exe"),
    ],
  };

  // Check each browser in preferred order
  return findFirstInstalledBrowser(preferredBrowsers, winBrowserPaths);
}

/**
 * macOS detection: checks the standard application bundle locations in
 * preferred-browser order. Safari is recognised but skipped because it is
 * not supported by Puppeteer.
 */
function findBrowserOnMac(preferredBrowsers: string[]): string | null {
  const macBrowserPaths: Record<string, string[]> = {
    chrome: ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"],
    edge: ["/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge"],
    brave: ["/Applications/Brave Browser.app/Contents/MacOS/Brave Browser"],
    firefox: ["/Applications/Firefox.app/Contents/MacOS/firefox"],
    safari: ["/Applications/Safari.app/Contents/MacOS/Safari"],
  };

  // Check each browser in preferred order
  for (const browser of preferredBrowsers) {
    for (const browserPath of macBrowserPaths[browser] ?? []) {
      if (!fs.existsSync(browserPath)) {
        continue;
      }
      console.log(`Found ${browser} at ${browserPath}`);

      // Safari is detected but not supported by Puppeteer
      if (browser === "safari") {
        console.log(
          "Safari detected but not supported by Puppeteer. Continuing search..."
        );
        continue;
      }

      return browserPath;
    }
  }
  return null;
}

/**
 * Linux detection: resolves well-known browser commands through the shell in
 * preferred-browser order, then checks a list of fixed install locations.
 */
function findBrowserOnLinux(preferredBrowsers: string[]): string | null {
  // Only these fixed command names are ever interpolated into the shell line.
  const linuxBrowserCommands: Record<string, string[]> = {
    chrome: ["google-chrome", "chromium", "chromium-browser"],
    edge: ["microsoft-edge"],
    brave: ["brave-browser"],
    firefox: ["firefox"],
  };

  // Check each browser in preferred order
  for (const browser of preferredBrowsers) {
    for (const cmd of linuxBrowserCommands[browser] ?? []) {
      try {
        // command -v works in most shells, fallback to which or type.
        // stderr is discarded so "not found" chatter does not reach the logs.
        const output = execSync(
          `command -v ${cmd} || which ${cmd} || type -p ${cmd} 2>/dev/null`,
          { encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] }
        );
        const browserPath = (output.split("\n")[0] ?? "").trim();

        if (browserPath && fs.existsSync(browserPath)) {
          console.log(`Found ${browser} at ${browserPath}`);
          return browserPath;
        }
      } catch {
        // Command not found, continue to next
      }
    }
  }

  // Additional check for unusual locations on Linux
  const alternativeLocations = [
    "/usr/bin/google-chrome",
    "/usr/bin/chromium",
    "/usr/bin/chromium-browser",
    "/snap/bin/chromium",
    "/snap/bin/google-chrome",
    "/opt/google/chrome/chrome",
  ];

  const location = alternativeLocations.find((p) => fs.existsSync(p));
  if (location) {
    console.log(`Found browser at alternative location: ${location}`);
    return location;
  }
  return null;
}

/**
 * Sets up cleanup handlers for the browser instance.
 *
 * When the browser disconnects, the shared module state is reset only if it
 * still refers to this browser, and this browser's own temporary profile
 * directory is removed after a short delay.
 *
 * @param browser Browser instance
 * @param userDataDir Path to the user data directory to clean up
 */
function setupBrowserCleanupHandlers(
  browser: puppeteer.Browser,
  userDataDir: string
): void {
  browser.on("disconnected", () => {
    console.log(`Browser disconnected. Scheduling cleanup for: ${userDataDir}`);

    // Only touch the shared state when it still belongs to this browser; a
    // late disconnect of an old browser must not orphan a newer instance or
    // cancel its cleanup timer.
    if (headlessBrowserInstance === browser) {
      cancelScheduledCleanup();
      launchedBrowserWSEndpoint = null;
      headlessBrowserInstance = null;
    }

    // The directory was created for this browser only, so it is always safe
    // to delete once the browser is gone, whether or not a newer browser
    // (which has its own directory) is running. The removal is delayed by
    // 5 seconds rather than done immediately.
    setTimeout(() => {
      console.log(`Cleaning up temporary directory: ${userDataDir}`);
      try {
        fs.rmSync(userDataDir, { recursive: true, force: true });
        console.log(`Successfully removed directory: ${userDataDir}`);
      } catch (error) {
        console.error(`Failed to remove directory ${userDataDir}:`, error);
      }
    }, 5000); // 5-second delay for cleanup
  });
}

// ===== Cleanup Management =====

/**
 * Cancels any scheduled browser cleanup
 */
function cancelScheduledCleanup(): void {
  if (browserCleanupTimeout) {
    console.log("Cancelling scheduled browser cleanup");
    clearTimeout(browserCleanupTimeout);
    browserCleanupTimeout = null;
  }
}

/**
 * Schedules automatic cleanup of the browser instance after inactivity.
 *
 * Any previously scheduled cleanup is replaced. Nothing is scheduled when no
 * browser is currently tracked.
 */
export function scheduleBrowserCleanup(): void {
  // Clear any existing timeout first
  cancelScheduledCleanup();

  // Only schedule cleanup if we have an active browser instance
  if (!headlessBrowserInstance) {
    return;
  }

  console.log(
    `Scheduling browser cleanup in ${BROWSER_CLEANUP_TIMEOUT / 1000} seconds`
  );

  browserCleanupTimeout = setTimeout(() => {
    console.log("Executing scheduled browser cleanup");
    const browser = headlessBrowserInstance;
    if (browser) {
      console.log("Closing headless browser instance");
      headlessBrowserInstance = null;
      launchedBrowserWSEndpoint = null;
      // close() is asynchronous; report failures instead of leaving an
      // unhandled promise rejection.
      browser.close().catch((error: unknown) => {
        console.error("Error closing headless browser:", error);
      });
    }
    browserCleanupTimeout = null;
  }, BROWSER_CLEANUP_TIMEOUT);
}

// ===== Public Browser Connection API =====
/**
 * Connects to a headless browser for web operations.
 *
 * Validates the URL, obtains (or launches) the shared headless browser,
 * opens a fresh page, applies the requested page configuration and then
 * navigates to the URL.
 *
 * Headers, cookies, viewport / user agent, locale, timezone and resource
 * blocking are applied BEFORE navigation so that they affect the page load.
 * Network emulation is applied AFTER the initial load, so that "offline" or
 * throttled profiles do not make the initial navigation fail or time out;
 * it affects subsequent activity on the returned page.
 *
 * If anything fails after the page has been opened, the page is closed
 * before the error is rethrown.
 *
 * @param url The URL to navigate to
 * @param options Connection and emulation options
 * @returns Promise resolving to browser, port, and page objects
 * @throws Error prefixed with "Failed to connect to headless browser: " when
 *   the URL is invalid, the browser cannot be obtained, or navigation fails
 */
export async function connectToHeadlessBrowser(
  url: string,
  options: {
    blockResources?: boolean;
    customResourceBlockList?: string[];
    emulateDevice?: "mobile" | "tablet" | "desktop";
    emulateNetworkCondition?: "slow3G" | "fast3G" | "4G" | "offline";
    viewport?: { width: number; height: number };
    locale?: string;
    timezoneId?: string;
    userAgent?: string;
    waitForSelector?: string;
    waitForTimeout?: number;
    cookies?: Array<{
      name: string;
      value: string;
      domain?: string;
      path?: string;
    }>;
    headers?: Record<string, string>;
  } = {}
): Promise<{
  browser: puppeteer.Browser;
  port: number;
  page: puppeteer.Page;
}> {
  console.log(
    `Connecting to headless browser for ${url}${
      options.blockResources ? " (blocking non-essential resources)" : ""
    }`
  );

  // Tracked outside the try block so a failure can close it again.
  let page: puppeteer.Page | null = null;

  try {
    // Validate URL format
    let parsedUrl: URL;
    try {
      parsedUrl = new URL(url);
    } catch {
      throw new Error(`Invalid URL format: ${url}`);
    }

    // Get or create a browser instance
    const browser = await getHeadlessBrowserInstance();

    if (!launchedBrowserWSEndpoint) {
      throw new Error("Failed to retrieve WebSocket endpoint for browser");
    }

    // Extract port from WebSocket endpoint
    const port = extractDebugPort(launchedBrowserWSEndpoint);

    // Always create a new page for each audit to avoid request interception conflicts
    console.log("Creating a new page for this audit");
    page = await browser.newPage();

    // Cap navigation at 10 seconds
    const navigationTimeout = 10000;
    page.setDefaultNavigationTimeout(navigationTimeout);

    // ----- Page configuration (must happen before navigation) -----

    // Set custom headers if provided
    if (options.headers && Object.keys(options.headers).length > 0) {
      await page.setExtraHTTPHeaders(options.headers);
      console.log("Set custom HTTP headers");
    }

    // Set cookies if provided; an explicit domain lets them be stored before
    // the page has navigated anywhere.
    if (options.cookies && options.cookies.length > 0) {
      const cookiesWithDomain = options.cookies.map((cookie) => ({
        ...cookie,
        domain: cookie.domain || parsedUrl.hostname,
        path: cookie.path || "/",
      }));
      await page.setCookie(...cookiesWithDomain);
      console.log(`Set ${options.cookies.length} cookies`);
    }

    // An explicit viewport wins over a device preset.
    let userAgent = options.userAgent;
    if (options.viewport) {
      await page.setViewport(options.viewport);
      console.log(
        `Set viewport to ${options.viewport.width}x${options.viewport.height}`
      );
    } else if (options.emulateDevice) {
      const preset = resolveDevicePreset(options.emulateDevice);
      userAgent = userAgent || preset.userAgent;
      await page.setViewport(preset.viewport);
      console.log(`Emulating ${options.emulateDevice} device`);
    }

    // A caller-supplied user agent is honoured regardless of which viewport
    // branch was taken.
    if (userAgent) {
      await page.setUserAgent(userAgent);
    }

    // Set locale and timezone if provided
    if (options.locale) {
      await page.evaluateOnNewDocument((locale) => {
        Object.defineProperty(navigator, "language", { get: () => locale });
        Object.defineProperty(navigator, "languages", { get: () => [locale] });
      }, options.locale);
      console.log(`Set locale to ${options.locale}`);
    }

    if (options.timezoneId) {
      await page.emulateTimezone(options.timezoneId);
      console.log(`Set timezone to ${options.timezoneId}`);
    }

    // Check if we should block resources based on the options
    if (options.blockResources) {
      const resourceTypesToBlock = options.customResourceBlockList ||
        currentConfig.blockResourceTypes || ["image", "font", "media"];

      await page.setRequestInterception(true);
      page.on("request", (request) => {
        // Block unnecessary resources to speed up loading
        const resourceType = request.resourceType();
        const decision = resourceTypesToBlock.includes(resourceType)
          ? request.abort()
          : request.continue();
        // A request may already be gone (e.g. page closed); that is not an
        // error worth surfacing.
        decision.catch(() => undefined);
      });

      console.log(
        `Blocking resource types: ${resourceTypesToBlock.join(", ")}`
      );
    }

    // ----- Navigation -----

    console.log(`Navigating to ${url}`);
    await page.goto(url, {
      // Considered finished when there are no more than 2 network
      // connections for at least 500ms
      waitUntil: "networkidle2",
      timeout: navigationTimeout,
    });

    // ----- Post-navigation emulation -----

    if (options.emulateNetworkCondition) {
      if (options.emulateNetworkCondition === "offline") {
        // Offline mode has its own API; emulateNetworkConditions only takes
        // throughput and latency values.
        await page.setOfflineMode(true);
      } else {
        const profile =
          NETWORK_THROTTLING_PROFILES[options.emulateNetworkCondition];
        // @ts-ignore - Property might not be in types but is supported
        await page.emulateNetworkConditions(profile);
      }
      console.log(
        `Emulating ${options.emulateNetworkCondition} network conditions`
      );
    }

    // Wait for a specific selector if requested
    if (options.waitForSelector) {
      try {
        console.log(`Waiting for selector: ${options.waitForSelector}`);
        await page.waitForSelector(options.waitForSelector, {
          timeout: options.waitForTimeout || 30000,
        });
      } catch (selectorError) {
        const reason =
          selectorError instanceof Error
            ? selectorError.message
            : String(selectorError);
        console.warn(
          `Failed to find selector "${options.waitForSelector}": ${reason}`
        );
        // Continue anyway, don't fail the whole operation
      }
    }

    return { browser, port, page };
  } catch (error) {
    console.error("Failed to connect to headless browser:", error);

    // Do not leave a half-configured page behind in the shared browser.
    if (page) {
      try {
        await page.close();
      } catch (closeError) {
        console.error("Error closing page after failure:", closeError);
      }
    }

    throw new Error(
      `Failed to connect to headless browser: ${
        error instanceof Error ? error.message : String(error)
      }`
    );
  }
}

/**
 * Throughput (bytes per second) and latency (ms) for each throttled network
 * profile accepted by `connectToHeadlessBrowser`.
 */
const NETWORK_THROTTLING_PROFILES = {
  slow3G: {
    latency: 400,
    download: (500 * 1024) / 8,
    upload: (500 * 1024) / 8,
  },
  fast3G: {
    latency: 150,
    download: (1.5 * 1024 * 1024) / 8,
    upload: (750 * 1024) / 8,
  },
  "4G": {
    latency: 50,
    download: (4 * 1024 * 1024) / 8,
    upload: (2 * 1024 * 1024) / 8,
  },
};

/**
 * Reads the TCP port out of a DevTools WebSocket endpoint such as
 * `ws://127.0.0.1:9222/devtools/browser/<id>`.
 *
 * Uses the URL parser rather than string splitting, so bracketed IPv6 hosts
 * are handled as well.
 *
 * @param wsEndpoint WebSocket endpoint URL
 * @returns The port number
 * @throws Error when the endpoint carries no usable port
 */
function extractDebugPort(wsEndpoint: string): number {
  const port = Number.parseInt(new URL(wsEndpoint).port, 10);
  if (!Number.isInteger(port) || port <= 0) {
    throw new Error(
      `Could not determine debugging port from WebSocket endpoint: ${wsEndpoint}`
    );
  }
  return port;
}

/**
 * Returns the viewport (and, for mobile/tablet, a default user agent) used
 * for a device emulation preset.
 */
function resolveDevicePreset(device: "mobile" | "tablet" | "desktop"): {
  viewport: {
    width: number;
    height: number;
    isMobile: boolean;
    hasTouch: boolean;
  };
  userAgent?: string;
} {
  switch (device) {
    case "mobile":
      return {
        viewport: { width: 375, height: 667, isMobile: true, hasTouch: true },
        userAgent: "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X)",
      };
    case "tablet":
      return {
        viewport: { width: 768, height: 1024, isMobile: true, hasTouch: true },
        userAgent: "Mozilla/5.0 (iPad; CPU OS 13_2_3 like Mac OS X)",
      };
    case "desktop":
    default:
      return {
        viewport: {
          width: 1280,
          height: 800,
          isMobile: false,
          hasTouch: false,
        },
      };
  }
}