mirror of
https://github.com/AgentDeskAI/browser-tools-mcp.git
synced 2025-06-27 00:41:26 +00:00
926 lines
29 KiB
TypeScript
926 lines
29 KiB
TypeScript
import fs from "fs";
|
|
import puppeteer from "puppeteer-core";
|
|
import path from "path";
|
|
import os from "os";
|
|
import { execSync } from "child_process";
|
|
import * as ChromeLauncher from "chrome-launcher";
|
|
// ===== Configuration Types and Defaults =====
|
|
|
|
/**
|
|
* Configuration interface for the Puppeteer service
|
|
*/
|
|
export interface PuppeteerServiceConfig {
|
|
// Browser preferences
|
|
preferredBrowsers?: string[]; // Order of browser preference ("chrome", "edge", "brave", "firefox")
|
|
customBrowserPaths?: { [key: string]: string }; // Custom browser executable paths
|
|
|
|
// Connection settings
|
|
debugPorts?: number[]; // Ports to try when connecting to existing browsers
|
|
connectionTimeout?: number; // Timeout for connection attempts in ms
|
|
maxRetries?: number; // Maximum number of retries for connections
|
|
|
|
// Browser cleanup settings
|
|
browserCleanupTimeout?: number; // Timeout before closing inactive browsers (ms)
|
|
|
|
// Performance settings
|
|
blockResourceTypes?: string[]; // Resource types to block for performance
|
|
}
|
|
|
|
// Default configuration values
|
|
const DEFAULT_CONFIG: PuppeteerServiceConfig = {
|
|
preferredBrowsers: ["chrome", "edge", "brave", "firefox"],
|
|
debugPorts: [9222, 9223, 9224, 9225],
|
|
connectionTimeout: 10000,
|
|
maxRetries: 3,
|
|
browserCleanupTimeout: 60000,
|
|
blockResourceTypes: ["image", "font", "media"],
|
|
};
|
|
|
|
// Browser support notes:
|
|
// - Chrome/Chromium: Fully supported (primary target)
|
|
// - Edge: Fully supported (Chromium-based)
|
|
// - Brave: Fully supported (Chromium-based)
|
|
// - Firefox: Partially supported (some features may not work)
|
|
// - Safari: Not supported by Puppeteer
|
|
|
|
// ===== Global State =====
|
|
|
|
// Current active configuration
|
|
let currentConfig: PuppeteerServiceConfig = { ...DEFAULT_CONFIG };
|
|
|
|
// Browser instance management
|
|
let headlessBrowserInstance: puppeteer.Browser | null = null;
|
|
let launchedBrowserWSEndpoint: string | null = null;
|
|
|
|
// Cleanup management
|
|
let browserCleanupTimeout: NodeJS.Timeout | null = null;
|
|
let BROWSER_CLEANUP_TIMEOUT = 60000; // 60 seconds default
|
|
|
|
// Cache for browser executable paths
|
|
let detectedBrowserPath: string | null = null;
|
|
|
|
// ===== Configuration Functions =====
|
|
|
|
/**
|
|
* Configure the Puppeteer service with custom settings
|
|
* @param config Partial configuration to override defaults
|
|
*/
|
|
export function configurePuppeteerService(
|
|
config: Partial<PuppeteerServiceConfig>
|
|
): void {
|
|
currentConfig = { ...DEFAULT_CONFIG, ...config };
|
|
|
|
// Update the timeout if it was changed
|
|
if (
|
|
config.browserCleanupTimeout &&
|
|
config.browserCleanupTimeout !== BROWSER_CLEANUP_TIMEOUT
|
|
) {
|
|
BROWSER_CLEANUP_TIMEOUT = config.browserCleanupTimeout;
|
|
}
|
|
|
|
console.log("Puppeteer service configured:", currentConfig);
|
|
}
|
|
|
|
// ===== Browser Management =====
|
|
|
|
/**
|
|
* Get or create a headless browser instance
|
|
* @returns Promise resolving to a browser instance
|
|
*/
|
|
async function getHeadlessBrowserInstance(): Promise<puppeteer.Browser> {
|
|
console.log("Browser instance request started");
|
|
|
|
// Cancel any scheduled cleanup
|
|
cancelScheduledCleanup();
|
|
|
|
// Try to reuse existing browser
|
|
if (headlessBrowserInstance) {
|
|
try {
|
|
const pages = await headlessBrowserInstance.pages();
|
|
console.log(
|
|
`Reusing existing headless browser with ${pages.length} pages`
|
|
);
|
|
return headlessBrowserInstance;
|
|
} catch (error) {
|
|
console.log(
|
|
"Existing browser instance is no longer valid, creating a new one"
|
|
);
|
|
headlessBrowserInstance = null;
|
|
launchedBrowserWSEndpoint = null;
|
|
}
|
|
}
|
|
|
|
// Create a new browser instance
|
|
return launchNewBrowser();
|
|
}
|
|
|
|
/**
|
|
* Launches a new browser instance
|
|
* @returns Promise resolving to a browser instance
|
|
*/
|
|
async function launchNewBrowser(): Promise<puppeteer.Browser> {
|
|
console.log("Creating new headless browser instance");
|
|
|
|
// Setup temporary user data directory
|
|
const userDataDir = createTempUserDataDir();
|
|
let browser: puppeteer.Browser | null = null;
|
|
|
|
try {
|
|
// Configure launch options
|
|
const launchOptions = configureLaunchOptions(userDataDir);
|
|
|
|
// Set custom browser executable
|
|
await setCustomBrowserExecutable(launchOptions);
|
|
|
|
// Launch the browser
|
|
console.log(
|
|
"Launching browser with options:",
|
|
JSON.stringify({
|
|
headless: launchOptions.headless,
|
|
executablePath: launchOptions.executablePath,
|
|
})
|
|
);
|
|
|
|
browser = await puppeteer.launch(launchOptions);
|
|
|
|
// Store references to the browser instance
|
|
launchedBrowserWSEndpoint = browser.wsEndpoint();
|
|
headlessBrowserInstance = browser;
|
|
|
|
// Setup cleanup handlers
|
|
setupBrowserCleanupHandlers(browser, userDataDir);
|
|
|
|
console.log("Browser ready");
|
|
return browser;
|
|
} catch (error) {
|
|
console.error("Failed to launch browser:", error);
|
|
|
|
// Clean up resources
|
|
if (browser) {
|
|
try {
|
|
await browser.close();
|
|
} catch (closeError) {
|
|
console.error("Error closing browser:", closeError);
|
|
}
|
|
headlessBrowserInstance = null;
|
|
launchedBrowserWSEndpoint = null;
|
|
}
|
|
|
|
// Clean up the temporary directory
|
|
try {
|
|
fs.rmSync(userDataDir, { recursive: true, force: true });
|
|
} catch (fsError) {
|
|
console.error("Error removing temporary directory:", fsError);
|
|
}
|
|
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Creates a temporary user data directory for the browser
|
|
* @returns Path to the created directory
|
|
*/
|
|
function createTempUserDataDir(): string {
|
|
const tempDir = os.tmpdir();
|
|
const uniqueId = `${Date.now().toString()}-${Math.random()
|
|
.toString(36)
|
|
.substring(2)}`;
|
|
const userDataDir = path.join(tempDir, `browser-debug-profile-${uniqueId}`);
|
|
fs.mkdirSync(userDataDir, { recursive: true });
|
|
console.log(`Using temporary user data directory: ${userDataDir}`);
|
|
return userDataDir;
|
|
}
|
|
|
|
/**
|
|
* Configures browser launch options
|
|
* @param userDataDir Path to the user data directory
|
|
* @returns Launch options object
|
|
*/
|
|
function configureLaunchOptions(userDataDir: string): any {
|
|
const launchOptions: any = {
|
|
args: [
|
|
"--remote-debugging-port=0", // Use dynamic port
|
|
`--user-data-dir=${userDataDir}`,
|
|
"--no-first-run",
|
|
"--no-default-browser-check",
|
|
"--disable-dev-shm-usage",
|
|
"--disable-extensions",
|
|
"--disable-component-extensions-with-background-pages",
|
|
"--disable-background-networking",
|
|
"--disable-backgrounding-occluded-windows",
|
|
"--disable-default-apps",
|
|
"--disable-sync",
|
|
"--disable-translate",
|
|
"--metrics-recording-only",
|
|
"--no-pings",
|
|
"--safebrowsing-disable-auto-update",
|
|
],
|
|
};
|
|
|
|
// Add headless mode (using any to bypass type checking issues)
|
|
launchOptions.headless = "new";
|
|
|
|
return launchOptions;
|
|
}
|
|
|
|
/**
|
|
* Sets a custom browser executable path if configured
|
|
* @param launchOptions Launch options object to modify
|
|
*/
|
|
async function setCustomBrowserExecutable(launchOptions: any): Promise<void> {
|
|
// First, try to use a custom browser path from configuration
|
|
if (
|
|
currentConfig.customBrowserPaths &&
|
|
Object.keys(currentConfig.customBrowserPaths).length > 0
|
|
) {
|
|
const preferredBrowsers = currentConfig.preferredBrowsers || [
|
|
"chrome",
|
|
"edge",
|
|
"brave",
|
|
"firefox",
|
|
];
|
|
|
|
for (const browser of preferredBrowsers) {
|
|
if (
|
|
currentConfig.customBrowserPaths[browser] &&
|
|
fs.existsSync(currentConfig.customBrowserPaths[browser])
|
|
) {
|
|
launchOptions.executablePath =
|
|
currentConfig.customBrowserPaths[browser];
|
|
|
|
// Set product to firefox if using Firefox browser
|
|
if (browser === "firefox") {
|
|
launchOptions.product = "firefox";
|
|
}
|
|
|
|
console.log(
|
|
`Using custom ${browser} path: ${launchOptions.executablePath}`
|
|
);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
// If no custom path is found, use cached path or detect a new one
|
|
try {
|
|
if (detectedBrowserPath && fs.existsSync(detectedBrowserPath)) {
|
|
console.log(`Using cached browser path: ${detectedBrowserPath}`);
|
|
launchOptions.executablePath = detectedBrowserPath;
|
|
|
|
// Check if the detected browser is Firefox
|
|
if (detectedBrowserPath.includes("firefox")) {
|
|
launchOptions.product = "firefox";
|
|
console.log("Setting product to firefox for Firefox browser");
|
|
}
|
|
} else {
|
|
detectedBrowserPath = await findBrowserExecutablePath();
|
|
launchOptions.executablePath = detectedBrowserPath;
|
|
|
|
// Check if the detected browser is Firefox
|
|
if (detectedBrowserPath.includes("firefox")) {
|
|
launchOptions.product = "firefox";
|
|
console.log("Setting product to firefox for Firefox browser");
|
|
}
|
|
|
|
console.log(
|
|
`Using detected browser path: ${launchOptions.executablePath}`
|
|
);
|
|
}
|
|
} catch (error) {
|
|
console.error("Failed to detect browser executable path:", error);
|
|
throw new Error(
|
|
"No browser executable path found. Please specify a custom browser path in the configuration."
|
|
);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Find a browser executable path on the current system
|
|
* @returns Path to a browser executable
|
|
*/
|
|
async function findBrowserExecutablePath(): Promise<string> {
|
|
// Try to use chrome-launcher (most reliable method)
|
|
try {
|
|
console.log("Attempting to find Chrome using chrome-launcher...");
|
|
|
|
// Launch Chrome using chrome-launcher
|
|
const chrome = await ChromeLauncher.launch({
|
|
chromeFlags: ["--headless"],
|
|
handleSIGINT: false,
|
|
});
|
|
|
|
// chrome-launcher stores the Chrome executable path differently than Puppeteer
|
|
// Let's try different approaches to get it
|
|
|
|
// First check if we can access it directly
|
|
let chromePath = "";
|
|
|
|
// Chrome version data often contains the path
|
|
if (chrome.process && chrome.process.spawnfile) {
|
|
chromePath = chrome.process.spawnfile;
|
|
console.log("Found Chrome path from process.spawnfile");
|
|
} else {
|
|
// Try to get the Chrome path from chrome-launcher
|
|
// In newer versions, it's directly accessible
|
|
console.log("Trying to determine Chrome path using other methods");
|
|
|
|
// This will actually return the real Chrome path for us
|
|
// chrome-launcher has this inside but doesn't expose it directly
|
|
const possiblePaths = [
|
|
process.env.CHROME_PATH,
|
|
// Common paths by OS
|
|
...(process.platform === "darwin"
|
|
? ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"]
|
|
: process.platform === "win32"
|
|
? [
|
|
`${process.env.PROGRAMFILES}\\Google\\Chrome\\Application\\chrome.exe`,
|
|
`${process.env["PROGRAMFILES(X86)"]}\\Google\\Chrome\\Application\\chrome.exe`,
|
|
]
|
|
: ["/usr/bin/google-chrome"]),
|
|
].filter(Boolean);
|
|
|
|
// Use the first valid path
|
|
for (const p of possiblePaths) {
|
|
if (p && fs.existsSync(p)) {
|
|
chromePath = p;
|
|
console.log("Found Chrome path from common locations");
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Always kill the Chrome instance we just launched
|
|
await chrome.kill();
|
|
|
|
if (chromePath) {
|
|
console.log(`Chrome found via chrome-launcher: ${chromePath}`);
|
|
return chromePath;
|
|
} else {
|
|
console.log("Chrome launched but couldn't determine executable path");
|
|
}
|
|
} catch (error) {
|
|
// Check if it's a ChromeNotInstalledError
|
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
if (
|
|
errorMessage.includes("No Chrome installations found") ||
|
|
(error as any)?.code === "ERR_LAUNCHER_NOT_INSTALLED"
|
|
) {
|
|
console.log("Chrome not installed. Falling back to manual detection");
|
|
} else {
|
|
console.error("Failed to find Chrome using chrome-launcher:", error);
|
|
console.log("Falling back to manual detection");
|
|
}
|
|
}
|
|
|
|
// If chrome-launcher failed, use manual detection
|
|
|
|
const platform = process.platform;
|
|
const preferredBrowsers = currentConfig.preferredBrowsers || [
|
|
"chrome",
|
|
"edge",
|
|
"brave",
|
|
"firefox",
|
|
];
|
|
|
|
console.log(`Attempting to detect browser executable path on ${platform}...`);
|
|
|
|
// Platform-specific detection strategies
|
|
if (platform === "win32") {
|
|
// Windows - try registry detection for Chrome
|
|
let registryPath = null;
|
|
try {
|
|
console.log("Checking Windows registry for Chrome...");
|
|
// Try HKLM first
|
|
const regOutput = execSync(
|
|
'reg query "HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\App Paths\\chrome.exe" /ve',
|
|
{ encoding: "utf8" }
|
|
);
|
|
|
|
// Extract path from registry output
|
|
const match = regOutput.match(/REG_(?:SZ|EXPAND_SZ)\s+([^\s]+)/i);
|
|
if (match && match[1]) {
|
|
registryPath = match[1].replace(/\\"/g, "");
|
|
// Verify the path exists
|
|
if (fs.existsSync(registryPath)) {
|
|
console.log(`Found Chrome via HKLM registry: ${registryPath}`);
|
|
return registryPath;
|
|
}
|
|
}
|
|
} catch (e) {
|
|
// Try HKCU if HKLM fails
|
|
try {
|
|
console.log("Checking user registry for Chrome...");
|
|
const regOutput = execSync(
|
|
'reg query "HKEY_CURRENT_USER\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\App Paths\\chrome.exe" /ve',
|
|
{ encoding: "utf8" }
|
|
);
|
|
|
|
// Extract path from registry output
|
|
const match = regOutput.match(/REG_(?:SZ|EXPAND_SZ)\s+([^\s]+)/i);
|
|
if (match && match[1]) {
|
|
registryPath = match[1].replace(/\\"/g, "");
|
|
// Verify the path exists
|
|
if (fs.existsSync(registryPath)) {
|
|
console.log(`Found Chrome via HKCU registry: ${registryPath}`);
|
|
return registryPath;
|
|
}
|
|
}
|
|
} catch (innerError) {
|
|
console.log(
|
|
"Failed to find Chrome via registry, continuing with path checks"
|
|
);
|
|
}
|
|
}
|
|
|
|
// Try to find Chrome through BLBeacon registry key (version info)
|
|
try {
|
|
console.log("Checking Chrome BLBeacon registry...");
|
|
const regOutput = execSync(
|
|
'reg query "HKEY_CURRENT_USER\\Software\\Google\\Chrome\\BLBeacon" /v version',
|
|
{ encoding: "utf8" }
|
|
);
|
|
|
|
if (regOutput) {
|
|
// If BLBeacon exists, Chrome is likely installed in the default location
|
|
const programFiles = process.env.PROGRAMFILES || "C:\\Program Files";
|
|
const programFilesX86 =
|
|
process.env["PROGRAMFILES(X86)"] || "C:\\Program Files (x86)";
|
|
|
|
const defaultChromePaths = [
|
|
path.join(programFiles, "Google\\Chrome\\Application\\chrome.exe"),
|
|
path.join(programFilesX86, "Google\\Chrome\\Application\\chrome.exe"),
|
|
];
|
|
|
|
for (const chromePath of defaultChromePaths) {
|
|
if (fs.existsSync(chromePath)) {
|
|
console.log(
|
|
`Found Chrome via BLBeacon registry hint: ${chromePath}`
|
|
);
|
|
return chromePath;
|
|
}
|
|
}
|
|
}
|
|
} catch (e) {
|
|
console.log("Failed to find Chrome via BLBeacon registry");
|
|
}
|
|
|
|
// Continue with regular path checks
|
|
const programFiles = process.env.PROGRAMFILES || "C:\\Program Files";
|
|
const programFilesX86 =
|
|
process.env["PROGRAMFILES(X86)"] || "C:\\Program Files (x86)";
|
|
|
|
// Common Windows browser paths
|
|
const winBrowserPaths = {
|
|
chrome: [
|
|
path.join(programFiles, "Google\\Chrome\\Application\\chrome.exe"),
|
|
path.join(programFilesX86, "Google\\Chrome\\Application\\chrome.exe"),
|
|
],
|
|
edge: [
|
|
path.join(programFiles, "Microsoft\\Edge\\Application\\msedge.exe"),
|
|
path.join(programFilesX86, "Microsoft\\Edge\\Application\\msedge.exe"),
|
|
],
|
|
brave: [
|
|
path.join(
|
|
programFiles,
|
|
"BraveSoftware\\Brave-Browser\\Application\\brave.exe"
|
|
),
|
|
path.join(
|
|
programFilesX86,
|
|
"BraveSoftware\\Brave-Browser\\Application\\brave.exe"
|
|
),
|
|
],
|
|
firefox: [
|
|
path.join(programFiles, "Mozilla Firefox\\firefox.exe"),
|
|
path.join(programFilesX86, "Mozilla Firefox\\firefox.exe"),
|
|
],
|
|
};
|
|
|
|
// Check each browser in preferred order
|
|
for (const browser of preferredBrowsers) {
|
|
const paths =
|
|
winBrowserPaths[browser as keyof typeof winBrowserPaths] || [];
|
|
for (const browserPath of paths) {
|
|
if (fs.existsSync(browserPath)) {
|
|
console.log(`Found ${browser} at ${browserPath}`);
|
|
return browserPath;
|
|
}
|
|
}
|
|
}
|
|
} else if (platform === "darwin") {
|
|
// macOS browser paths
|
|
const macBrowserPaths = {
|
|
chrome: ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"],
|
|
edge: ["/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge"],
|
|
brave: ["/Applications/Brave Browser.app/Contents/MacOS/Brave Browser"],
|
|
firefox: ["/Applications/Firefox.app/Contents/MacOS/firefox"],
|
|
safari: ["/Applications/Safari.app/Contents/MacOS/Safari"],
|
|
};
|
|
|
|
// Check each browser in preferred order
|
|
for (const browser of preferredBrowsers) {
|
|
const paths =
|
|
macBrowserPaths[browser as keyof typeof macBrowserPaths] || [];
|
|
for (const browserPath of paths) {
|
|
if (fs.existsSync(browserPath)) {
|
|
console.log(`Found ${browser} at ${browserPath}`);
|
|
// Safari is detected but not supported by Puppeteer
|
|
if (browser === "safari") {
|
|
console.log(
|
|
"Safari detected but not supported by Puppeteer. Continuing search..."
|
|
);
|
|
continue;
|
|
}
|
|
return browserPath;
|
|
}
|
|
}
|
|
}
|
|
} else if (platform === "linux") {
|
|
// Linux browser commands
|
|
const linuxBrowserCommands = {
|
|
chrome: ["google-chrome", "chromium", "chromium-browser"],
|
|
edge: ["microsoft-edge"],
|
|
brave: ["brave-browser"],
|
|
firefox: ["firefox"],
|
|
};
|
|
|
|
// Check each browser in preferred order
|
|
for (const browser of preferredBrowsers) {
|
|
const commands =
|
|
linuxBrowserCommands[browser as keyof typeof linuxBrowserCommands] ||
|
|
[];
|
|
for (const cmd of commands) {
|
|
try {
|
|
// Use more universal commands for Linux to find executables
|
|
// command -v works in most shells, fallback to which or type
|
|
const browserPath = execSync(
|
|
`command -v ${cmd} || which ${cmd} || type -p ${cmd} 2>/dev/null`,
|
|
{ encoding: "utf8" }
|
|
).trim();
|
|
|
|
if (browserPath && fs.existsSync(browserPath)) {
|
|
console.log(`Found ${browser} at ${browserPath}`);
|
|
return browserPath;
|
|
}
|
|
} catch (e) {
|
|
// Command not found, continue to next
|
|
}
|
|
}
|
|
}
|
|
|
|
// Additional check for unusual locations on Linux
|
|
const alternativeLocations = [
|
|
"/usr/bin/google-chrome",
|
|
"/usr/bin/chromium",
|
|
"/usr/bin/chromium-browser",
|
|
"/snap/bin/chromium",
|
|
"/snap/bin/google-chrome",
|
|
"/opt/google/chrome/chrome",
|
|
];
|
|
|
|
for (const location of alternativeLocations) {
|
|
if (fs.existsSync(location)) {
|
|
console.log(`Found browser at alternative location: ${location}`);
|
|
return location;
|
|
}
|
|
}
|
|
}
|
|
|
|
throw new Error(
|
|
`No browser executable found for platform ${platform}. Please specify a custom browser path.`
|
|
);
|
|
}
|
|
|
|
/**
|
|
* Sets up cleanup handlers for the browser instance
|
|
* @param browser Browser instance
|
|
* @param userDataDir Path to the user data directory to clean up
|
|
*/
|
|
function setupBrowserCleanupHandlers(
|
|
browser: puppeteer.Browser,
|
|
userDataDir: string
|
|
): void {
|
|
browser.on("disconnected", () => {
|
|
console.log(`Browser disconnected. Scheduling cleanup for: ${userDataDir}`);
|
|
|
|
// Clear any existing cleanup timeout when browser is disconnected
|
|
cancelScheduledCleanup();
|
|
|
|
// Delayed cleanup to avoid conflicts with potential new browser instances
|
|
setTimeout(() => {
|
|
// Only remove the directory if no new browser has been launched
|
|
if (!headlessBrowserInstance) {
|
|
console.log(`Cleaning up temporary directory: ${userDataDir}`);
|
|
try {
|
|
fs.rmSync(userDataDir, { recursive: true, force: true });
|
|
console.log(`Successfully removed directory: ${userDataDir}`);
|
|
} catch (error) {
|
|
console.error(`Failed to remove directory ${userDataDir}:`, error);
|
|
}
|
|
} else {
|
|
console.log(
|
|
`Skipping cleanup for ${userDataDir} as new browser instance is active`
|
|
);
|
|
}
|
|
}, 5000); // 5-second delay for cleanup
|
|
|
|
// Reset browser instance variables
|
|
launchedBrowserWSEndpoint = null;
|
|
headlessBrowserInstance = null;
|
|
});
|
|
}
|
|
|
|
// ===== Cleanup Management =====
|
|
|
|
/**
|
|
* Cancels any scheduled browser cleanup
|
|
*/
|
|
function cancelScheduledCleanup(): void {
|
|
if (browserCleanupTimeout) {
|
|
console.log("Cancelling scheduled browser cleanup");
|
|
clearTimeout(browserCleanupTimeout);
|
|
browserCleanupTimeout = null;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Schedules automatic cleanup of the browser instance after inactivity
|
|
*/
|
|
export function scheduleBrowserCleanup(): void {
|
|
// Clear any existing timeout first
|
|
cancelScheduledCleanup();
|
|
|
|
// Only schedule cleanup if we have an active browser instance
|
|
if (headlessBrowserInstance) {
|
|
console.log(
|
|
`Scheduling browser cleanup in ${BROWSER_CLEANUP_TIMEOUT / 1000} seconds`
|
|
);
|
|
|
|
browserCleanupTimeout = setTimeout(() => {
|
|
console.log("Executing scheduled browser cleanup");
|
|
if (headlessBrowserInstance) {
|
|
console.log("Closing headless browser instance");
|
|
headlessBrowserInstance.close();
|
|
headlessBrowserInstance = null;
|
|
launchedBrowserWSEndpoint = null;
|
|
}
|
|
browserCleanupTimeout = null;
|
|
}, BROWSER_CLEANUP_TIMEOUT);
|
|
}
|
|
}
|
|
|
|
// ===== Public Browser Connection API =====
|
|
|
|
/**
|
|
* Connects to a headless browser for web operations
|
|
* @param url The URL to navigate to
|
|
* @param options Connection and emulation options
|
|
* @returns Promise resolving to browser, port, and page objects
|
|
*/
|
|
export async function connectToHeadlessBrowser(
|
|
url: string,
|
|
options: {
|
|
blockResources?: boolean;
|
|
customResourceBlockList?: string[];
|
|
emulateDevice?: "mobile" | "tablet" | "desktop";
|
|
emulateNetworkCondition?: "slow3G" | "fast3G" | "4G" | "offline";
|
|
viewport?: { width: number; height: number };
|
|
locale?: string;
|
|
timezoneId?: string;
|
|
userAgent?: string;
|
|
waitForSelector?: string;
|
|
waitForTimeout?: number;
|
|
cookies?: Array<{
|
|
name: string;
|
|
value: string;
|
|
domain?: string;
|
|
path?: string;
|
|
}>;
|
|
headers?: Record<string, string>;
|
|
} = {}
|
|
): Promise<{
|
|
browser: puppeteer.Browser;
|
|
port: number;
|
|
page: puppeteer.Page;
|
|
}> {
|
|
console.log(
|
|
`Connecting to headless browser for ${url}${
|
|
options.blockResources ? " (blocking non-essential resources)" : ""
|
|
}`
|
|
);
|
|
|
|
try {
|
|
// Validate URL format
|
|
try {
|
|
new URL(url);
|
|
} catch (e) {
|
|
throw new Error(`Invalid URL format: ${url}`);
|
|
}
|
|
|
|
// Get or create a browser instance
|
|
const browser = await getHeadlessBrowserInstance();
|
|
|
|
if (!launchedBrowserWSEndpoint) {
|
|
throw new Error("Failed to retrieve WebSocket endpoint for browser");
|
|
}
|
|
|
|
// Extract port from WebSocket endpoint
|
|
const port = parseInt(
|
|
launchedBrowserWSEndpoint.split(":")[2].split("/")[0]
|
|
);
|
|
|
|
// Always create a new page for each audit to avoid request interception conflicts
|
|
console.log("Creating a new page for this audit");
|
|
const page = await browser.newPage();
|
|
|
|
// Set a longer timeout for navigation
|
|
const navigationTimeout = 10000; // 10 seconds
|
|
page.setDefaultNavigationTimeout(navigationTimeout);
|
|
|
|
// Navigate to the URL
|
|
console.log(`Navigating to ${url}`);
|
|
await page.goto(url, {
|
|
waitUntil: "networkidle2", // Wait until there are no more network connections for at least 500ms
|
|
timeout: navigationTimeout,
|
|
});
|
|
|
|
// Set custom headers if provided
|
|
if (options.headers && Object.keys(options.headers).length > 0) {
|
|
await page.setExtraHTTPHeaders(options.headers);
|
|
console.log("Set custom HTTP headers");
|
|
}
|
|
|
|
// Set cookies if provided
|
|
if (options.cookies && options.cookies.length > 0) {
|
|
const urlObj = new URL(url);
|
|
const cookiesWithDomain = options.cookies.map((cookie) => ({
|
|
...cookie,
|
|
domain: cookie.domain || urlObj.hostname,
|
|
path: cookie.path || "/",
|
|
}));
|
|
await page.setCookie(...cookiesWithDomain);
|
|
console.log(`Set ${options.cookies.length} cookies`);
|
|
}
|
|
|
|
// Set custom viewport if specified
|
|
if (options.viewport) {
|
|
await page.setViewport(options.viewport);
|
|
console.log(
|
|
`Set viewport to ${options.viewport.width}x${options.viewport.height}`
|
|
);
|
|
} else if (options.emulateDevice) {
|
|
// Set common device emulation presets
|
|
let viewport;
|
|
let userAgent = options.userAgent;
|
|
|
|
switch (options.emulateDevice) {
|
|
case "mobile":
|
|
viewport = {
|
|
width: 375,
|
|
height: 667,
|
|
isMobile: true,
|
|
hasTouch: true,
|
|
};
|
|
userAgent =
|
|
userAgent ||
|
|
"Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X)";
|
|
break;
|
|
case "tablet":
|
|
viewport = {
|
|
width: 768,
|
|
height: 1024,
|
|
isMobile: true,
|
|
hasTouch: true,
|
|
};
|
|
userAgent =
|
|
userAgent || "Mozilla/5.0 (iPad; CPU OS 13_2_3 like Mac OS X)";
|
|
break;
|
|
case "desktop":
|
|
default:
|
|
viewport = {
|
|
width: 1280,
|
|
height: 800,
|
|
isMobile: false,
|
|
hasTouch: false,
|
|
};
|
|
break;
|
|
}
|
|
|
|
await page.setViewport(viewport);
|
|
if (userAgent) await page.setUserAgent(userAgent);
|
|
|
|
console.log(`Emulating ${options.emulateDevice} device`);
|
|
}
|
|
|
|
// Set locale and timezone if provided
|
|
if (options.locale) {
|
|
await page.evaluateOnNewDocument((locale) => {
|
|
Object.defineProperty(navigator, "language", { get: () => locale });
|
|
Object.defineProperty(navigator, "languages", { get: () => [locale] });
|
|
}, options.locale);
|
|
console.log(`Set locale to ${options.locale}`);
|
|
}
|
|
|
|
if (options.timezoneId) {
|
|
await page.emulateTimezone(options.timezoneId);
|
|
console.log(`Set timezone to ${options.timezoneId}`);
|
|
}
|
|
|
|
// Emulate network conditions if specified
|
|
if (options.emulateNetworkCondition) {
|
|
// Define network condition types that match puppeteer's expected format
|
|
interface PuppeteerNetworkConditions {
|
|
offline: boolean;
|
|
latency?: number;
|
|
download?: number;
|
|
upload?: number;
|
|
}
|
|
|
|
let networkConditions: PuppeteerNetworkConditions;
|
|
|
|
switch (options.emulateNetworkCondition) {
|
|
case "slow3G":
|
|
networkConditions = {
|
|
offline: false,
|
|
latency: 400,
|
|
download: (500 * 1024) / 8,
|
|
upload: (500 * 1024) / 8,
|
|
};
|
|
break;
|
|
case "fast3G":
|
|
networkConditions = {
|
|
offline: false,
|
|
latency: 150,
|
|
download: (1.5 * 1024 * 1024) / 8,
|
|
upload: (750 * 1024) / 8,
|
|
};
|
|
break;
|
|
case "4G":
|
|
networkConditions = {
|
|
offline: false,
|
|
latency: 50,
|
|
download: (4 * 1024 * 1024) / 8,
|
|
upload: (2 * 1024 * 1024) / 8,
|
|
};
|
|
break;
|
|
case "offline":
|
|
networkConditions = { offline: true };
|
|
break;
|
|
default:
|
|
networkConditions = { offline: false };
|
|
}
|
|
|
|
// @ts-ignore - Property might not be in types but is supported
|
|
await page.emulateNetworkConditions(networkConditions);
|
|
console.log(
|
|
`Emulating ${options.emulateNetworkCondition} network conditions`
|
|
);
|
|
}
|
|
|
|
// Check if we should block resources based on the options
|
|
if (options.blockResources) {
|
|
const resourceTypesToBlock = options.customResourceBlockList ||
|
|
currentConfig.blockResourceTypes || ["image", "font", "media"];
|
|
|
|
await page.setRequestInterception(true);
|
|
page.on("request", (request) => {
|
|
// Block unnecessary resources to speed up loading
|
|
const resourceType = request.resourceType();
|
|
if (resourceTypesToBlock.includes(resourceType)) {
|
|
request.abort();
|
|
} else {
|
|
request.continue();
|
|
}
|
|
});
|
|
|
|
console.log(
|
|
`Blocking resource types: ${resourceTypesToBlock.join(", ")}`
|
|
);
|
|
}
|
|
|
|
// Wait for a specific selector if requested
|
|
if (options.waitForSelector) {
|
|
try {
|
|
console.log(`Waiting for selector: ${options.waitForSelector}`);
|
|
await page.waitForSelector(options.waitForSelector, {
|
|
timeout: options.waitForTimeout || 30000,
|
|
});
|
|
} catch (selectorError: any) {
|
|
console.warn(
|
|
`Failed to find selector "${options.waitForSelector}": ${selectorError.message}`
|
|
);
|
|
// Continue anyway, don't fail the whole operation
|
|
}
|
|
}
|
|
|
|
return { browser, port, page };
|
|
} catch (error) {
|
|
console.error("Failed to connect to headless browser:", error);
|
|
throw new Error(
|
|
`Failed to connect to headless browser: ${
|
|
error instanceof Error ? error.message : String(error)
|
|
}`
|
|
);
|
|
}
|
|
}
|