Ok, should have a cleaner playwright story on multithreaded environments

This commit is contained in:
Jake Poznanski 2025-09-22 18:14:30 +00:00
parent 1a64728420
commit 4e77ecac3f

View File

@ -19,6 +19,7 @@ import re
import sqlite3 import sqlite3
import threading import threading
import unittest import unittest
import weakref
from contextlib import contextmanager from contextlib import contextmanager
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Optional from typing import List, Optional
@ -159,61 +160,61 @@ def get_equation_hash(equation, bg_color="white", text_color="black", font_size=
_thread_local = threading.local() _thread_local = threading.local()
def _cleanup_playwright(playwright, browser):
print(f"Cleaning up playwright context on {threading.get_ident()} native id {threading.get_native_id()}, on PID {os.getpid()}, {threading.active_count()} active threads")
try:
browser.close()
except Exception:
pass # Ignore errors during cleanup
try:
playwright.stop()
except Exception:
pass # Ignore errors during cleanup
class _BrowserOwner:
def __init__(self):
p = sync_playwright().start()
b = p.chromium.launch()
self.p = p
self.browser = b
self._closed = False
# Important: don't capture `self` or globals in the finalizer
self._finalizer = weakref.finalize(self, _cleanup_playwright, p, b)
def close_now(self):
if not self._closed:
self._closed = True
self._finalizer() # idempotent; runs at most once
def _get_owner():
owner = getattr(_thread_local, "owner", None)
if owner is None:
owner = _BrowserOwner()
_thread_local.owner = owner
return owner
@contextmanager @contextmanager
def browser_context(): def browser_context():
""" """
Context manager for Playwright browser instances. Context manager for Playwright browser instances.
Returns a browser context that can be used for rendering equations. Returns a browser context that can be used for rendering equations.
Automatically handles initialization and cleanup. Automatically handles initialization and cleanup.
Cleans up and recreates the browser every 100 uses to prevent memory leaks.
""" """
# Initialize usage counter if not present
if not hasattr(_thread_local, "usage_count"):
_thread_local.usage_count = 0
# Check if we already have a browser for this thread
if not hasattr(_thread_local, "playwright"):
_thread_local.playwright = sync_playwright().start()
_thread_local.browser = _thread_local.playwright.chromium.launch()
_thread_local.usage_count = 0
# Increment usage counter
_thread_local.usage_count += 1
print(f"DEBUG: Initializing playwright context on {threading.get_ident()} native id {threading.get_native_id()}, on PID {os.getpid()}, {threading.active_count()} active threads, {_thread_local.usage_count}")
# Check if we need to clean up the browser (every 100 uses)
if _thread_local.usage_count > 100:
# Clean up the old browser completely
print("Cleanup up old playwright instance to prevent memory leaks...")
try: owner = _get_owner()
_thread_local.browser.close() ctx = owner.browser.new_context(viewport={"width": 800, "height": 400})
except Exception:
pass # Ignore errors during cleanup
try:
_thread_local.playwright.stop()
except Exception:
pass # Ignore errors during cleanup
# Remove the attributes to force re-initialization
delattr(_thread_local, "playwright")
delattr(_thread_local, "browser")
# Re-initialize with a fresh browser
_thread_local.playwright = sync_playwright().start()
_thread_local.browser = _thread_local.playwright.chromium.launch()
_thread_local.usage_count = 1 # Reset to 1 since we just used it
# Create a new context for this operation
context = _thread_local.browser.new_context(viewport={"width": 800, "height": 400})
try: try:
yield context yield ctx
finally: finally:
# Clean up the context after use try:
context.close() ctx.close()
except Exception:
pass
def render_equation( def render_equation(