fix: long page screenshot not working (#403)

This commit is contained in:
Guilume 2025-01-05 17:04:34 +08:00 committed by GitHub
parent 98acc4254d
commit 07b4c1c0ed
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1639,11 +1639,9 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
Returns:
str: The base64-encoded screenshot data
"""
dimensions = await self.get_page_dimensions(page)
page_height = dimensions['height']
if page_height < kwargs.get(
"screenshot_height_threshold", SCREENSHOT_HEIGHT_TRESHOLD
):
need_scroll = await self.page_need_scroll(page)
if not need_scroll:
# Page is short enough, just take a screenshot
return await self.take_screenshot_naive(page)
else:
@ -2158,4 +2156,22 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
const {scrollWidth, scrollHeight} = document.documentElement;
return {width: scrollWidth, height: scrollHeight};
}
""")
async def page_need_scroll(self, page: Page):
    """
    Check whether the document is taller than the browser viewport.

    The check runs inside the page: it compares the root element's
    ``scrollHeight`` with ``window.innerHeight``.

    Args:
        page: Playwright page object

    Returns:
        The result of the in-page comparison: truthy when the document's
        scroll height exceeds the viewport height (the page must be
        scrolled to be seen in full), falsy otherwise.
    """
    # The script text is passed to the browser verbatim; JavaScript does not
    # care about the missing leading whitespace inside the literal.
    overflow_check_js = """
() => {
const scrollHeight = document.documentElement.scrollHeight;
const viewportHeight = window.innerHeight;
return scrollHeight > viewportHeight;
}
"""
    needs_scroll = await page.evaluate(overflow_check_js)
    return needs_scroll