fix: long-page screenshots not working (#403)
This commit is contained in:
parent
98acc4254d
commit
07b4c1c0ed
@ -1639,11 +1639,9 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
Returns:
|
Returns:
|
||||||
str: The base64-encoded screenshot data
|
str: The base64-encoded screenshot data
|
||||||
"""
|
"""
|
||||||
dimensions = await self.get_page_dimensions(page)
|
need_scroll = await self.page_need_scroll(page)
|
||||||
page_height = dimensions['height']
|
|
||||||
if page_height < kwargs.get(
|
if not need_scroll:
|
||||||
"screenshot_height_threshold", SCREENSHOT_HEIGHT_TRESHOLD
|
|
||||||
):
|
|
||||||
# Page is short enough, just take a screenshot
|
# Page is short enough, just take a screenshot
|
||||||
return await self.take_screenshot_naive(page)
|
return await self.take_screenshot_naive(page)
|
||||||
else:
|
else:
|
||||||
@ -2159,3 +2157,21 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
return {width: scrollWidth, height: scrollHeight};
|
return {width: scrollWidth, height: scrollHeight};
|
||||||
}
|
}
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
async def page_need_scroll(self, page: Page):
    """
    Report whether the document is taller than the browser viewport.

    The check runs inside the page itself: it compares the root element's
    ``scrollHeight`` with ``window.innerHeight``.

    Args:
        page: Playwright page object to inspect

    Returns:
        Result of the in-page comparison: true when the content height
        exceeds the viewport height (the page has to be scrolled to see
        all of it), false otherwise
    """
    # Script evaluated in the browser context; page.evaluate hands the
    # comparison result back to Python.
    overflow_check = """
    () => {
        const scrollHeight = document.documentElement.scrollHeight;
        const viewportHeight = window.innerHeight;
        return scrollHeight > viewportHeight;
    }
    """
    needs_scroll = await page.evaluate(overflow_check)
    return needs_scroll
|
Loading…
x
Reference in New Issue
Block a user