mirror of
https://github.com/unclecode/crawl4ai.git
synced 2025-12-29 11:27:52 +00:00
Remove excluded tags from website content
This commit is contained in:
parent
396f430022
commit
30807f5535
@ -441,6 +441,10 @@ def get_content_of_website_optimized(url: str, html: str, word_count_threshold:
|
||||
|
||||
image_description_min_word_threshold = kwargs.get('image_description_min_word_threshold', IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD)
|
||||
|
||||
for tag in kwargs.get('excluded_tags', []) or []:
|
||||
for el in body.select(tag):
|
||||
el.decompose()
|
||||
|
||||
if css_selector:
|
||||
selected_elements = body.select(css_selector)
|
||||
if not selected_elements:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user