Remove excluded tags from website content

This commit is contained in:
unclecode 2024-09-12 16:11:20 +08:00
parent 396f430022
commit 30807f5535

View File

@@ -441,6 +441,10 @@ def get_content_of_website_optimized(url: str, html: str, word_count_threshold:
image_description_min_word_threshold = kwargs.get('image_description_min_word_threshold', IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD)
for tag in kwargs.get('excluded_tags', []) or []:
for el in body.select(tag):
el.decompose()
if css_selector:
selected_elements = body.select(css_selector)
if not selected_elements: