mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-07-26 02:10:41 +00:00
Fix crawler (#2591)
This commit is contained in:
parent
867695ad0c
commit
1ab2b977c0
@ -280,7 +280,7 @@ class Crawler(BaseComponent):
|
|||||||
self, base_url: str, filter_urls: Optional[List] = None, existed_links: List = None
|
self, base_url: str, filter_urls: Optional[List] = None, existed_links: List = None
|
||||||
) -> set:
|
) -> set:
|
||||||
self.driver.get(base_url)
|
self.driver.get(base_url)
|
||||||
a_elements = self.driver.find_elements_by_tag_name("a")
|
a_elements = self.driver.find_elements_by_xpath("//a[@href]")
|
||||||
sub_links = set()
|
sub_links = set()
|
||||||
if not (existed_links and base_url in existed_links):
|
if not (existed_links and base_url in existed_links):
|
||||||
if filter_urls:
|
if filter_urls:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user