From 1ab2b977c09cc08681b33d1914870d77aaa20bb7 Mon Sep 17 00:00:00 2001 From: bogdankostic Date: Tue, 24 May 2022 12:34:31 +0200 Subject: [PATCH] Fix crawler (#2591) --- haystack/nodes/connector/crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/nodes/connector/crawler.py b/haystack/nodes/connector/crawler.py index eaacdce14..d2378d9ad 100644 --- a/haystack/nodes/connector/crawler.py +++ b/haystack/nodes/connector/crawler.py @@ -280,7 +280,7 @@ class Crawler(BaseComponent): self, base_url: str, filter_urls: Optional[List] = None, existed_links: List = None ) -> set: self.driver.get(base_url) - a_elements = self.driver.find_elements_by_tag_name("a") + a_elements = self.driver.find_elements_by_xpath("//a[@href]") sub_links = set() if not (existed_links and base_url in existed_links): if filter_urls: