Update minimum selenium version supported for crawler (#2921)

* Update the minimum Selenium version required to use the crawler

* Update the grpcio pin to match the default version in Google Colab

* Add the requests requirement
This commit is contained in:
Sebastian 2022-08-03 10:11:18 +02:00 committed by GitHub
parent 2c56305ed3
commit bde3261b07
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 8 additions and 6 deletions

View File

@ -13,7 +13,7 @@ try:
from webdriver_manager.chrome import ChromeDriverManager from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException from selenium.common.exceptions import StaleElementReferenceException, WebDriverException
from selenium import webdriver from selenium import webdriver
except (ImportError, ModuleNotFoundError) as ie: except (ImportError, ModuleNotFoundError) as ie:
from haystack.utils.import_utils import _optional_component_not_installed from haystack.utils.import_utils import _optional_component_not_installed
@ -22,6 +22,7 @@ except (ImportError, ModuleNotFoundError) as ie:
from haystack.nodes.base import BaseComponent from haystack.nodes.base import BaseComponent
from haystack.schema import Document from haystack.schema import Document
from haystack.errors import NodeError
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -94,15 +95,15 @@ class Crawler(BaseComponent):
options.add_argument("--no-sandbox") options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage") options.add_argument("--disable-dev-shm-usage")
self.driver = webdriver.Chrome(service=Service("chromedriver"), options=options) self.driver = webdriver.Chrome(service=Service("chromedriver"), options=options)
except: except WebDriverException as exc:
raise Exception( raise NodeError(
""" """
\'chromium-driver\' needs to be installed manually when running colab. Follow the below given commands: \'chromium-driver\' needs to be installed manually when running colab. Follow the below given commands:
!apt-get update !apt-get update
!apt install chromium-driver !apt install chromium-driver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin !cp /usr/lib/chromium-browser/chromedriver /usr/bin
If it has already been installed, please check if it has been copied to the right directory i.e. to \'/usr/bin\'""" If it has already been installed, please check if it has been copied to the right directory i.e. to \'/usr/bin\'"""
) ) from exc
else: else:
logger.info("'chrome-driver' will be automatically installed.") logger.info("'chrome-driver' will be automatically installed.")
self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

View File

@ -165,7 +165,7 @@ audio =
beir = beir =
beir; platform_system != 'Windows' beir; platform_system != 'Windows'
crawler = crawler =
selenium !=4.1.4 # due to https://github.com/SeleniumHQ/selenium/issues/10612 selenium>=4.0.0,!=4.1.4 # Avoid 4.1.4 due to https://github.com/SeleniumHQ/selenium/issues/10612
webdriver-manager webdriver-manager
preprocessing = preprocessing =
beautifulsoup4 beautifulsoup4
@ -188,7 +188,8 @@ ray =
aiorwlock>=1.3.0,<2 aiorwlock>=1.3.0,<2
colab = colab =
grpcio==1.43.0 grpcio==1.47.0
requests>=2.25 # Needed to avoid dependency conflict with crawler https://github.com/deepset-ai/haystack/pull/2921
dev = dev =
pre-commit pre-commit
# Type check # Type check