Update minimum selenium version supported for crawler (#2921)

* Update minimum requirement for selenium for using the crawler

* Updating pin of grpcio to match the default in Google Colab

* Adding requests requirement
This commit is contained in:
Sebastian 2022-08-03 10:11:18 +02:00 committed by GitHub
parent 2c56305ed3
commit bde3261b07
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 8 additions and 6 deletions

View File

@@ -13,7 +13,7 @@ try:
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
-from selenium.common.exceptions import StaleElementReferenceException
+from selenium.common.exceptions import StaleElementReferenceException, WebDriverException
from selenium import webdriver
except (ImportError, ModuleNotFoundError) as ie:
from haystack.utils.import_utils import _optional_component_not_installed
@@ -22,6 +22,7 @@ except (ImportError, ModuleNotFoundError) as ie:
from haystack.nodes.base import BaseComponent
from haystack.schema import Document
+from haystack.errors import NodeError
logger = logging.getLogger(__name__)
@@ -94,15 +95,15 @@ class Crawler(BaseComponent):
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
self.driver = webdriver.Chrome(service=Service("chromedriver"), options=options)
-except:
-raise Exception(
+except WebDriverException as exc:
+raise NodeError(
"""
\'chromium-driver\' needs to be installed manually when running colab. Follow the below given commands:
!apt-get update
!apt install chromium-driver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
If it has already been installed, please check if it has been copied to the right directory i.e. to \'/usr/bin\'"""
-)
+) from exc
else:
logger.info("'chrome-driver' will be automatically installed.")
self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

View File

@@ -165,7 +165,7 @@ audio =
beir =
beir; platform_system != 'Windows'
crawler =
-selenium !=4.1.4 # due to https://github.com/SeleniumHQ/selenium/issues/10612
+selenium>=4.0.0,!=4.1.4 # Avoid 4.1.4 due to https://github.com/SeleniumHQ/selenium/issues/10612
webdriver-manager
preprocessing =
beautifulsoup4
@@ -188,7 +188,8 @@ ray =
aiorwlock>=1.3.0,<2
colab =
-grpcio==1.43.0
+grpcio==1.47.0
+requests>=2.25 # Needed to avoid dependency conflict with crawler https://github.com/deepset-ai/haystack/pull/2921
dev =
pre-commit
# Type check