mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-07-23 00:42:28 +00:00
Update minimum selenium version supported for crawler (#2921)
* Update minimum requirement for selenium for using the crawler
* Updating pin of grpcio to match default in google colab
* Adding requests requirement
This commit is contained in:
parent
2c56305ed3
commit
bde3261b07
@ -13,7 +13,7 @@ try:
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.common.exceptions import StaleElementReferenceException
|
||||
from selenium.common.exceptions import StaleElementReferenceException, WebDriverException
|
||||
from selenium import webdriver
|
||||
except (ImportError, ModuleNotFoundError) as ie:
|
||||
from haystack.utils.import_utils import _optional_component_not_installed
|
||||
@ -22,6 +22,7 @@ except (ImportError, ModuleNotFoundError) as ie:
|
||||
|
||||
from haystack.nodes.base import BaseComponent
|
||||
from haystack.schema import Document
|
||||
from haystack.errors import NodeError
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -94,15 +95,15 @@ class Crawler(BaseComponent):
|
||||
options.add_argument("--no-sandbox")
|
||||
options.add_argument("--disable-dev-shm-usage")
|
||||
self.driver = webdriver.Chrome(service=Service("chromedriver"), options=options)
|
||||
except:
|
||||
raise Exception(
|
||||
except WebDriverException as exc:
|
||||
raise NodeError(
|
||||
"""
|
||||
\'chromium-driver\' needs to be installed manually when running colab. Follow the below given commands:
|
||||
!apt-get update
|
||||
!apt install chromium-driver
|
||||
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
|
||||
If it has already been installed, please check if it has been copied to the right directory i.e. to \'/usr/bin\'"""
|
||||
)
|
||||
) from exc
|
||||
else:
|
||||
logger.info("'chrome-driver' will be automatically installed.")
|
||||
self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
|
||||
|
@ -165,7 +165,7 @@ audio =
|
||||
beir =
|
||||
beir; platform_system != 'Windows'
|
||||
crawler =
|
||||
selenium !=4.1.4 # due to https://github.com/SeleniumHQ/selenium/issues/10612
|
||||
selenium>=4.0.0,!=4.1.4 # Avoid 4.1.4 due to https://github.com/SeleniumHQ/selenium/issues/10612
|
||||
webdriver-manager
|
||||
preprocessing =
|
||||
beautifulsoup4
|
||||
@ -188,7 +188,8 @@ ray =
|
||||
aiorwlock>=1.3.0,<2
|
||||
|
||||
colab =
|
||||
grpcio==1.43.0
|
||||
grpcio==1.47.0
|
||||
requests>=2.25 # Needed to avoid dependency conflict with crawler https://github.com/deepset-ai/haystack/pull/2921
|
||||
dev =
|
||||
pre-commit
|
||||
# Type check
|
||||
|
Loading…
x
Reference in New Issue
Block a user