fix: make the crawler runnable and testable on Windows (#3830)

* fix crawler and try to run CI

* more compact expression

* try to fix

* improve naming regex

* revert regex

* make test_url compatible with Windows

* better conditional expression
This commit is contained in:
Stefano Fiorucci 2023-01-10 20:27:28 +01:00 committed by GitHub
parent 7f8910192e
commit be31178892
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 3 additions and 4 deletions

View File

@ -21,7 +21,6 @@ env:
--ignore=test/pipelines/test_ray.py
--ignore=test/document_stores/test_knowledge_graph.py
--ignore=test/nodes/test_audio.py
--ignore=test/nodes/test_connector.py
--ignore=test/nodes/test_summarizer_translation.py
--ignore=test/nodes/test_summarizer.py
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

View File

@ -100,8 +100,8 @@ class Crawler(BaseComponent):
super().__init__()
IN_COLAB = "google.colab" in sys.modules
IN_AZUREML = True if os.environ.get("AZUREML_ENVIRONMENT_IMAGE", None) == "True" else False
IS_ROOT = True if os.geteuid() == 0 else False
IN_AZUREML = os.environ.get("AZUREML_ENVIRONMENT_IMAGE", None) == "True"
IS_ROOT = sys.platform not in ["win32", "cygwin"] and os.geteuid() == 0
if webdriver_options is None:
webdriver_options = ["--headless", "--disable-gpu", "--disable-dev-shm-usage", "--single-process"]

View File

@ -18,7 +18,7 @@ from ..conftest import SAMPLES_PATH
@pytest.fixture(scope="session")
def test_url():
return f"file://{SAMPLES_PATH.absolute()}/crawler"
return (SAMPLES_PATH / "crawler").absolute().as_uri()
def content_match(crawler: Crawler, url: str, crawled_page: Path):