jakub-sandomierz-deepsense-ai 0ca154a0f3
Fix: MongoDB connector URI password redaction, basic unit tests for Git connector (#2268)
MongoDB connector:
Issue:
[MongoDB
documentation](https://www.mongodb.com/docs/manual/reference/connection-string/)
states that the characters `$ : / ? # [ ] @` must be percent-encoded. A URI
whose password contains such a special character will not be redacted.

Fix:
This fix removes the use of `unquote_plus` on the password, so that the
detected password matches the one inside the URI and is successfully
replaced.

Git connector:
Added very basic unit tests for the repository filtering methods. Their
impact is rather minimal, but they showcase a current limitation in the
`is_file_type_supported` method.
2024-01-08 11:27:08 +00:00

62 lines
2.2 KiB
Python

from pathlib import Path
import pytest
from unstructured.ingest.connector.git import GitAccessConfig, GitSourceConnector, SimpleGitConfig
@pytest.mark.parametrize(
    ("given_file_path", "then_is_supported"),
    [
        # Document-like extensions that are expected to be accepted.
        (Path("src/submodule/document.md"), True),
        (Path("src/submodule/document.txt"), True),
        (Path("src/submodule/document.pdf"), True),
        (Path("src/submodule/document.doc"), True),
        (Path("src/submodule/document.docx"), True),
        (Path("src/submodule/document.eml"), True),
        (Path("src/submodule/document.html"), True),
        (Path("src/submodule/document.png"), True),
        (Path("src/submodule/document.jpg"), True),
        (Path("src/submodule/document.ppt"), True),
        (Path("src/submodule/document.pptx"), True),
        (Path("src/submodule/document.xml"), True),
        # Source code and extension-less files that are expected to be rejected.
        (Path("src/submodule/code.py"), False),
        (Path("src/submodule/Dockerfile"), False),
        (Path("src/submodule/Makefile"), False),
        (Path("src/submodule/LICENSE"), False),
    ],
)
def test_connector_supports_file(given_file_path, then_is_supported):
    """Check `GitSourceConnector.is_file_type_supported` against known paths.

    The path is handed over as a plain string and the returned flag must
    equal the expectation recorded in the parametrized table.
    """
    path_as_text = str(given_file_path)
    assert GitSourceConnector.is_file_type_supported(path_as_text) == then_is_supported
class FakeGitSourceConnectorImpl(GitSourceConnector):
    """Bare-bones `GitSourceConnector` subclass used as a test double.

    NOTE(review): presumably needed because the base class leaves
    `get_ingest_docs` unimplemented and cannot be instantiated directly;
    confirm against `GitSourceConnector`.
    """

    def get_ingest_docs(self):
        """Stub that produces no documents; returns None."""
        return None
@pytest.mark.parametrize(
    ("given_file_path", "given_file_glob", "then_matches_glob"),
    [
        # No glob configured: the path is expected to match.
        (Path("LICENSE"), None, True),
        # Exact file-name pattern.
        (Path("Makefile"), ["Makefile"], True),
        # Recursive pattern matches a nested .py file ...
        (Path("src/my/super/module/main.py"), ["**/*.py"], True),
        # ... but not a .pyc file that merely starts with the same suffix.
        (Path("src/my/super/module/main.pyc"), ["**/*.py"], False),
    ],
)
def test_connector_does_path_match_glob(given_file_path, given_file_glob, then_matches_glob):
    """Check `does_path_match_glob` for a connector built with a given `file_glob`.

    A connector is assembled from placeholder URL/token values, with only
    `file_glob` varying between cases, and is asked whether the stringified
    path matches.
    """
    fake_access = GitAccessConfig(access_token="some_fake_token")
    git_config = SimpleGitConfig(
        url="some_fake_url",
        access_config=fake_access,
        file_glob=given_file_glob,
    )
    # Processor and read configs are passed as None, exactly as the test requires.
    connector = FakeGitSourceConnectorImpl(
        connector_config=git_config,
        processor_config=None,
        read_config=None,
    )
    assert connector.does_path_match_glob(str(given_file_path)) == then_matches_glob