mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-09 18:24:48 +00:00
100 lines
2.5 KiB
Python
100 lines
2.5 KiB
Python
import pytest
|
|
|
|
from datahub.configuration.common import AllowDenyPattern
|
|
from datahub.ingestion.source.data_lake_common.path_spec import PathSpec
|
|
|
|
|
|
@pytest.mark.parametrize(
    "include, s3_uri, expected",
    [
        # File sits exactly at the depth described by the include template.
        (
            "s3://bucket/{table}/{partition0}/*.csv",
            "s3://bucket/table/p1/test.csv",
            True,
        ),
        # File is nested one directory deeper than the include template.
        (
            "s3://bucket/{table}/{partition0}/*.csv",
            "s3://bucket/table/p1/p2/test.csv",
            False,
        ),
    ],
)
def test_allowed_ignores_depth_mismatch(
    include: str, s3_uri: str, expected: bool
) -> None:
    """Check ``PathSpec.allowed`` for URIs at, and deeper than, the include depth.

    Each case builds a ``PathSpec`` from ``include`` (with ``table_name`` set to
    ``"{table}"``) and compares ``allowed(s3_uri)`` with ``expected``.
    """
    # arrange
    spec = PathSpec(include=include, table_name="{table}")

    # act
    is_allowed = spec.allowed(s3_uri)

    # assert
    assert is_allowed == expected
|
|
|
|
|
|
@pytest.mark.parametrize(
    "s3_uri, expected",
    [
        # Table segment "table-111" matches the allow pattern "t.*111".
        ("s3://bucket/table-111/p1/test.csv", True),
        # Table segment "table-222" does not match the allow pattern.
        ("s3://bucket/table-222/p1/test.csv", False),
    ],
)
def test_allowed_tables_filter_pattern(s3_uri: str, expected: bool) -> None:
    """Check ``PathSpec.allowed`` when ``tables_filter_pattern`` is configured.

    The spec allows only tables matching ``t.*111``; each file URI is expected
    to be accepted or rejected as given by ``expected``.
    """
    # arrange
    table_filter = AllowDenyPattern(allow=["t.*111"])
    spec = PathSpec(
        include="s3://bucket/{table}/{partition0}/*.csv",
        tables_filter_pattern=table_filter,
    )

    # act
    is_allowed = spec.allowed(s3_uri)

    # assert
    assert is_allowed == expected
|
|
|
|
|
|
@pytest.mark.parametrize(
    "s3_uri, expected",
    [
        # Directory under "table-111" matches the allow pattern "t.*111".
        ("s3://bucket/table-111/p1/", True),
        # Directory under "table-222" does not match the allow pattern.
        ("s3://bucket/table-222/p1/", False),
    ],
)
def test_dir_allowed_tables_filter_pattern(s3_uri: str, expected: bool) -> None:
    """Check ``PathSpec.dir_allowed`` when ``tables_filter_pattern`` is configured.

    The spec allows only tables matching ``t.*111``; each directory URI is
    expected to be accepted or rejected as given by ``expected``.
    """
    # arrange
    table_filter = AllowDenyPattern(allow=["t.*111"])
    spec = PathSpec(
        include="s3://bucket/{table}/{partition0}/*.csv",
        tables_filter_pattern=table_filter,
    )

    # act
    is_dir_allowed = spec.dir_allowed(s3_uri)

    # assert
    assert is_dir_allowed == expected
|
|
|
|
|
|
@pytest.mark.parametrize(
    "include, parse_path, expected_table_name, expected_table_path",
    [
        # Include contains a {table} placeholder: the name is the matched
        # segment and the path is the URI up to and including that segment.
        (
            "s3://bucket/{table}/{partition0}/*.csv",
            "s3://bucket/user_log/p1/test.csv",
            "user_log",
            "s3://bucket/user_log",
        ),
        # Include has no {table} placeholder: the file name is expected as the
        # table name and the full file URI as the table path.
        (
            "s3://bucket/user_log/p1/*.csv",
            "s3://bucket/user_log/p1/test.csv",
            "test.csv",
            "s3://bucket/user_log/p1/test.csv",
        ),
    ],
)
def test_extract_table_name_and_path(
    include: str,
    parse_path: str,
    expected_table_name: str,
    expected_table_path: str,
) -> None:
    """Check ``PathSpec.extract_table_name_and_path`` for both include styles.

    Each case builds a ``PathSpec`` from ``include``, extracts the table name
    and table path from ``parse_path``, and compares both with the expected
    values.
    """
    # arrange
    path_spec = PathSpec(include=include)

    # act
    table_name, table_path = path_spec.extract_table_name_and_path(parse_path)

    # assert
    assert table_name == expected_table_name
    assert table_path == expected_table_path
|