2025-03-20 07:57:43 +01:00

100 lines
2.5 KiB
Python

import pytest
from datahub.configuration.common import AllowDenyPattern
from datahub.ingestion.source.data_lake_common.path_spec import PathSpec
@pytest.mark.parametrize(
    ("include", "s3_uri", "expected"),
    [
        # URI has exactly as many path levels as the include template.
        (
            "s3://bucket/{table}/{partition0}/*.csv",
            "s3://bucket/table/p1/test.csv",
            True,
        ),
        # URI carries one more directory level than the include template.
        (
            "s3://bucket/{table}/{partition0}/*.csv",
            "s3://bucket/table/p1/p2/test.csv",
            False,
        ),
    ],
)
def test_allowed_ignores_depth_mismatch(
    include: str, s3_uri: str, expected: bool
) -> None:
    """Verify ``PathSpec.allowed`` against URIs of matching and deeper depth.

    Args:
        include: Include template handed to ``PathSpec``.
        s3_uri: Candidate file URI passed to ``allowed``.
        expected: Result ``allowed`` is expected to return for ``s3_uri``.
    """
    # arrange
    spec = PathSpec(include=include, table_name="{table}")

    # act
    is_allowed = spec.allowed(s3_uri)

    # assert
    assert is_allowed == expected
@pytest.mark.parametrize(
    ("s3_uri", "expected"),
    [
        ("s3://bucket/table-111/p1/test.csv", True),
        ("s3://bucket/table-222/p1/test.csv", False),
    ],
)
def test_allowed_tables_filter_pattern(s3_uri: str, expected: bool) -> None:
    """Verify ``PathSpec.allowed`` when a ``tables_filter_pattern`` is set.

    The spec is built with an allow-list of ``t.*111``; the ``table-111`` URI
    is expected to be accepted and the ``table-222`` URI rejected.

    Args:
        s3_uri: Candidate file URI passed to ``allowed``.
        expected: Result ``allowed`` is expected to return for ``s3_uri``.
    """
    # arrange
    table_filter = AllowDenyPattern(allow=["t.*111"])
    spec = PathSpec(
        include="s3://bucket/{table}/{partition0}/*.csv",
        tables_filter_pattern=table_filter,
    )

    # act
    is_allowed = spec.allowed(s3_uri)

    # assert
    assert is_allowed == expected
@pytest.mark.parametrize(
    ("s3_uri", "expected"),
    [
        ("s3://bucket/table-111/p1/", True),
        ("s3://bucket/table-222/p1/", False),
    ],
)
def test_dir_allowed_tables_filter_pattern(s3_uri: str, expected: bool) -> None:
    """Verify ``PathSpec.dir_allowed`` when a ``tables_filter_pattern`` is set.

    Uses the same ``t.*111`` allow-list as the file-level test, but feeds
    directory URIs (trailing slash) to ``dir_allowed``.

    Args:
        s3_uri: Candidate directory URI passed to ``dir_allowed``.
        expected: Result ``dir_allowed`` is expected to return for ``s3_uri``.
    """
    # arrange
    table_filter = AllowDenyPattern(allow=["t.*111"])
    spec = PathSpec(
        include="s3://bucket/{table}/{partition0}/*.csv",
        tables_filter_pattern=table_filter,
    )

    # act
    is_dir_allowed = spec.dir_allowed(s3_uri)

    # assert
    assert is_dir_allowed == expected
@pytest.mark.parametrize(
    "include, parse_path, expected_table_name, expected_table_path",
    [
        # Template contains {table}: the name comes from that segment and the
        # table path is expected to stop at the table directory.
        (
            "s3://bucket/{table}/{partition0}/*.csv",
            "s3://bucket/user_log/p1/test.csv",
            "user_log",
            "s3://bucket/user_log",
        ),
        # Template has no {table} placeholder: the file itself is expected to
        # be reported as the table (file name as name, full URI as path).
        (
            "s3://bucket/user_log/p1/*.csv",
            "s3://bucket/user_log/p1/test.csv",
            "test.csv",
            "s3://bucket/user_log/p1/test.csv",
        ),
    ],
)
def test_extract_table_name_and_path(
    include: str,
    parse_path: str,
    expected_table_name: str,
    expected_table_path: str,
) -> None:
    """Verify ``PathSpec.extract_table_name_and_path`` for both template kinds.

    Args:
        include: Include template handed to ``PathSpec``.
        parse_path: File URI whose table name and path are extracted.
        expected_table_name: Table name the extraction is expected to yield.
        expected_table_path: Table path the extraction is expected to yield.
    """
    # arrange
    path_spec = PathSpec(include=include)

    # act
    table_name, table_path = path_spec.extract_table_name_and_path(parse_path)

    # assert
    assert table_name == expected_table_name
    assert table_path == expected_table_path