datahub/metadata-ingestion/tests/unit/excel/test_excel_samples.py
2025-05-06 18:50:38 -05:00

49 lines
1.7 KiB
Python

import io
from datahub.ingestion.source.excel.excel_file import ExcelFile
from datahub.ingestion.source.excel.report import ExcelSourceReport
def test_sample_files(pytestconfig):
    """Check table detection against the sample workbooks under ``data/``.

    Each case is a tuple of
    ``(file name, sheet name, header row, footer row, row count, column count)``
    and is compared with the table returned by ``ExcelFile.get_table`` for
    that sheet. When ``get_table`` returns ``None`` the case must expect
    zeros for every value.

    Args:
        pytestconfig: pytest's built-in config fixture; its ``rootpath`` is
            used to locate the ``tests/unit/excel`` resources directory.
    """
    cases = [
        ("file_1.xlsx", "Monthly Reporting", 1, 5, 4, 17),
        ("file_1.xlsx", "Dec", 1, 4, 3, 14),
        ("file_1.xlsx", "Jan", 1, 5, 4, 14),
        ("file_1.xlsx", "Feb", 1, 5, 4, 14),
        ("file_2.xlsx", "Test Group Reporting ", 1, 19, 18, 46),
        ("file_3.xlsx", "Sheet1", 1, 5, 4, 209),
        ("file_4.xlsx", "in", 1, 3, 2, 252),
        ("file_5.xlsx", "Test1_Test", 4, 8, 4, 24),
        ("file_6.xlsx", "Test2_Test", 2, 6, 4, 24),
        ("file_7.xlsx", "12345678 (Current Month)", 1, 4, 3, 68),
        ("file_8.xlsx", "Test3_Test", 4, 8, 4, 24),
        ("file_9.xlsx", "Business Report", 6, 11, 5, 5),
        ("file_10.xlsx", "Sheet1", 0, 0, 0, 0),
        ("file_10.xlsx", "Sheet2", 0, 0, 0, 0),
    ]
    test_resources_dir = pytestconfig.rootpath / "tests/unit/excel"

    for file_name, sheet, header, footer, rows, columns in cases:
        # All cases share one test function, so every assertion carries the
        # workbook/sheet it belongs to; otherwise a failure is anonymous.
        context = f"{file_name} / sheet {sheet!r}"

        sample_file = test_resources_dir / f"data/{file_name}"
        assert sample_file.exists(), f"missing sample file: {sample_file}"

        report = ExcelSourceReport()
        xls = ExcelFile(file_name, io.BytesIO(sample_file.read_bytes()), report)
        assert xls.load_workbook() is True, f"{context}: workbook failed to load"

        table = xls.get_table(sheet)
        if table is None:
            # No table was returned: the case must not expect any dimensions.
            assert (header, footer, rows, columns) == (0, 0, 0, 0), (
                f"{context}: get_table returned None but a table was expected"
            )
            continue

        assert table.header_row == header, f"{context}: header_row mismatch"
        assert table.footer_row == footer, f"{context}: footer_row mismatch"
        assert table.row_count == rows, f"{context}: row_count mismatch"
        assert table.column_count == columns, f"{context}: column_count mismatch"