2023-09-21 14:51:08 -04:00
|
|
|
import click
|
2023-09-15 18:13:39 -04:00
|
|
|
from deltalake import DeltaTable
|
|
|
|
|
|
|
|
|
2023-09-21 14:51:08 -04:00
|
|
|
@click.command()
@click.option("--table-uri", type=str)
def run_check(table_uri):
    """Verify that the Delta table at ``table_uri`` has the expected shape.

    Loads the table through ``DeltaTable.to_pandas()``, prints the observed
    row and column counts next to the expected ones, and fails if either
    count differs.

    Args:
        table_uri: Location of the Delta table, supplied on the command line
            via ``--table-uri``.

    Raises:
        AssertionError: If the number of rows or the number of columns does
            not match the expected value.
    """
    print(f"Checking contents of table at {table_uri}")
    delta_table = DeltaTable(
        table_uri=table_uri,
    )

    df = delta_table.to_pandas()
    expected_rows = 5
    expected_columns = 18
    number_of_rows = len(df)
    number_of_columns = len(df.columns)
    print(f"Number of rows in table vs expected: {number_of_rows}/{expected_rows}")
    print(f"Number of columns in table vs expected: {number_of_columns}/{expected_columns}")

    # Raise explicitly rather than using `assert`, so the check still runs
    # when Python is started with -O (which strips assert statements). The
    # comparison uses the same constants that the messages report.
    if number_of_rows != expected_rows:
        raise AssertionError(
            f"number of rows in generated table ({number_of_rows}) "
            f"doesn't match expected value: {expected_rows}"
        )

    # The number of columns is associated with the flattened JSON structure
    # of the partition output. If this changes, it's most likely due to the
    # metadata changing in the output.
    if number_of_columns != expected_columns:
        raise AssertionError(
            f"number of columns in generated table ({number_of_columns}) doesn't "
            f"match expected value: {expected_columns}"
        )
    print("table check complete")
|
2023-09-15 18:13:39 -04:00
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: when executed directly, invoke the click command, which
# reads --table-uri from the command line.
if __name__ == "__main__":
    run_check()
|