diff --git a/ingestion/src/metadata/utils/datalake/csv_tsv_dispatch.py b/ingestion/src/metadata/utils/datalake/csv_tsv_dispatch.py
index 64815e5c6d2..f9b2c46668b 100644
--- a/ingestion/src/metadata/utils/datalake/csv_tsv_dispatch.py
+++ b/ingestion/src/metadata/utils/datalake/csv_tsv_dispatch.py
@@ -16,8 +16,6 @@ from Csv and Tsv file formats
 from functools import singledispatch
 from typing import Any
 
-import pandas as pd
-
 from metadata.generated.schema.entity.services.connections.database.datalake.azureConfig import (
     AzureConfig,
 )
@@ -42,6 +40,8 @@ CSV_SEPARATOR = ","
 
 
 def read_from_pandas(path: str, separator: str, storage_options=None):
+    import pandas as pd  # pylint: disable=import-outside-toplevel
+
     chunk_list = []
     with pd.read_csv(
         path, sep=separator, chunksize=CHUNKSIZE, storage_options=storage_options
diff --git a/ingestion/src/metadata/utils/datalake/parquet_dispatch.py b/ingestion/src/metadata/utils/datalake/parquet_dispatch.py
index fdef03752f7..612beed8ac7 100644
--- a/ingestion/src/metadata/utils/datalake/parquet_dispatch.py
+++ b/ingestion/src/metadata/utils/datalake/parquet_dispatch.py
@@ -18,8 +18,6 @@ from Parquet file formats
 from functools import singledispatch
 from typing import Any
 
-import pandas as pd
-
 from metadata.generated.schema.entity.services.connections.database.datalake.azureConfig import (
     AzureConfig,
 )
@@ -95,6 +93,8 @@ def _(_: S3Config, key: str, bucket_name: str, connection_kwargs, **kwargs):
 
 @read_parquet_dispatch.register
 def _(config_source: AzureConfig, key: str, bucket_name: str, **kwargs):
+    import pandas as pd  # pylint: disable=import-outside-toplevel
+
     storage_options = return_azure_storage_options(config_source)
     account_url = AZURE_PATH.format(
         bucket_name=bucket_name,
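
Note: both hunks apply the same deferred-import pattern, moving "import pandas as pd" from module scope into the functions that actually use it, so importing these dispatch modules no longer pays the cost of loading pandas up front. A minimal, self-contained sketch of that pattern is below; the function name, docstring, and CHUNK_SIZE constant are illustrative assumptions, not code from the repository.

# Sketch of the deferred (lazy) import pattern used in the diff above.
# CHUNK_SIZE and read_csv_in_chunks are hypothetical names for illustration only.
CHUNK_SIZE = 10_000


def read_csv_in_chunks(path: str, separator: str = ","):
    """Read a CSV in chunks; pandas is imported only when this function runs."""
    import pandas as pd  # pylint: disable=import-outside-toplevel

    chunks = []
    # read_csv with chunksize returns an iterator that also works as a context manager
    with pd.read_csv(path, sep=separator, chunksize=CHUNK_SIZE) as reader:
        for chunk in reader:
            chunks.append(chunk)
    return chunks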