From ac6f726252713cbd05dd1c39b46b53f7a3d0946d Mon Sep 17 00:00:00 2001 From: Mayur Singal <39544459+ulixius9@users.noreply.github.com> Date: Mon, 19 May 2025 11:44:19 +0530 Subject: [PATCH] Fix #1550: Metadata ingestion errors from Azure Data Lake (#21261) (cherry picked from commit 9ec424a3fad0f885861a1324b393a7eba71bd134) --- ingestion/src/metadata/readers/dataframe/json.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ingestion/src/metadata/readers/dataframe/json.py b/ingestion/src/metadata/readers/dataframe/json.py index 753850364d0..47c3c5ab32a 100644 --- a/ingestion/src/metadata/readers/dataframe/json.py +++ b/ingestion/src/metadata/readers/dataframe/json.py @@ -35,7 +35,11 @@ def _get_json_text(key: str, text: bytes, decode: bool) -> Union[str, bytes]: with zipfile.ZipFile(io.BytesIO(text)) as zip_file: processed_text = zip_file.read(zip_file.infolist()[0]) if decode: - return processed_text.decode(UTF_8) if isinstance(text, bytes) else text + return ( + processed_text.decode(UTF_8, errors="ignore") + if isinstance(text, bytes) + else text + ) return processed_text