mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-24 14:45:55 +00:00
* add decoding stage to gz/zip files. Files that were zip/gz were not being decoded. This was leading to an error when we wanted them to be. * remove unnecessary comment --------- Co-authored-by: Carl Kristensen <carl.johan.coelho.kristensen@schibsted.com>
This commit is contained in:
parent
512625c809
commit
74df616679
@ -28,14 +28,15 @@ logger = ingestion_logger()
|
|||||||
|
|
||||||
|
|
||||||
def _get_json_text(key: str, text: bytes, decode: bool) -> Union[str, bytes]:
|
def _get_json_text(key: str, text: bytes, decode: bool) -> Union[str, bytes]:
|
||||||
|
processed_text: Union[str, bytes] = text
|
||||||
if key.endswith(".gz"):
|
if key.endswith(".gz"):
|
||||||
return gzip.decompress(text)
|
processed_text = gzip.decompress(text)
|
||||||
if key.endswith(".zip"):
|
if key.endswith(".zip"):
|
||||||
with zipfile.ZipFile(io.BytesIO(text)) as zip_file:
|
with zipfile.ZipFile(io.BytesIO(text)) as zip_file:
|
||||||
return zip_file.read(zip_file.infolist()[0]).decode(UTF_8)
|
processed_text = zip_file.read(zip_file.infolist()[0]).decode(UTF_8)
|
||||||
if decode:
|
if decode:
|
||||||
return text.decode(UTF_8) if isinstance(text, bytes) else text
|
return processed_text.decode(UTF_8) if isinstance(text, bytes) else text
|
||||||
return text
|
return processed_text
|
||||||
|
|
||||||
|
|
||||||
class JSONDataFrameReader(DataFrameReader):
|
class JSONDataFrameReader(DataFrameReader):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user