mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-08 15:04:29 +00:00
Fix Datalake Json Error (#8246)
This commit is contained in:
parent
05ee89fdd6
commit
0b6e3741b3
@ -41,7 +41,7 @@ def read_tsv_from_s3(
|
|||||||
Read the tsv file from the s3 bucket and return a dataframe
|
Read the tsv file from the s3 bucket and return a dataframe
|
||||||
"""
|
"""
|
||||||
|
|
||||||
read_csv_from_s3(client, key, bucket_name, sep="\t", sample_size=sample_size)
|
return read_csv_from_s3(client, key, bucket_name, sep="\t", sample_size=sample_size)
|
||||||
|
|
||||||
|
|
||||||
def read_json_from_s3(
|
def read_json_from_s3(
|
||||||
@ -50,9 +50,14 @@ def read_json_from_s3(
|
|||||||
"""
|
"""
|
||||||
Read the json file from the s3 bucket and return a dataframe
|
Read the json file from the s3 bucket and return a dataframe
|
||||||
"""
|
"""
|
||||||
|
obj = client.get_object(Bucket=bucket_name, Key=key)
|
||||||
line_stream = client.get_object(Bucket=bucket_name, Key=key)["Body"].iter_lines()
|
json_text = obj["Body"].read().decode("utf-8")
|
||||||
return pd.DataFrame.from_records(map(json.loads, line_stream), nrows=sample_size)
|
data = json.loads(json_text)
|
||||||
|
if isinstance(data, list):
|
||||||
|
return pd.DataFrame.from_dict(data[:sample_size])
|
||||||
|
return pd.DataFrame.from_dict(
|
||||||
|
{key: pd.Series(value) for key, value in data.items()}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def read_parquet_from_s3(client: Any, key: str, bucket_name: str) -> DataFrame:
|
def read_parquet_from_s3(client: Any, key: str, bucket_name: str) -> DataFrame:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user