diff --git a/graphrag/index/emit/parquet_table_emitter.py b/graphrag/index/emit/parquet_table_emitter.py index 1ba61713..068e8d08 100644 --- a/graphrag/index/emit/parquet_table_emitter.py +++ b/graphrag/index/emit/parquet_table_emitter.py @@ -21,12 +21,12 @@ class ParquetTableEmitter(TableEmitter): _storage: PipelineStorage _on_error: ErrorHandlerFn + extension = "parquet" def __init__( self, storage: PipelineStorage, on_error: ErrorHandlerFn, - extension = "parquet", ): """Create a new Parquet Table Emitter.""" self._storage = storage diff --git a/graphrag/query/input/loaders/utils.py b/graphrag/query/input/loaders/utils.py index 3c680f0f..38782329 100644 --- a/graphrag/query/input/loaders/utils.py +++ b/graphrag/query/input/loaders/utils.py @@ -165,8 +165,7 @@ def to_optional_float(data: pd.Series, column_name: str | None) -> float | None: if value is None: return None if not isinstance(value, float): - msg = f"value is not a float: {value} ({type(value)})" - raise ValueError(msg) + return float(value) else: msg = f"Column {column_name} not found in data" raise ValueError(msg) diff --git a/graphrag/utils/storage.py b/graphrag/utils/storage.py index 58f07c35..b902a851 100644 --- a/graphrag/utils/storage.py +++ b/graphrag/utils/storage.py @@ -52,7 +52,7 @@ async def _load_table_from_storage(name: str, storage: PipelineStorage) -> pd.Da return pd.read_parquet(BytesIO(await storage.get(name, as_bytes=True))) case "json": return pd.read_json( - StringIO(await storage.get(name, as_bytes=True)), + StringIO(await storage.get(name)), lines=False, orient="records", )