mirror of
https://github.com/run-llama/llama-hub.git
synced 2025-08-14 03:31:41 +00:00
113 lines
3.7 KiB
Python
113 lines
3.7 KiB
Python
![]() |
"""Pandas AI loader."""
|
||
|
|
||
|
from pathlib import Path
|
||
|
from typing import Dict, List, Optional, Any
|
||
|
import pandas as pd
|
||
|
import numpy as np
|
||
|
|
||
|
from llama_index.readers.base import BaseReader
|
||
|
from llama_index.readers.schema.base import Document
|
||
|
from llama_index.readers.download import download_loader
|
||
|
from tempfile import TemporaryDirectory
|
||
|
|
||
|
|
||
|
class PandasAIReader(BaseReader):
|
||
|
"""Pandas AI reader.
|
||
|
|
||
|
Light wrapper around https://github.com/gventuri/pandas-ai.
|
||
|
|
||
|
Args:
|
||
|
llm (Optional[pandas.llm]): LLM to use. Defaults to None.
|
||
|
concat_rows (bool): whether to concatenate all rows into one document.
|
||
|
If set to False, a Document will be created for each row.
|
||
|
True by default.
|
||
|
|
||
|
col_joiner (str): Separator to use for joining cols per row.
|
||
|
Set to ", " by default.
|
||
|
|
||
|
row_joiner (str): Separator to use for joining each row.
|
||
|
Only used when `concat_rows=True`.
|
||
|
Set to "\n" by default.
|
||
|
|
||
|
pandas_config (dict): Options for the `pandas.read_csv` function call.
|
||
|
Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
|
||
|
for more information.
|
||
|
Set to empty dict by default, this means pandas will try to figure
|
||
|
out the separators, table head, etc. on its own.
|
||
|
|
||
|
"""
|
||
|
|
||
|
def __init__(
|
||
|
self,
|
||
|
llm: Optional[Any] = None,
|
||
|
concat_rows: bool = True,
|
||
|
col_joiner: str = ", ",
|
||
|
row_joiner: str = "\n",
|
||
|
pandas_config: dict = {},
|
||
|
) -> None:
|
||
|
"""Init params."""
|
||
|
try:
|
||
|
from pandasai.llm.openai import OpenAI
|
||
|
from pandasai import PandasAI
|
||
|
except ImportError:
|
||
|
raise ImportError("Please install pandasai to use this reader.")
|
||
|
|
||
|
self._llm = llm or OpenAI()
|
||
|
self._pandas_ai = PandasAI(llm)
|
||
|
|
||
|
self._concat_rows = concat_rows
|
||
|
self._col_joiner = col_joiner
|
||
|
self._row_joiner = row_joiner
|
||
|
self._pandas_config = pandas_config
|
||
|
|
||
|
def run_pandas_ai(
|
||
|
self,
|
||
|
initial_df: pd.DataFrame,
|
||
|
query: str,
|
||
|
is_conversational_answer: bool = False,
|
||
|
) -> Any:
|
||
|
"""Load dataframe."""
|
||
|
import pandasai
|
||
|
|
||
|
return self._pandas_ai.run(
|
||
|
initial_df, prompt=query, is_conversational_answer=is_conversational_answer
|
||
|
)
|
||
|
|
||
|
def load_data(
|
||
|
self,
|
||
|
initial_df: pd.DataFrame,
|
||
|
query: str,
|
||
|
is_conversational_answer: bool = False,
|
||
|
) -> List[Document]:
|
||
|
"""Parse file."""
|
||
|
|
||
|
result = self.run_pandas_ai(
|
||
|
initial_df, query, is_conversational_answer=is_conversational_answer
|
||
|
)
|
||
|
if is_conversational_answer:
|
||
|
return [Document(text=result)]
|
||
|
else:
|
||
|
if isinstance(result, (np.generic)):
|
||
|
result = pd.Series(result)
|
||
|
elif isinstance(result, (pd.Series, pd.DataFrame)):
|
||
|
pass
|
||
|
else:
|
||
|
raise ValueError("Unexpected type for result: {}".format(type(result)))
|
||
|
# if not conversational answer, use Pandas CSV Reader
|
||
|
PandasCSVReader = download_loader("PandasCSVReader")
|
||
|
|
||
|
reader = PandasCSVReader(
|
||
|
concat_rows=self._concat_rows,
|
||
|
col_joiner=self._col_joiner,
|
||
|
row_joiner=self._row_joiner,
|
||
|
pandas_config=self._pandas_config,
|
||
|
)
|
||
|
|
||
|
with TemporaryDirectory() as tmpdir:
|
||
|
outpath = Path(tmpdir) / "out.csv"
|
||
|
with outpath.open("w") as f:
|
||
|
# TODO: add option to specify index=False
|
||
|
result.to_csv(f, index=False)
|
||
|
|
||
|
docs = reader.load_data(outpath)
|
||
|
return docs
|