Jerry Liu e97bb81915
swap out gpt_index imports for llama_index imports (#49)
* cr

* cr

* cr

---------

Co-authored-by: Jerry Liu <jerry@robustintelligence.com>
Co-authored-by: Jesse Zhang <jessetanzhang@gmail.com>
2023-02-20 21:46:58 -08:00

43 lines
1.2 KiB
Python

"""Simple CSV reader.
A parser for tabular data files.
"""
from pathlib import Path
from typing import Any, Dict, List, Optional
from llama_index.readers.base import BaseReader
from llama_index.readers.schema.base import Document
class SimpleCSVReader(BaseReader):
    """CSV parser.

    Args:
        concat_rows (bool): whether to concatenate all rows into one document.
            If set to False, a Document will be created for each row.
            True by default.

    """

    def __init__(self, *args: Any, concat_rows: bool = True, **kwargs: Any) -> None:
        """Init params.

        Positional and remaining keyword arguments are forwarded unchanged to
        the base reader's initializer.
        """
        super().__init__(*args, **kwargs)
        self._concat_rows = concat_rows

    def load_data(
        self, file: Path, extra_info: Optional[Dict] = None
    ) -> List[Document]:
        """Parse file.

        Each CSV row is rendered as its fields joined by ``", "``.

        Args:
            file (Path): path of the CSV file to read.
            extra_info (Optional[Dict]): passed through as ``extra_info`` to
                every Document that is created.

        Returns:
            List[Document]: a single Document holding all rows joined by
            newlines when ``concat_rows`` is True (one Document with empty
            text if the file has no rows); otherwise one Document per row.

        """
        import csv

        # The csv module requires newline="" on the file object; otherwise
        # universal-newline translation rewrites line endings embedded inside
        # quoted fields before the parser sees them.
        with open(file, "r", newline="") as fp:
            text_list = [", ".join(row) for row in csv.reader(fp)]

        if self._concat_rows:
            return [Document("\n".join(text_list), extra_info=extra_info)]
        return [Document(text, extra_info=extra_info) for text in text_list]