45 lines
1.3 KiB
Python
Raw Normal View History

2023-02-03 20:12:03 -08:00
"""Simple CSV reader.
A parser for tabular data files.
"""
from pathlib import Path
from typing import Any, Dict, List, Optional
from gpt_index.readers.base import BaseReader
from gpt_index.readers.schema.base import Document
class SimpleCSVReader(BaseReader):
"""CSV parser.
Args:
concat_rows (bool): whether to concatenate all rows into one document.
If set to False, a Document will be created for each row.
True by default.
"""
def __init__(self, *args: Any, concat_rows: bool = True, **kwargs: Any) -> None:
"""Init params."""
super().__init__(*args, **kwargs)
self._concat_rows = concat_rows
def load_data(
self, file: Path, extra_info: Optional[Dict] = None
) -> List[Document]:
"""Parse file."""
try:
import csv
except ImportError:
raise ValueError("csv module is required to read CSV files.")
text_list = []
with open(file, "r") as fp:
csv_reader = csv.reader(fp)
for row in csv_reader:
text_list.append(", ".join(row))
if self._concat_rows:
return [Document("\n".join(text_list), extra_info=extra_info)]
else:
return [Document(text, extra_info=extra_info) for text in text_list]