mirror of
https://github.com/run-llama/llama-hub.git
synced 2025-08-15 20:21:14 +00:00

Opening the file with encoding = 'utf-8', fixes the following UnicodeDecodeError UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 1094: character maps to OS: Windows 10 x64
43 lines
1.2 KiB
Python
43 lines
1.2 KiB
Python
"""Simple CSV reader.
|
|
|
|
A parser for tabular data files.
|
|
|
|
"""
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from llama_index.readers.base import BaseReader
|
|
from llama_index.readers.schema.base import Document
|
|
|
|
|
|
class SimpleCSVReader(BaseReader):
|
|
"""CSV parser.
|
|
|
|
Args:
|
|
concat_rows (bool): whether to concatenate all rows into one document.
|
|
If set to False, a Document will be created for each row.
|
|
True by default.
|
|
|
|
"""
|
|
|
|
def __init__(self, *args: Any, concat_rows: bool = True, **kwargs: Any) -> None:
|
|
"""Init params."""
|
|
super().__init__(*args, **kwargs)
|
|
self._concat_rows = concat_rows
|
|
|
|
def load_data(
|
|
self, file: Path, extra_info: Optional[Dict] = None
|
|
) -> List[Document]:
|
|
"""Parse file."""
|
|
import csv
|
|
|
|
text_list = []
|
|
with open(file, "r", encoding='utf-8') as fp:
|
|
csv_reader = csv.reader(fp)
|
|
for row in csv_reader:
|
|
text_list.append(", ".join(row))
|
|
if self._concat_rows:
|
|
return [Document("\n".join(text_list), extra_info=extra_info)]
|
|
else:
|
|
return [Document(text, extra_info=extra_info) for text in text_list]
|