Jerry Liu e97bb81915
swap out gpt_index imports for llama_index imports (#49)
* cr

* cr

* cr

---------

Co-authored-by: Jerry Liu <jerry@robustintelligence.com>
Co-authored-by: Jesse Zhang <jessetanzhang@gmail.com>
2023-02-20 21:46:58 -08:00

22 lines
506 B
Python

"""Read Microsoft Word files."""
from pathlib import Path
from typing import Dict, List, Optional
from llama_index.readers.base import BaseReader
from llama_index.readers.schema.base import Document
class DocxReader(BaseReader):
    """Reader that turns a Microsoft Word (.docx) file into documents."""

    def load_data(
        self, file: Path, extra_info: Optional[Dict] = None
    ) -> List[Document]:
        """Extract the text of a Word file into a single document.

        Args:
            file: Path of the file handed to ``docx2txt.process``.
            extra_info: Optional metadata passed through unchanged to the
                resulting ``Document``.

        Returns:
            A one-element list holding a ``Document`` built from the
            extracted text.
        """
        # Imported inside the method so docx2txt is only needed when this
        # reader is actually used.
        import docx2txt

        contents = docx2txt.process(file)
        document = Document(contents, extra_info=extra_info)
        return [document]