mirror of
https://github.com/run-llama/llama-hub.git
synced 2025-08-15 04:01:32 +00:00
25 lines
630 B
Python
25 lines
630 B
Python
![]() |
"""Read Microsoft Word files."""
|
||
|
|
||
|
from pathlib import Path
|
||
|
from typing import Dict, Optional, List
|
||
|
|
||
|
from gpt_index.readers.base import BaseReader
|
||
|
from gpt_index.readers.schema.base import Document
|
||
|
|
||
|
|
||
|
class DocxReader(BaseReader):
|
||
|
"""Docx Reader."""
|
||
|
|
||
|
def load_data(
|
||
|
self, file: Path, extra_info: Optional[Dict] = None
|
||
|
) -> List[Document]:
|
||
|
"""Parse file."""
|
||
|
try:
|
||
|
import docx2txt
|
||
|
except ImportError:
|
||
|
raise ValueError("docx2txt is required to read Microsoft Word files.")
|
||
|
|
||
|
text = docx2txt.process(file)
|
||
|
|
||
|
return [Document(text, extra_info=extra_info)]
|