mirror of
https://github.com/run-llama/llama-hub.git
synced 2025-08-14 11:41:56 +00:00
Fixed the READMEs
This commit is contained in:
parent
85f5c0bcda
commit
e94b59bcf2
@ -29,3 +29,5 @@ WHERE age >= 18
|
||||
|
||||
documents = reader.load_data(query=query)
|
||||
```
|
||||
|
||||
This loader is designed to be used as a way to load data into [GPT Index](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/loader-hub/tree/main) for examples.
|
||||
|
@ -18,3 +18,5 @@ channel_ids = [1057178784895348746] # Replace with your channel_id
|
||||
reader = DiscordReader(discord_token=discord_token)
|
||||
documents = reader.load_data(channel_ids=channel_ids)
|
||||
```
|
||||
|
||||
This loader is designed to be used as a way to load data into [GPT Index](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/loader-hub/tree/main) for examples.
|
||||
|
@ -23,7 +23,7 @@ index.add(...)
|
||||
|
||||
# initialize reader
|
||||
reader = FaissReader(index)
|
||||
# To load data from the Faiss index, you must specify:
|
||||
# To load data from the Faiss index, you must specify:
|
||||
# k: top nearest neighbors
|
||||
# query: a 2D embedding representation of your queries (rows are queries)
|
||||
k = 4
|
||||
@ -33,3 +33,5 @@ query=np.array([query1, query2])
|
||||
documents = reader.load_data(query=query, id_to_text_map=id_to_text_map, k=k)
|
||||
|
||||
```
|
||||
|
||||
This loader is designed to be used as a way to load data into [GPT Index](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/loader-hub/tree/main) for examples.
|
||||
|
@ -1,15 +1,17 @@
|
||||
# File Loader
|
||||
|
||||
This loader takes in a local directory containing files and extracts `Document`s from each of the files.
|
||||
This loader takes in a local directory containing files and extracts `Document`s from each of the files. By default, the loader will utilize the specialized loaders in this library to parse common file extensions (e.g. .pdf, .png, .docx, etc). You can optionally pass in your own custom loaders. Note: if no loader is found for a file extension, and the file extension is not in the list to skip, the file will be read directly.
|
||||
|
||||
## Usage
|
||||
|
||||
To use this loader, you simply need to instantiate the `SimpleDirectoryReader` class with a directory, along with other optional settings, such as whether to ignore hidden files. See the code for the complete list.
|
||||
|
||||
```python
|
||||
from loader_hub import SimpleDirectoryReader
|
||||
from gpt_index import download_loader
|
||||
|
||||
loader = SimpleDirectoryReader('data', recursive=True, exclude_hidden=True)
|
||||
SimpleDirectoryReader = download_loader(SimpleDirectoryReader)
|
||||
|
||||
loader = SimpleDirectoryReader('./data', recursive=True, exclude_hidden=True)
|
||||
documents = loader.load_data()
|
||||
```
|
||||
|
||||
@ -20,10 +22,11 @@ This loader is designed to be used as a way to load data into [GPT Index](https:
|
||||
### GPT Index
|
||||
|
||||
```python
|
||||
from loader_hub import SimpleDirectoryReader
|
||||
from gpt_index import GPTSimpleVectorIndex
|
||||
from gpt_index import GPTSimpleVectorIndex, download_loader
|
||||
|
||||
loader = SimpleDirectoryReader('data', recursive=True, exclude_hidden=True)
|
||||
SimpleDirectoryReader = download_loader(SimpleDirectoryReader)
|
||||
|
||||
loader = SimpleDirectoryReader('./data', recursive=True, exclude_hidden=True)
|
||||
documents = loader.load_data()
|
||||
index = GPTSimpleVectorIndex(documents)
|
||||
index.query('What are these files about?')
|
||||
@ -34,13 +37,14 @@ index.query('What are these files about?')
|
||||
Note: Make sure you change the description of the `Tool` to match your use-case.
|
||||
|
||||
```python
|
||||
from loader_hub import SimpleDirectoryReader
|
||||
from gpt_index import GPTSimpleVectorIndex
|
||||
from gpt_index import GPTSimpleVectorIndex, download_loader
|
||||
from langchain.agents import initialize_agent, Tool
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.chains.conversation.memory import ConversationBufferMemory
|
||||
|
||||
loader = SimpleDirectoryReader('data', recursive=True, exclude_hidden=True)
|
||||
SimpleDirectoryReader = download_loader(SimpleDirectoryReader)
|
||||
|
||||
loader = SimpleDirectoryReader('./data', recursive=True, exclude_hidden=True)
|
||||
documents = loader.load_data()
|
||||
index = GPTSimpleVectorIndex(documents)
|
||||
|
||||
|
@ -0,0 +1,19 @@
|
||||
# Audio File Loader
|
||||
|
||||
This loader uses OpenAI's Whisper model to transcribe the text of an audio file or the audio track of a video file. The file formats .mp3 and .mp4 are preferred. A single local file is passed in each time you call `load_data`.
|
||||
|
||||
## Usage
|
||||
|
||||
To use this loader, you need to pass in a `Path` to a local file.
|
||||
|
||||
```python
|
||||
from pathlib import Path
|
||||
from gpt_index import download_loader
|
||||
|
||||
AudioTranscriber = download_loader("AudioTranscriber")
|
||||
|
||||
loader = AudioTranscriber()
|
||||
documents = loader.load_data(file=Path('./podcast.mp3'))
|
||||
```
|
||||
|
||||
This loader is designed to be used as a way to load data into [GPT Index](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/loader-hub/tree/main) for examples.
|
@ -0,0 +1,19 @@
|
||||
# Microsoft Word Loader
|
||||
|
||||
This loader extracts the text from a local Microsoft Word (.docx) file. Non-text items in the document are ignored. A single local file is passed in each time you call `load_data`.
|
||||
|
||||
## Usage
|
||||
|
||||
To use this loader, you need to pass in a `Path` to a local file.
|
||||
|
||||
```python
|
||||
from pathlib import Path
|
||||
from gpt_index import download_loader
|
||||
|
||||
DocxReader = download_loader("DocxReader")
|
||||
|
||||
loader = DocxReader()
|
||||
documents = loader.load_data(file=Path('./homework.docx'))
|
||||
```
|
||||
|
||||
This loader is designed to be used as a way to load data into [GPT Index](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/loader-hub/tree/main) for examples.
|
@ -0,0 +1,19 @@
|
||||
# Epub Loader
|
||||
|
||||
This loader extracts the text from a local Epub file. A single local file is passed in each time you call `load_data`.
|
||||
|
||||
## Usage
|
||||
|
||||
To use this loader, you need to pass in a `Path` to a local file.
|
||||
|
||||
```python
|
||||
from pathlib import Path
|
||||
from gpt_index import download_loader
|
||||
|
||||
EpubReader = download_loader("EpubReader")
|
||||
|
||||
loader = EpubReader()
|
||||
documents = loader.load_data(file=Path('./book.epub'))
|
||||
```
|
||||
|
||||
This loader is designed to be used as a way to load data into [GPT Index](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/loader-hub/tree/main) for examples.
|
@ -0,0 +1,19 @@
|
||||
# Image Loader
|
||||
|
||||
This loader extracts the text from an image that has text in it (e.g. a receipt). The [Donut](https://huggingface.co/docs/transformers/model_doc/donut) transformer model is used. The file extensions .png, .jpg, and .jpeg are preferred. A single local file is passed in each time you call `load_data`.
|
||||
|
||||
## Usage
|
||||
|
||||
To use this loader, you need to pass in a `Path` to a local file.
|
||||
|
||||
```python
|
||||
from pathlib import Path
|
||||
from gpt_index import download_loader
|
||||
|
||||
ImageReader = download_loader("ImageReader")
|
||||
|
||||
loader = ImageReader()
|
||||
documents = loader.load_data(file=Path('./receipt.png'))
|
||||
```
|
||||
|
||||
This loader is designed to be used as a way to load data into [GPT Index](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/loader-hub/tree/main) for examples.
|
@ -0,0 +1,19 @@
|
||||
# Markdown Loader
|
||||
|
||||
This loader extracts the text from a local Markdown file. A single local file is passed in each time you call `load_data`.
|
||||
|
||||
## Usage
|
||||
|
||||
To use this loader, you need to pass in a `Path` to a local file.
|
||||
|
||||
```python
|
||||
from pathlib import Path
|
||||
from gpt_index import download_loader
|
||||
|
||||
MarkdownReader = download_loader("MarkdownReader")
|
||||
|
||||
loader = MarkdownReader()
|
||||
documents = loader.load_data(file=Path('./README.md'))
|
||||
```
|
||||
|
||||
This loader is designed to be used as a way to load data into [GPT Index](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/loader-hub/tree/main) for examples.
|
@ -0,0 +1,19 @@
|
||||
# Pandas CSV Loader
|
||||
|
||||
This loader extracts the text from a local .csv file using the `pandas` Python package. A single local file is passed in each time you call `load_data`.
|
||||
|
||||
## Usage
|
||||
|
||||
To use this loader, you need to pass in a `Path` to a local file.
|
||||
|
||||
```python
|
||||
from pathlib import Path
|
||||
from gpt_index import download_loader
|
||||
|
||||
PandasCSVReader = download_loader("PandasCSVReader")
|
||||
|
||||
loader = PandasCSVReader()
|
||||
documents = loader.load_data(file=Path('./transactions.csv'))
|
||||
```
|
||||
|
||||
This loader is designed to be used as a way to load data into [GPT Index](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/loader-hub/tree/main) for examples.
|
@ -10,7 +10,7 @@ from gpt_index.readers.base import BaseReader
|
||||
from gpt_index.readers.schema.base import Document
|
||||
|
||||
|
||||
class PandasCSVParser(BaseReader):
|
||||
class PandasCSVReader(BaseReader):
|
||||
r"""Pandas-based CSV parser.
|
||||
|
||||
Parses CSVs using the separator detection from the Pandas `read_csv` function.
|
||||
|
@ -0,0 +1,19 @@
|
||||
# PDF Loader
|
||||
|
||||
This loader extracts the text from a local PDF file using the `PyPDF2` Python package. Any non-text elements are ignored. A single local file is passed in each time you call `load_data`.
|
||||
|
||||
## Usage
|
||||
|
||||
To use this loader, you need to pass in a `Path` to a local file.
|
||||
|
||||
```python
|
||||
from pathlib import Path
|
||||
from gpt_index import download_loader
|
||||
|
||||
PDFReader = download_loader("PDFReader")
|
||||
|
||||
loader = PDFReader()
|
||||
documents = loader.load_data(file=Path('./article.pdf'))
|
||||
```
|
||||
|
||||
This loader is designed to be used as a way to load data into [GPT Index](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/loader-hub/tree/main) for examples.
|
@ -0,0 +1,19 @@
|
||||
# Microsoft PowerPoint Loader
|
||||
|
||||
This loader extracts the text from a local Microsoft PowerPoint (.pptx) file. Image elements are automatically captioned and inserted as text into the final `Document` using [GPT2 Image Captioning model](https://huggingface.co/nlpconnect/vit-gpt2-image-captioning). A single local file is passed in each time you call `load_data`.
|
||||
|
||||
## Usage
|
||||
|
||||
To use this loader, you need to pass in a `Path` to a local file.
|
||||
|
||||
```python
|
||||
from pathlib import Path
|
||||
from gpt_index import download_loader
|
||||
|
||||
PptxReader = download_loader("PptxReader")
|
||||
|
||||
loader = PptxReader()
|
||||
documents = loader.load_data(file=Path('./deck.pptx'))
|
||||
```
|
||||
|
||||
This loader is designed to be used as a way to load data into [GPT Index](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/loader-hub/tree/main) for examples.
|
@ -0,0 +1,19 @@
|
||||
# Simple CSV Loader
|
||||
|
||||
This loader extracts the text from a local .csv file by directly reading the file row by row. A single local file is passed in each time you call `load_data`.
|
||||
|
||||
## Usage
|
||||
|
||||
To use this loader, you need to pass in a `Path` to a local file.
|
||||
|
||||
```python
|
||||
from pathlib import Path
|
||||
from gpt_index import download_loader
|
||||
|
||||
SimpleCSVReader = download_loader("SimpleCSVReader")
|
||||
|
||||
loader = SimpleCSVReader()
|
||||
documents = loader.load_data(file=Path('./transactions.csv'))
|
||||
```
|
||||
|
||||
This loader is designed to be used as a way to load data into [GPT Index](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/loader-hub/tree/main) for examples.
|
@ -29,5 +29,6 @@ wrapper.pass_response_to_webhook(
|
||||
query_str
|
||||
)
|
||||
|
||||
|
||||
```
|
||||
|
||||
This loader is designed to be used as a way to load data into [GPT Index](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/loader-hub/tree/main) for examples.
|
||||
|
@ -23,3 +23,5 @@ query_dict = {}
|
||||
reader = SimpleMongoReader(host, port)
|
||||
documents = reader.load_data(db_name, collection_name, query_dict=query_dict)
|
||||
```
|
||||
|
||||
This loader is designed to be used as a way to load data into [GPT Index](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/loader-hub/tree/main) for examples.
|
||||
|
@ -20,3 +20,5 @@ reader = NotionPageReader(integration_token=integration_token)
|
||||
documents = reader.load_data(page_ids=page_ids)
|
||||
|
||||
```
|
||||
|
||||
This loader is designed to be used as a way to load data into [GPT Index](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/loader-hub/tree/main) for examples.
|
||||
|
@ -11,5 +11,7 @@ from gpt_index import download_loader
|
||||
import os
|
||||
|
||||
ObsidianReader = download_loader('ObsidianReader')
|
||||
documents = ObsidianReader('/path/to/dir').load_data() # Returns list of documents
|
||||
documents = ObsidianReader('/path/to/dir').load_data() # Returns list of documents
|
||||
```
|
||||
|
||||
This loader is designed to be used as a way to load data into [GPT Index](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/loader-hub/tree/main) for examples.
|
||||
|
@ -13,7 +13,7 @@ import os
|
||||
|
||||
PineconeReader = download_loader('PineconeReader')
|
||||
|
||||
# the id_to_text_map specifies a mapping from the ID specified in Pinecone to your text.
|
||||
# the id_to_text_map specifies a mapping from the ID specified in Pinecone to your text.
|
||||
id_to_text_map = {
|
||||
"id1": "text blob 1",
|
||||
"id2": "text blob 2",
|
||||
@ -27,10 +27,12 @@ query_vector=[n1, n2, n3, ...]
|
||||
|
||||
reader = PineconeReader(api_key=api_key, environment="us-west1-gcp")
|
||||
documents = reader.load_data(
|
||||
index_name='quickstart',
|
||||
id_to_text_map=id_to_text_map,
|
||||
top_k=3,
|
||||
vector=query_vector,
|
||||
index_name='quickstart',
|
||||
id_to_text_map=id_to_text_map,
|
||||
top_k=3,
|
||||
vector=query_vector,
|
||||
separate_documents=True
|
||||
)
|
||||
```
|
||||
|
||||
This loader is designed to be used as a way to load data into [GPT Index](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/loader-hub/tree/main) for examples.
|
||||
|
@ -20,7 +20,7 @@ query_vector=[n1, n2, n3, ...]
|
||||
|
||||
# NOTE: Required args are collection_name, query_vector.
|
||||
# See the Python client: https://github.com/qdrant/qdrant_client
|
||||
# for more details.
|
||||
# for more details.
|
||||
documents = reader.load_data(
|
||||
collection_name="demo",
|
||||
query_vector=query_vector,
|
||||
@ -28,3 +28,5 @@ documents = reader.load_data(
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
This loader is designed to be used as a way to load data into [GPT Index](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/loader-hub/tree/main) for examples.
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Twitter Loader
|
||||
# String Iterable Loader
|
||||
|
||||
This loader converts an iterable (e.g. list) of strings into `Document`s.
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user