# Mirror of https://github.com/run-llama/llama-hub.git
# Synced 2025-08-13 11:11:48 +00:00
# 76 lines, 2.5 KiB, Python
"""CouchDB client."""

import json
import logging
from itertools import islice
from typing import Dict, List, Optional

from llama_index.readers.base import BaseReader
from llama_index.readers.schema.base import Document


class SimpleCouchDBReader(BaseReader):
    """Simple CouchDB reader.

    Serializes each CouchDB doc to JSON text and wraps it in a Document
    used by LlamaIndex.

    Args:
        user (Optional[str]): CouchDB user name. Only used when
            ``couchdb_url`` is not given.
        pwd (Optional[str]): CouchDB password. Only used when
            ``couchdb_url`` is not given.
        host (Optional[str]): CouchDB host. Only used when ``couchdb_url``
            is not given.
        port (Optional[int]): CouchDB port. Only used when ``couchdb_url``
            is not given.
        couchdb_url (Optional[str]): CouchDB Full URL. When provided it takes
            precedence over ``user``/``pwd``/``host``/``port``.
        max_docs (int): Maximum number of documents to load per call to
            ``load_data``. ``None`` disables the cap.

    Raises:
        ValueError: If ``couchdb_url`` is omitted and any of ``user``,
            ``pwd``, ``host`` or ``port`` is missing, or if ``max_docs`` is
            negative.
    """

    def __init__(
        self,
        user: Optional[str] = None,
        pwd: Optional[str] = None,
        host: Optional[str] = None,
        port: Optional[int] = None,
        couchdb_url: Optional[str] = None,
        max_docs: int = 1000,
    ) -> None:
        """Initialize with parameters."""
        # Imported lazily so the module can be imported without couchdb3
        # installed; it is only needed once a reader is instantiated.
        import couchdb3

        if max_docs is not None and max_docs < 0:
            raise ValueError("`max_docs` must be non-negative.")

        if couchdb_url is None:
            # Without a full URL every connection part is required;
            # otherwise the f-string below would silently embed "None".
            parts = (("user", user), ("pwd", pwd), ("host", host), ("port", port))
            missing = [name for name, value in parts if value is None]
            if missing:
                raise ValueError(
                    "Either `couchdb_url` or all of `user`, `pwd`, `host` and "
                    f"`port` must be provided (missing: {', '.join(missing)})."
                )
            couchdb_url = f"http://{user}:{pwd}@{host}:{port}"

        self.client = couchdb3.Server(couchdb_url)
        self.max_docs = max_docs

    def load_data(self, db_name: str, query: Optional[str] = None) -> List[Document]:
        """Load documents from a CouchDB database.

        Args:
            db_name (str): name of the database.
            query (Optional[str]): query to filter documents; it is handed
                unchanged to the database's ``find``. Defaults to None, in
                which case all docs are read through the ``_all_docs`` view.

        Returns:
            List[Document]: A list of at most ``max_docs`` documents, each
                holding the JSON text of one CouchDB doc.

        Raises:
            ValueError: If a view row has no ``id`` field or a found doc has
                no ``_id`` field.
        """
        db = self.client.get(db_name)
        if query is None:
            # if no query is specified, return all docs in database
            logging.debug('showing all docs')
            results = db.view('_all_docs', include_docs=True)
        else:
            logging.debug('executing query')
            results = db.find(query)

        documents: List[Document] = []

        # Exact type check on purpose (not isinstance): the find path is
        # consumed as a plain dict with a 'docs' key, the view path as an
        # object exposing `.rows`.
        # NOTE(review): the view result may itself be a dict subclass, in
        # which case isinstance() would misroute it - confirm against couchdb3.
        if type(results) is dict:
            logging.debug(results)
            # islice enforces max_docs; a stop of None means "no limit".
            for item in islice(results.get('docs') or [], self.max_docs):
                # check that the _id field exists
                if "_id" not in item:
                    raise ValueError("`_id` field not found in CouchDB document.")
                documents.append(Document(json.dumps(item)))
        else:
            logging.debug(results.rows)
            for row in islice(results.rows or [], self.max_docs):
                # check that the id field exists
                if "id" not in row:
                    raise ValueError("`id` field not found in CouchDB document.")
                documents.append(Document(json.dumps(row.doc)))

        return documents