76 lines
2.5 KiB
Python

"""CouchDB client."""
from typing import Dict, List, Optional
from llama_index.readers.base import BaseReader
from llama_index.readers.schema.base import Document
import logging
import json
class SimpleCouchDBReader(BaseReader):
"""Simple CouchDB reader.
Concatenates each CouchDB doc into Document used by LlamaIndex.
Args:
couchdb_url (str): CouchDB Full URL.
max_docs (int): Maximum number of documents to load.
"""
def __init__(self, user: str, pwd: str, host: str, port: int, couchdb_url: Optional[Dict] = None, max_docs: int = 1000) -> None:
"""Initialize with parameters."""
import couchdb3
if couchdb_url is not None:
self.client: CouchDBClient = couchdb3.Server(couchdb_url)
else:
self.client: CouchDBClient = couchdb3.Server(f'http://{user}:{pwd}@{host}:{port}')
self.max_docs = max_docs
def load_data(
self, db_name: str, query: Optional[str] = None
) -> List[Document]:
"""Load data from the input directory.
Args:
db_name (str): name of the database.
query (Optional[str]): query to filter documents.
Defaults to None
Returns:
List[Document]: A list of documents.
"""
documents = []
db = self.client.get(db_name)
if query is None:
#if no query is specified, return all docs in database
logging.debug('showing all docs')
results = db.view('_all_docs',include_docs=True)
else:
logging.debug('executing query')
results = db.find(query)
if type(results) is not dict:
logging.debug(results.rows)
else:
logging.debug(results)
#check if more than one result
if type(results) is not dict and results.rows is not None:
for row in results.rows:
#check that the id field exists
if "id" not in row:
raise ValueError("`id` field not found in CouchDB document.")
documents.append(Document(json.dumps(row.doc)))
else:
#only one result
if results.get('docs') is not None:
for item in results.get('docs'):
#check that the _id field exists
if "_id" not in item:
raise ValueError("`_id` field not found in CouchDB document.")
documents.append(Document(json.dumps(item)))
return documents