bluenote 9a98b46413
Add Zulip Reader (#181)
Co-authored-by: Barton Rhodes <barton@plurigrid.xyz>
2023-04-12 22:41:21 -07:00

68 lines
2.4 KiB
Python

import logging
from typing import List, Optional
from datetime import datetime
import os
from llama_index.readers.base import BaseReader
from llama_index.readers.schema.base import Document
logger = logging.getLogger(__name__)
class ZulipReader(BaseReader):
"""Zulip reader."""
def __init__(
self,
zulip_email: str,
zulip_domain: str,
earliest_date: Optional[datetime] = None,
latest_date: Optional[datetime] = None,
) -> None:
import zulip
"""Initialize with parameters."""
# Read the Zulip token from the environment variable
zulip_token = os.environ.get("ZULIP_TOKEN")
if zulip_token is None:
raise ValueError("ZULIP_TOKEN environment variable not set.")
# Initialize Zulip client with provided parameters
self.client = zulip.Client(api_key=zulip_token, email=zulip_email, site=zulip_domain)
def _read_stream(self, stream_name: str, reverse_chronological: bool) -> str:
"""Read a stream."""
params = {
"narrow": [{"operator": "stream", "operand": stream_name}],
"anchor": "newest",
"num_before": 100,
"num_after": 0,
}
response = self.client.get_messages(params)
messages = response["messages"]
if reverse_chronological:
messages.reverse()
return " ".join([message["content"] for message in messages])
def load_data(
self, streams: List[str], reverse_chronological: bool = True
) -> List[Document]:
"""Load data from the input streams."""
# Load data logic here
data = []
for stream_name in streams:
stream_content = self._read_stream(stream_name, reverse_chronological)
data.append(Document(stream_content, extra_info={"stream": stream_name}))
return data
def get_all_streams(self) -> list:
# Fetch all streams
response = self.client.get_streams()
streams_data = response["streams"]
# Collect the stream IDs
stream_names = [stream['name'] for stream in streams_data]
return stream_names
if __name__ == "__main__":
reader = ZulipReader(zulip_email="ianita-bot@plurigrid.zulipchat.com", zulip_domain="plurigrid.zulipchat.com")
logging.info(reader.load_data(reader.get_all_streams()))