mirror of
https://github.com/run-llama/llama-hub.git
synced 2025-08-13 11:11:48 +00:00
68 lines
2.4 KiB
Python
68 lines
2.4 KiB
Python
import logging
|
|
from typing import List, Optional
|
|
from datetime import datetime
|
|
import os
|
|
from llama_index.readers.base import BaseReader
|
|
from llama_index.readers.schema.base import Document
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class ZulipReader(BaseReader):
|
|
"""Zulip reader."""
|
|
|
|
def __init__(
|
|
self,
|
|
zulip_email: str,
|
|
zulip_domain: str,
|
|
earliest_date: Optional[datetime] = None,
|
|
latest_date: Optional[datetime] = None,
|
|
) -> None:
|
|
import zulip
|
|
|
|
"""Initialize with parameters."""
|
|
# Read the Zulip token from the environment variable
|
|
zulip_token = os.environ.get("ZULIP_TOKEN")
|
|
|
|
if zulip_token is None:
|
|
raise ValueError("ZULIP_TOKEN environment variable not set.")
|
|
|
|
# Initialize Zulip client with provided parameters
|
|
self.client = zulip.Client(api_key=zulip_token, email=zulip_email, site=zulip_domain)
|
|
|
|
def _read_stream(self, stream_name: str, reverse_chronological: bool) -> str:
|
|
"""Read a stream."""
|
|
params = {
|
|
"narrow": [{"operator": "stream", "operand": stream_name}],
|
|
"anchor": "newest",
|
|
"num_before": 100,
|
|
"num_after": 0,
|
|
}
|
|
response = self.client.get_messages(params)
|
|
messages = response["messages"]
|
|
if reverse_chronological:
|
|
messages.reverse()
|
|
return " ".join([message["content"] for message in messages])
|
|
|
|
def load_data(
|
|
self, streams: List[str], reverse_chronological: bool = True
|
|
) -> List[Document]:
|
|
"""Load data from the input streams."""
|
|
# Load data logic here
|
|
data = []
|
|
for stream_name in streams:
|
|
stream_content = self._read_stream(stream_name, reverse_chronological)
|
|
data.append(Document(stream_content, extra_info={"stream": stream_name}))
|
|
return data
|
|
|
|
def get_all_streams(self) -> list:
|
|
# Fetch all streams
|
|
response = self.client.get_streams()
|
|
streams_data = response["streams"]
|
|
# Collect the stream IDs
|
|
stream_names = [stream['name'] for stream in streams_data]
|
|
return stream_names
|
|
|
|
if __name__ == "__main__":
|
|
reader = ZulipReader(zulip_email="ianita-bot@plurigrid.zulipchat.com", zulip_domain="plurigrid.zulipchat.com")
|
|
logging.info(reader.load_data(reader.get_all_streams()))
|