mirror of
https://github.com/run-llama/llama-hub.git
synced 2025-08-14 11:41:56 +00:00
35 lines
1.1 KiB
Python
35 lines
1.1 KiB
Python
"""SnscrapeTwitter reader."""
|
|
from typing import List
|
|
|
|
from llama_index.readers.base import BaseReader
|
|
from llama_index.readers.schema.base import Document
|
|
|
|
|
|
class SnscrapeTwitterReader(BaseReader):
    """SnscrapeTwitter reader. Reads data from a twitter profile.

    The constructor takes no arguments; the profile and the tweet limit are
    supplied per call to ``load_data``:

    Args:
        username (str): Twitter Username.
        num_tweets (int): Number of tweets to fetch.
    """

    def __init__(self):
        """Initialize SnscrapeTwitter reader."""

    def load_data(self, username: str, num_tweets: int) -> List[Document]:
        """Load data from a twitter profile.

        Args:
            username (str): Twitter Username.
            num_tweets (int): Maximum number of tweets to fetch. Values of
                zero or below fetch nothing.

        Returns:
            List[Document]: A single-element list. The document's ``text``
                holds the raw content of each fetched tweet (in the order
                the scraper yields them) and its ``extra_info`` records the
                username.
        """
        # Imported lazily so the module can be imported without snscrape
        # installed; it is only required when data is actually loaded.
        from itertools import islice

        import snscrape.modules.twitter as sntwitter

        scraper = sntwitter.TwitterSearchScraper(f"from:{username}")

        # Cap the iterator at exactly ``num_tweets`` items. The previous
        # ``i > num_tweets`` check on a 0-based index let one extra tweet
        # through. islice() rejects negative stops, so clamp at 0 to keep
        # "negative limit -> empty result" instead of raising ValueError.
        limit = max(num_tweets, 0)
        attributes_container = [
            tweet.rawContent for tweet in islice(scraper.get_items(), limit)
        ]

        # NOTE(review): ``text`` is given a list of strings rather than a
        # single string. This is kept as-is for backward compatibility --
        # confirm whether Document expects a str and callers should receive
        # the tweets joined (or one Document per tweet).
        return [
            Document(text=attributes_container, extra_info={"username": username})
        ]