mirror of
https://github.com/run-llama/llama-hub.git
synced 2025-08-14 19:51:25 +00:00
35 lines
1.1 KiB
Python
35 lines
1.1 KiB
Python
![]() |
"""SnscrapeTwitter reader."""
|
||
|
from typing import List
|
||
|
|
||
|
from llama_index.readers.base import BaseReader
|
||
|
from llama_index.readers.schema.base import Document
|
||
|
|
||
|
|
||
|
class SnscrapeTwitterReader(BaseReader):
    """SnscrapeTwitter reader. Reads data from a twitter profile.

    The reader itself takes no constructor arguments; the profile to read
    and the number of tweets to fetch are passed to :meth:`load_data`.
    """

    def __init__(self):
        """Initialize SnscrapeTwitter reader."""

    def load_data(self, username: str, num_tweets: int) -> List[Document]:
        """Load data from a twitter profile.

        Args:
            username (str): Twitter Username.
            num_tweets (int): Maximum number of tweets to fetch. A value of
                zero or less fetches nothing.

        Returns:
            List[Document]: A single-element list. The document's ``text``
                holds the list of raw tweet contents, in the order the
                scraper yielded them, and its ``extra_info`` records the
                username.
        """
        # Imported here rather than at module level, so snscrape is only
        # required when tweets are actually loaded.
        from itertools import islice

        import snscrape.modules.twitter as sntwitter

        scraper = sntwitter.TwitterSearchScraper(f"from:{username}")

        # Stop after exactly ``num_tweets`` items. A ``i > num_tweets`` style
        # check would let index ``num_tweets`` through and return one tweet
        # too many; islice also avoids pulling an extra item from the
        # scraper. Clamp at zero because islice rejects negative stops.
        limit = max(num_tweets, 0)
        tweets = [tweet.rawContent for tweet in islice(scraper.get_items(), limit)]

        # NOTE(review): ``text`` receives a list of strings, not a single
        # string. Kept as-is so the returned shape does not change for
        # existing callers -- confirm that Document accepts a list here.
        return [Document(text=tweets, extra_info={"username": username})]
|