mirror of
https://github.com/run-llama/llama-hub.git
synced 2025-08-15 12:11:50 +00:00
95 lines
2.4 KiB
Python
95 lines
2.4 KiB
Python
"""Intercom reader."""
|
|
from typing import List
|
|
import json
|
|
from llama_index.readers.base import BaseReader
|
|
from llama_index.readers.schema.base import Document
|
|
|
|
|
|
class IntercomReader(BaseReader):
|
|
"""Intercom reader. Reads data from a Intercom workspace.
|
|
|
|
Args:
|
|
personal_access_token (str): Intercom token.
|
|
"""
|
|
|
|
def __init__(self, intercom_access_token: str) -> None:
|
|
"""Initialize Intercom reader."""
|
|
|
|
self.intercom_access_token = intercom_access_token
|
|
|
|
def load_data(self) -> List[Document]:
|
|
"""Load data from the workspace.
|
|
|
|
Args:
|
|
workspace_id (str): Workspace ID.
|
|
Returns:
|
|
List[Document]: List of documents.
|
|
"""
|
|
from bs4 import BeautifulSoup
|
|
|
|
results = []
|
|
|
|
articles = self.get_all_articles()
|
|
|
|
for article in articles:
|
|
|
|
body = article['body']
|
|
soup = BeautifulSoup(body, 'html.parser')
|
|
body = soup.get_text()
|
|
|
|
extra_info = {
|
|
"id": article['id'],
|
|
"title": article['title'],
|
|
"url": article['url'],
|
|
"updated_at": article['updated_at']
|
|
}
|
|
|
|
results.append(
|
|
Document(
|
|
body,
|
|
extra_info=extra_info,
|
|
)
|
|
)
|
|
|
|
return results
|
|
|
|
def get_all_articles(self):
|
|
articles = []
|
|
next_page = None
|
|
|
|
while True:
|
|
response = self.get_articles_page(next_page)
|
|
articles.extend(response['articles'])
|
|
next_page = response['next_page']
|
|
|
|
if next_page is None:
|
|
break
|
|
|
|
return articles
|
|
|
|
def get_articles_page(self, next_page: str = None):
|
|
import requests
|
|
if next_page is None:
|
|
url = "https://api.intercom.io/articles"
|
|
else:
|
|
url = next_page
|
|
|
|
headers = {
|
|
"accept": "application/json",
|
|
"Intercom-Version": "2.8",
|
|
"authorization": f"Bearer {self.intercom_access_token}"
|
|
}
|
|
|
|
response = requests.get(url, headers=headers)
|
|
|
|
response_json = json.loads(response.text)
|
|
|
|
next_page = response_json.get('pages', {}).get('next', None)
|
|
|
|
articles = response_json.get('data', [])
|
|
|
|
return {
|
|
"articles": articles,
|
|
"next_page": next_page
|
|
}
|