2023-03-06 11:49:42 -06:00

95 lines
2.4 KiB
Python

"""Intercom reader."""
from typing import List
import json
from llama_index.readers.base import BaseReader
from llama_index.readers.schema.base import Document
class IntercomReader(BaseReader):
"""Intercom reader. Reads data from a Intercom workspace.
Args:
personal_access_token (str): Intercom token.
"""
def __init__(self, intercom_access_token: str) -> None:
"""Initialize Intercom reader."""
self.intercom_access_token = intercom_access_token
def load_data(self) -> List[Document]:
"""Load data from the workspace.
Args:
workspace_id (str): Workspace ID.
Returns:
List[Document]: List of documents.
"""
from bs4 import BeautifulSoup
results = []
articles = self.get_all_articles()
for article in articles:
body = article['body']
soup = BeautifulSoup(body, 'html.parser')
body = soup.get_text()
extra_info = {
"id": article['id'],
"title": article['title'],
"url": article['url'],
"updated_at": article['updated_at']
}
results.append(
Document(
body,
extra_info=extra_info,
)
)
return results
def get_all_articles(self):
articles = []
next_page = None
while True:
response = self.get_articles_page(next_page)
articles.extend(response['articles'])
next_page = response['next_page']
if next_page is None:
break
return articles
def get_articles_page(self, next_page: str = None):
import requests
if next_page is None:
url = "https://api.intercom.io/articles"
else:
url = next_page
headers = {
"accept": "application/json",
"Intercom-Version": "2.8",
"authorization": f"Bearer {self.intercom_access_token}"
}
response = requests.get(url, headers=headers)
response_json = json.loads(response.text)
next_page = response_json.get('pages', {}).get('next', None)
articles = response_json.get('data', [])
return {
"articles": articles,
"next_page": next_page
}