llama-hub/loader_hub/zendesk/base.py

"""Zendesk reader."""
import json
from typing import List

from llama_index.readers.base import BaseReader
from llama_index.readers.schema.base import Document


class ZendeskReader(BaseReader):
    """Zendesk reader. Reads Help Center articles from a Zendesk workspace.

    Args:
        zendesk_subdomain (str): Zendesk subdomain, i.e. the ``<subdomain>``
            part of ``https://<subdomain>.zendesk.com``.
        locale (str): Locale of articles
    """

    # Seconds passed to ``requests.get`` as ``timeout``. Without it a stalled
    # connection would block the loader indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, zendesk_subdomain: str, locale: str = "en-us") -> None:
        """Initialize Zendesk reader."""
        self.zendesk_subdomain = zendesk_subdomain
        self.locale = locale

    def load_data(self) -> List[Document]:
        """Load every Help Center article of the configured locale.

        Each article's HTML body is reduced to plain text with BeautifulSoup;
        the article id, title, URL and last-update timestamp are attached to
        the document as ``extra_info``.

        Returns:
            List[Document]: One document per article.

        Raises:
            requests.HTTPError: If the Zendesk API answers with an error
                status code.
        """
        from bs4 import BeautifulSoup

        results = []

        for article in self.get_all_articles():
            # An article may come back with a null/missing body; treat it as
            # empty text instead of letting BeautifulSoup fail on ``None``.
            html_body = article.get("body") or ""
            text = BeautifulSoup(html_body, "html.parser").get_text()
            extra_info = {
                "id": article["id"],
                "title": article["title"],
                "url": article["html_url"],
                "updated_at": article["updated_at"],
            }

            results.append(
                Document(
                    text,
                    extra_info=extra_info,
                )
            )

        return results

    def get_all_articles(self) -> List[dict]:
        """Fetch all articles by following the API's ``next_page`` links.

        Returns:
            List[dict]: The raw article objects of every page, in the order
                the API returned them.
        """
        articles = []
        # ``None`` makes ``get_articles_page`` start from the first page.
        next_page = None

        while True:
            page = self.get_articles_page(next_page)
            articles.extend(page["articles"])
            next_page = page["next_page"]

            # The last page carries no link to a following page.
            if next_page is None:
                break

        return articles

    def get_articles_page(self, next_page: str = None) -> dict:
        """Fetch a single page of articles.

        Args:
            next_page (str): Full URL of the page to fetch, as returned in a
                previous page's ``next_page`` field. When ``None``, the first
                page for the configured subdomain and locale is requested.

        Returns:
            dict: ``{"articles": [...], "next_page": <url or None>}``.

        Raises:
            requests.HTTPError: If the API answers with a 4xx/5xx status.
        """
        import requests

        if next_page is None:
            url = f"https://{self.zendesk_subdomain}.zendesk.com/api/v2/help_center/{self.locale}/articles?per_page=100"
        else:
            url = next_page

        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly on HTTP errors: an error payload has no "articles" key
        # and would otherwise be indistinguishable from an empty help center.
        response.raise_for_status()

        response_json = json.loads(response.text)

        return {
            "articles": response_json.get("articles", []),
            "next_page": response_json.get("next_page", None),
        }
add zendesk, intercom, and wordpress API loaders 2023-03-05 15:06:10 -06:00			`"""Zendesk reader."""`
			`import json`
fix bs4 importing 2023-03-06 11:49:42 -06:00			`from typing import List`

add zendesk, intercom, and wordpress API loaders 2023-03-05 15:06:10 -06:00			`from llama_index.readers.base import BaseReader`
			`from llama_index.readers.schema.base import Document`


			`class ZendeskReader(BaseReader):`
			`"""Zendesk reader. Reads data from a Zendesk workspace.`

			`Args:`
			`zendesk_subdomain (str): Zendesk subdomain`
Add locale settings for Zendesk loader (#131) 2023-03-21 01:56:43 +09:00			`locale (str): Locale of articles`
add zendesk, intercom, and wordpress API loaders 2023-03-05 15:06:10 -06:00			`"""`

Add locale settings for Zendesk loader (#131) 2023-03-21 01:56:43 +09:00			`def __init__(self, zendesk_subdomain: str, locale: str = "en-us") -> None:`
add zendesk, intercom, and wordpress API loaders 2023-03-05 15:06:10 -06:00			`"""Initialize Zendesk reader."""`
			`self.zendesk_subdomain = zendesk_subdomain`
Add locale settings for Zendesk loader (#131) 2023-03-21 01:56:43 +09:00			`self.locale = locale`
add zendesk, intercom, and wordpress API loaders 2023-03-05 15:06:10 -06:00
			`def load_data(self) -> List[Document]:`
			`"""Load data from the workspace.`

			`Args:`
			`workspace_id (str): Workspace ID.`
			`Returns:`
			`List[Document]: List of documents.`
			`"""`
fix bs4 importing 2023-03-06 11:49:42 -06:00			`from bs4 import BeautifulSoup`

add zendesk, intercom, and wordpress API loaders 2023-03-05 15:06:10 -06:00			`results = []`

			`articles = self.get_all_articles()`
			`for article in articles:`
fix bs4 importing 2023-03-06 11:49:42 -06:00			`body = article["body"]`
			`soup = BeautifulSoup(body, "html.parser")`
add zendesk, intercom, and wordpress API loaders 2023-03-05 15:06:10 -06:00			`body = soup.get_text()`
			`extra_info = {`
fix bs4 importing 2023-03-06 11:49:42 -06:00			`"id": article["id"],`
			`"title": article["title"],`
			`"url": article["html_url"],`
			`"updated_at": article["updated_at"],`
add zendesk, intercom, and wordpress API loaders 2023-03-05 15:06:10 -06:00			`}`

			`results.append(`
			`Document(`
			`body,`
			`extra_info=extra_info,`
			`)`
			`)`

			`return results`

			`def get_all_articles(self):`
			`articles = []`
			`next_page = None`

			`while True:`
			`response = self.get_articles_page(next_page)`
fix bs4 importing 2023-03-06 11:49:42 -06:00			`articles.extend(response["articles"])`
			`next_page = response["next_page"]`
add zendesk, intercom, and wordpress API loaders 2023-03-05 15:06:10 -06:00
			`if next_page is None:`
			`break`

			`return articles`

			`def get_articles_page(self, next_page: str = None):`
fix bs4 importing 2023-03-06 11:49:42 -06:00			`import requests`

add zendesk, intercom, and wordpress API loaders 2023-03-05 15:06:10 -06:00			`if next_page is None:`
Add locale settings for Zendesk loader (#131) 2023-03-21 01:56:43 +09:00			`url = f"https://{self.zendesk_subdomain}.zendesk.com/api/v2/help_center/{self.locale}/articles?per_page=100"`
add zendesk, intercom, and wordpress API loaders 2023-03-05 15:06:10 -06:00			`else:`
			`url = next_page`

			`response = requests.get(url)`

			`response_json = json.loads(response.text)`

fix bs4 importing 2023-03-06 11:49:42 -06:00			`next_page = response_json.get("next_page", None)`
add zendesk, intercom, and wordpress API loaders 2023-03-05 15:06:10 -06:00
fix bs4 importing 2023-03-06 11:49:42 -06:00			`articles = response_json.get("articles", [])`
add zendesk, intercom, and wordpress API loaders 2023-03-05 15:06:10 -06:00
fix bs4 importing 2023-03-06 11:49:42 -06:00			`return {"articles": articles, "next_page": next_page}`