mirror of
				https://github.com/run-llama/llama-hub.git
				synced 2025-11-03 19:30:13 +00:00 
			
		
		
		
	Merge pull request #92 from AgentHQ/main
This commit is contained in:
		
						commit
						fdc44a79cb
					
				
							
								
								
									
										21
									
								
								loader_hub/gmail/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								loader_hub/gmail/README.md
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,21 @@
 | 
			
		||||
# Gmail Loader
 | 
			
		||||
 | 
			
		||||
This loader searches your Gmail account and parses the resulting emails into `Document`s. The search query can include normal query params, like `from: email@example.com label:inbox`.
 | 
			
		||||
 | 
			
		||||
As a prerequisite, you will need to register with Google and generate a `credentials.json` file in the directory where you run this loader. See [here](https://developers.google.com/workspace/guides/create-credentials) for instructions.
 | 
			
		||||
 | 
			
		||||
## Usage
 | 
			
		||||
 | 
			
		||||
To use this loader, you simply need to pass in a search query string.
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
from llama_index import download_loader
 | 
			
		||||
 | 
			
		||||
GmailReader = download_loader('GmailReader')
 | 
			
		||||
loader = GmailReader(query="from: me label:inbox")
 | 
			
		||||
documents = loader.load_data()
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Examples
 | 
			
		||||
 | 
			
		||||
This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent.
 | 
			
		||||
							
								
								
									
										1
									
								
								loader_hub/gmail/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								loader_hub/gmail/__init__.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1 @@
 | 
			
		||||
"""Init file."""
 | 
			
		||||
							
								
								
									
										139
									
								
								loader_hub/gmail/base.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										139
									
								
								loader_hub/gmail/base.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,139 @@
 | 
			
		||||
"""Google Mail reader."""
 | 
			
		||||
import email
 | 
			
		||||
from typing import Any, List
 | 
			
		||||
from llama_index.readers.base import BaseReader
 | 
			
		||||
from llama_index.readers.schema.base import Document
 | 
			
		||||
from pydantic import BaseModel
 | 
			
		||||
import base64
 | 
			
		||||
 | 
			
		||||
# OAuth scopes requested when loading or creating the user's Gmail credentials
# (passed to both the stored-token loader and the installed-app flow below).
SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GmailReader(BaseReader, BaseModel):
    """Gmail reader.

    Searches the authenticated user's Gmail account and converts every
    matching message into a ``Document`` whose text is the message body and
    whose ``extra_info`` holds the message ``id``, ``threadId`` and
    ``snippet``.

    Args:
        query (str): Gmail query. Defaults to None.
        max_results (int): Max number of results. Defaults to 10.
        service (Any): Optional pre-built Gmail API service object. When it
            is not supplied, ``load_data`` builds one from the credentials
            returned by ``_get_credentials``.
    """

    query: str = None
    max_results: int = 10
    # Explicit default: a bare ``Any`` annotation is not treated as optional
    # by every pydantic release, and ``load_data`` relies on "unset" being
    # falsy.
    service: Any = None

    def load_data(self) -> List[Document]:
        """Load emails from the user's account.

        Returns:
            One ``Document`` per message returned by ``search_messages``.
        """
        # Only authenticate when no service was injected; otherwise a caller
        # supplying its own service would still trigger the local OAuth flow
        # and a write to token.json.
        if not self.service:
            from googleapiclient.discovery import build

            credentials = self._get_credentials()
            self.service = build('gmail', 'v1', credentials=credentials)

        results = []
        for message in self.search_messages():
            # The body becomes the document text; the remaining keys
            # (id, threadId, snippet) travel along as metadata.
            text = message.pop('body')
            results.append(Document(text, extra_info=message))

        return results

    def _get_credentials(self) -> Any:
        """Get valid user credentials from storage.

        The file token.json stores the user's access and refresh tokens, and is
        created automatically when the authorization flow completes for the first
        time.

        Returns:
            Credentials, the obtained credential.
        """
        import os
        from google.auth.transport.requests import Request
        from google.oauth2.credentials import Credentials
        from google_auth_oauthlib.flow import InstalledAppFlow

        creds = None
        if os.path.exists("token.json"):
            creds = Credentials.from_authorized_user_file("token.json", SCOPES)
        # If there are no (valid) credentials available, let the user log in.
        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(
                    "credentials.json", SCOPES
                )
                creds = flow.run_local_server(port=0)
            # Save the credentials for the next run
            with open("token.json", "w") as token:
                token.write(creds.to_json())

        return creds

    def search_messages(self):
        """Run ``self.query`` against Gmail and fetch each matching message.

        Returns:
            A list of dicts as produced by ``get_message_data``; messages for
            which no body could be extracted are skipped.

        Raises:
            Exception: If fetching or parsing any message fails.
        """
        messages = self.service.users().messages().list(
            userId='me',
            q=self.query,
            maxResults=int(self.max_results)
        ).execute().get('messages', [])

        result = []
        try:
            for message in messages:
                message_data = self.get_message_data(message)
                if not message_data:
                    continue
                result.append(message_data)
        except Exception as e:
            # Keep the generic exception type callers may already catch, but
            # preserve the original traceback as the cause.
            raise Exception("Can't get message data: " + str(e)) from e

        return result

    def get_message_data(self, message):
        """Fetch one message in raw format and extract its text body.

        Args:
            message: A message stub containing at least an ``'id'`` key, as
                returned by the list call in ``search_messages``.

        Returns:
            A dict with ``id``, ``threadId``, ``snippet`` and ``body``, or
            None when the extracted body is empty.
        """
        message_id = message['id']
        message_data = self.service.users().messages().get(
            format="raw",
            userId='me',
            id=message_id).execute()
        body = self.extract_message_body(message_data)

        if not body:
            return None

        return {
            "id": message_data['id'],
            "threadId": message_data['threadId'],
            "snippet": message_data['snippet'],
            "body": body,
        }

    def extract_message_body(self, message: dict) -> str:
        """Decode a raw Gmail message and return its text content.

        The base64url ``'raw'`` payload is parsed as a MIME message. Text from
        all non-attachment ``text/plain`` parts is returned; if there are
        none, ``text/html`` parts are converted to text with BeautifulSoup
        instead. An empty string is returned when the message has no text
        parts.

        Args:
            message: A message resource containing a ``'raw'`` key.

        Returns:
            The decoded message text.

        Raises:
            Exception: If the payload cannot be decoded or parsed.
        """
        from bs4 import BeautifulSoup

        try:
            raw_bytes = base64.urlsafe_b64decode(message['raw'].encode('ASCII'))
            mime_msg = email.message_from_bytes(raw_bytes)

            plain_parts = []
            html_parts = []
            for part in mime_msg.walk():
                # Containers carry no payload of their own, and attachments
                # are not part of the readable body.
                if part.is_multipart():
                    continue
                if part.get_content_disposition() == 'attachment':
                    continue

                content_type = part.get_content_type()
                if content_type not in ('text/plain', 'text/html'):
                    continue

                # decode=True undoes the transfer encoding
                # (base64 / quoted-printable) and yields bytes.
                payload = part.get_payload(decode=True) or b""
                charset = part.get_content_charset() or 'utf-8'
                try:
                    text = payload.decode(charset, errors='replace')
                except LookupError:
                    # Unknown charset label: fall back rather than fail the
                    # whole load over one badly labelled message.
                    text = payload.decode('utf-8', errors='replace')

                if content_type == 'text/plain':
                    plain_parts.append(text)
                else:
                    html_parts.append(text)

            if plain_parts:
                return "\n".join(plain_parts)

            # No plain-text alternative: strip the markup from the HTML parts.
            return "\n".join(
                BeautifulSoup(html, 'html.parser').get_text()
                for html in html_parts
            )
        except Exception as e:
            raise Exception("Can't parse message body: " + str(e)) from e
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # Manual smoke run: load the messages matching a sample query and print
    # the resulting documents.
    gmail_reader = GmailReader(query="from:me after:2023-01-01")
    loaded_documents = gmail_reader.load_data()
    print(loaded_documents)
 | 
			
		||||
							
								
								
									
										4
									
								
								loader_hub/gmail/requirements.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								loader_hub/gmail/requirements.txt
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,4 @@
 | 
			
		||||
google-api-python-client
 | 
			
		||||
google-auth-httplib2
 | 
			
		||||
google-auth-oauthlib
 | 
			
		||||
beautifulsoup4
 | 
			
		||||
@ -324,9 +324,14 @@
 | 
			
		||||
    "author": "bbornsztein",
 | 
			
		||||
    "keywords": ["wordpress", "blog"]
 | 
			
		||||
  },
 | 
			
		||||
  "SteamshipFileReader": {
 | 
			
		||||
  "GmailReader": {
 | 
			
		||||
    "id": "gmail",
 | 
			
		||||
    "author": "bbornsztein",
 | 
			
		||||
    "keywords": ["gmail", "email"]
 | 
			
		||||
  },
 | 
			
		||||
 "SteamshipFileReader": {
 | 
			
		||||
    "id": "steamship",
 | 
			
		||||
    "author": "douglas-reid",
 | 
			
		||||
    "keywords": ["steamship"]
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user