Loader for local version of Outlook Calendar (#201)

Co-authored-by: Jerry Liu <jerryjliu98@gmail.com>
This commit is contained in:
Tom Evslin 2023-04-17 19:37:23 -04:00 committed by GitHub
parent 6a6dc85203
commit f5abd36ab5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 155 additions and 0 deletions

View File

@ -481,6 +481,14 @@
"id": "zulip",
"author": "plurigrid"
},
"OutlookLocalCalendarReader": {
"id":"outlook_localcalendar",
"author":"tevslin",
"keywords":[
"calendar",
"outlook"
]
},
"ApifyActor": {
"id": "apify/actor",
"author": "drobnikj",

View File

@ -0,0 +1,39 @@
# Outlook Local Calendar Loader
This loader reads your past and upcoming Calendar events from your local Outlook .ost or .pst and parses the relevant info into `Documents`.
It runs on Windows only and has only been tested with Windows 11. It has been designed to have a superset of the functionality of the Google Calendar reader.
## Usage
Here's an example usage of the OutlookLocalCalendarReader. It will retrieve up to 100 future events, unless an optional `number_of_results` argument is passed. It will also retrieve only future events, unless an optional `start_date` argument is passed. Optionally, events can be restricted to those which occur on or before a specific date by specifying the optional `end_date` parameter. By default, `end_date` is 2199-01-01.
It always returns Start, End, Subject, Location, and Organizer attributes and optionally returns additional attributes specified in the `more_attributes` parameter, which, if specified, must be a list of strings eg. ['Body','someotherattribute',...]. Attributes which don't exist in a calendar entry are ignored without warning.
```python
from llama_index import download_loader
OutlookCalendarReader = download_loader('OutlookLocalCalendarReader')
loader = OutlookCalendarReader()
documents = loader.load_data()
```
## Example
This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent.
### LlamaIndex
```python
from llama_index import GPTSimpleVectorIndex, download_loader
OutlookCalendarReader = download_loader('OutlookLocalCalendarReader')
loader = OutlookCalendarReader()
documents = loader.load_data(start_date='2022-01-01', number_of_results=1000)
index = GPTSimpleVectorIndex(documents)
index.query('When did I last see George Guava? When do I see him again?')
```
Note: it is actually better to give a structured prompt with this data, and to be sure it is clear what today's date is and whether you want any data besides the indexed data used in answering the prompt.

View File

@ -0,0 +1 @@
"""Init file."""

View File

@ -0,0 +1,106 @@
"""Outlook local calendar reader for Windows."""
"""
Created on Sun Apr 16 12:03:19 2023
@author: tevslin
"""
import datetime
import importlib
import platform
from typing import List, Optional, Union
from llama_index.readers.base import BaseReader
from llama_index.readers.schema.base import Document
# Copyright 2023 Evslin Consulting
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class OutlookLocalCalendarReader(BaseReader):
    """Outlook local calendar reader for Windows.

    Reads events from the local copy of the default Outlook calendar through
    the Outlook COM automation interface (``win32com.client``).
    """

    @staticmethod
    def _to_date(
        value: Optional[Union[str, datetime.date]],
        default: datetime.date,
    ) -> datetime.date:
        """Normalize an optional str/date/datetime bound to a ``datetime.date``."""
        if value is None:
            return default
        if isinstance(value, str):
            return datetime.date.fromisoformat(value)
        # datetime.datetime is a subclass of datetime.date, but the two cannot
        # be ordered against each other, so reduce it to its calendar day.
        if isinstance(value, datetime.datetime):
            return value.date()
        return value

    def load_data(
        self,
        number_of_results: Optional[int] = 100,
        start_date: Optional[Union[str, datetime.date]] = None,
        end_date: Optional[Union[str, datetime.date]] = None,
        more_attributes: Optional[List[str]] = None,
    ) -> List[Document]:
        """Load data from the user's local calendar.

        Args:
            number_of_results (Optional[int]): the maximum number of events to
                return. Defaults to 100. ``None`` means no limit.
            start_date (Optional[Union[str, datetime.date]]): the first date
                (inclusive) to return events from, as a date or an ISO
                ``YYYY-MM-DD`` string. Defaults to today.
            end_date (Optional[Union[str, datetime.date]]): the last date
                (inclusive) to return events from, as a date or an ISO
                ``YYYY-MM-DD`` string. Defaults to 2199-01-01.
            more_attributes (Optional[List[str]]): additional attributes to be
                retrieved from calendar entries. Non-existent attributes are
                ignored.

        Returns:
            A list of documents suitable for indexing by llama_index, one per
            event, ordered by event start. Each document always contains the
            Start, End, Subject, Location, and Organizer attributes, followed
            by any attributes requested in ``more_attributes``. An empty list
            is returned when not running on Windows.

        Raises:
            ValueError: if a string date is not a valid ISO date.
        """
        if platform.system().lower() != "windows":
            return []

        # Base attributes to return, followed by any the caller asked for.
        attributes = ["Start", "End", "Subject", "Location", "Organizer"]
        if more_attributes is not None:
            attributes += more_attributes

        first_day = self._to_date(start_date, datetime.date.today())
        last_day = self._to_date(end_date, datetime.date(2199, 1, 1))

        # A non-positive limit can never be satisfied by any event.
        if number_of_results is not None and number_of_results <= 0:
            return []

        # Initialize the Outlook application. The import is deferred so the
        # module can still be imported on platforms without pywin32.
        win32_client = importlib.import_module("win32com.client")
        outlook = win32_client.Dispatch("Outlook.Application").GetNamespace("MAPI")

        # Get the Calendar folder (9 is passed as the default-folder id).
        calendar_folder = outlook.GetDefaultFolder(9)

        # Retrieve calendar items.
        events = calendar_folder.Items
        if not events:
            return []
        events.Sort("[Start]")  # Sort items by start time

        results = []
        for event in events:
            event_start = event.Start
            event_day = datetime.date(
                event_start.year, event_start.month, event_start.day
            )
            if event_day < first_day:
                continue  # before the requested window
            if event_day > last_day:
                # Items were sorted by start time above, so every remaining
                # event is also past the end of the window.
                break

            event_text = "".join(
                f"{attribute}: {getattr(event, attribute)}, "
                for attribute in attributes
                if hasattr(event, attribute)
            )
            results.append(Document(event_text))

            if number_of_results is not None and len(results) >= number_of_results:
                break

        return results
if __name__ == "__main__":
    # Manual smoke test: load events with the default arguments and print them.
    reader = OutlookLocalCalendarReader()
    documents = reader.load_data()
    print(documents)

View File

@ -0,0 +1 @@
pywin32