mirror of
https://github.com/run-llama/llama-hub.git
synced 2025-08-15 12:11:50 +00:00
118 lines
3.3 KiB
Plaintext
118 lines
3.3 KiB
Plaintext
![]() |
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "a8fda9ff",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Bored Llama: BoardDocs in LLaMA Index!\n",
|
||
|
"\n",
|
||
|
"This is a fun experiment to see if we can crawl a BoardDocs site and index it with LlamaIndex so we can ask questions about it."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"id": "013bd7f3",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import sys\n",
|
||
|
"from llama_index import download_loader\n",
|
||
|
"\n",
|
||
|
"# Use the temporary staging location to exercise the loader before the first check-in lands\n",
|
||
|
"BoardDocsReader = download_loader(\"BoardDocsReader\",\n",
|
||
|
" loader_hub_url=\"https://raw.githubusercontent.com/dweekly/llama-hub/boarddocs/loader_hub\",\n",
|
||
|
" refresh_cache=True)\n",
|
||
|
"loader = BoardDocsReader(site=\"ca/redwood\", committee_id=\"A4EP6J588C05\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 7,
|
||
|
"id": "27e1a431",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Parsing meeting CPSNV9612DF1\n",
|
||
|
" Agenda Title: Board of Trustees Regular Meeting - 7:00pm (Closed Session at 6:15 PM)\n",
|
||
|
" Agenda Date: Wednesday, May 10, 2023\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
|
||
|
"INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 18818 tokens\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# load the meeting data and build an index over it (querying happens in the next cell)\n",
|
||
|
"from llama_index import GPTSimpleVectorIndex\n",
|
||
|
"\n",
|
||
|
"# load one specific meeting from this committee, selected by its meeting ID.\n",
|
||
|
"documents = loader.load_data(meeting_ids=[\"CPSNV9612DF1\"])\n",
|
||
|
"\n",
|
||
|
"# build an index\n",
|
||
|
"index = GPTSimpleVectorIndex.from_documents(documents)\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 9,
|
||
|
"id": "f1701638",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 3832 tokens\n",
|
||
|
"INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 9 tokens\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"Trustee Weekly did not start attending meetings until May 10, 2023, the date of the Board of Trustees Regular Meeting, when the board ratified the warrant registers for the period April 1, 2023, through April 30, 2023.\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Now we can start asking it questions!!\n",
|
||
|
"answer = index.query('When did Trustee Weekly start attending meetings?')\n",
|
||
|
"print(answer.response)\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3 (ipykernel)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.11.3"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|