mirror of
https://github.com/run-llama/llama-hub.git
synced 2025-09-01 20:37:54 +00:00
367 lines
64 KiB
Plaintext
367 lines
64 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"%load_ext autoreload\n",
|
||
"%autoreload 2"
|
||
]
|
||
},
|
||
{
|
||
"attachments": {},
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Docugami\n",
|
||
"This notebook covers how to load documents from `Docugami`. See [README](./README.md) for more details, and the advantages of using this system over alternative data loaders.\n",
|
||
"\n",
|
||
"## Prerequisites\n",
|
||
"1. Follow the Quick Start section in [README](./README.md)\n",
|
||
"2. Grab an access token for your workspace, and make sure it is set as the `DOCUGAMI_API_KEY` environment variable\n",
|
||
"3. Grab some docset and document IDs for your processed documents, as described here: https://help.docugami.com/home/docugami-api"
|
||
]
|
||
},
|
||
{
|
||
"attachments": {},
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Load Documents\n",
|
||
"\n",
|
||
"If the `DOCUGAMI_API_KEY` environment variable is set, there is no need to pass it to the loader explicitly; otherwise, you can pass it in as the `access_token` parameter."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/usr/local/lib/python3.10/dist-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[Document(text='MUTUAL NON-DISCLOSURE AGREEMENT This Mutual Non-Disclosure Agreement (this “ Agreement ”) is entered into and made effective as of April 4 , 2018 between Docugami Inc. , a Delaware corporation , whose address is 150 Lake Street South , Suite 221 , Kirkland , Washington 98033 , and Caleb Divine , an individual, whose address is 1201 Rt 300 , Newburgh NY 12550 .', doc_id='5b79743d-22d6-43ec-8fd1-8026e1cf3333', embedding=None, doc_hash='368d8592f11eea5a4d5283bea95d58615ecb5c26d0ff334589530154567ba1c7', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ThisMutualNon-disclosureAgreement', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'ThisMutualNon-disclosureAgreement'}),\n",
|
||
" Document(text='The above named parties desire to engage in discussions regarding a potential agreement or other transaction between the parties (the “Purpose”). In connection with such discussions, it may be necessary for the parties to disclose to each other certain confidential information or materials to enable them to evaluate whether to enter into such agreement or transaction.', doc_id='01a98cfb-37e5-4b7b-9509-2a9630689459', embedding=None, doc_hash='82d619fcda012945be1f03fe6695214a4ca4d2cca1762b3bb7de49c9b3e6fc7f', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Discussions', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'Discussions'}),\n",
|
||
" Document(text='In consideration of the foregoing, the parties agree as follows:', doc_id='da4cf465-4846-4bfb-b0f6-30f443786c51', embedding=None, doc_hash='56c557f48bcb2f6f1d9543f5ebaf8403f7560855fc4fd56db8ce2d49956b04ae', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Consideration', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'Consideration'}),\n",
|
||
" Document(text='1. Confidential Information . For purposes of this Agreement , “ Confidential Information ” means any information or materials disclosed by one party to the other party that: (i) if disclosed in writing or in the form of tangible materials, is marked “confidential” or “proprietary” at the time of such disclosure; (ii) if disclosed orally or by visual presentation, is identified as “confidential” or “proprietary” at the time of such disclosure, and is summarized in a writing sent by the disclosing party to the receiving party within thirty ( 30 ) days after any such disclosure; or (iii) due to its nature or the circumstances of its disclosure, a person exercising reasonable business judgment would understand to be confidential or proprietary.', doc_id='3d62e470-561f-49b4-9e5c-5556f498fee4', embedding=None, doc_hash='2b897e1e8b630de4f0955b6401a88096c4bc65bcab5525e6986de49117581dbd', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:Purposes/docset:ConfidentialInformation-section/docset:ConfidentialInformation[2]', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'ConfidentialInformation'}),\n",
|
||
" Document(text=\"2. Obligations and Restrictions . Each party agrees: (i) to maintain the other party's Confidential Information in strict confidence; (ii) not to disclose such Confidential Information to any third party; and (iii) not to use such Confidential Information for any purpose except for the Purpose. Each party may disclose the other party’s Confidential Information to its employees and consultants who have a bona fide need to know such Confidential Information for the Purpose, but solely to the extent necessary to pursue the Purpose and for no other purpose; provided, that each such employee and consultant first executes a written agreement (or is otherwise already bound by a written agreement) that contains use and nondisclosure restrictions at least as protective of the other party’s Confidential Information as those set forth in this Agreement .\", doc_id='e8d33277-b510-492c-9103-8ecec33a7f46', embedding=None, doc_hash='4863e312bc2c4c138558e37529e0ac109f18d4791495efb9f123bf36b0c73ef7', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:Obligations/docset:ObligationsAndRestrictions-section/docset:ObligationsAndRestrictions', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'ObligationsAndRestrictions'}),\n",
|
||
" Document(text='3. Exceptions. The obligations and restrictions in Section 2 will not apply to any information or materials that:', doc_id='744f6b89-c7fe-4465-a479-b290e7b7e05b', embedding=None, doc_hash='9e8f83441e0ac68bc629fcfcd9a5b185b8dde0c2eb7d7209c12283fe2e42369f', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:Exceptions/docset:Exceptions-section/docset:Exceptions[2]', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'Exceptions'}),\n",
|
||
" Document(text='(i) were, at the date of disclosure, or have subsequently become, generally known or available to the public through no act or failure to act by the receiving party;', doc_id='4585a605-dcb2-4447-a863-ebd2a5cac527', embedding=None, doc_hash='8c232813973ffefbc77c3ac3a89c7e3d4cdd78540c62700b2be74bb392f688d1', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:TheDate/docset:TheDate/docset:TheDate', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'TheDate'}),\n",
|
||
" Document(text='(ii) were rightfully known by the receiving party prior to receiving such information or materials from the disclosing party;', doc_id='7ced323c-d8aa-46b9-9834-764a036a0505', embedding=None, doc_hash='cf659be008f33074f113194b8e69fd7c91ae5c48d4a9ee4514b573525d666443', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:TheDate/docset:SuchInformation/docset:TheReceivingParty', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'TheReceivingParty'}),\n",
|
||
" Document(text='(iii) are rightfully acquired by the receiving party from a third party who has the right to disclose such information or materials without breach of any confidentiality obligation to the disclosing party;', doc_id='9c3dcc5d-c0ee-4e05-88b2-8817eb129b98', embedding=None, doc_hash='917fcdd86af937d5616920f555349580287c71de5f0b7ceef01b2bb2ed7ba85b', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:TheDate/docset:TheReceivingParty/docset:TheReceivingParty', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'TheReceivingParty'}),\n",
|
||
" Document(text='4. Compelled Disclosure . Nothing in this Agreement will be deemed to restrict a party from disclosing the other party’s Confidential Information to the extent required by any order, subpoena, law, statute or regulation; provided, that the party required to make such a disclosure uses reasonable efforts to give the other party reasonable advance notice of such required disclosure in order to enable the other party to prevent or limit such disclosure.', doc_id='82518a78-3f6f-4b05-a275-c5cd13251bed', embedding=None, doc_hash='aa6792c7c7b06bc8369669d9f9396d1130cf43b46bab873995ea9e4baefac99b', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:Disclosure/docset:CompelledDisclosure-section/docset:CompelledDisclosure', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'CompelledDisclosure'}),\n",
|
||
" Document(text='5. Return of Confidential Information . Upon the completion or abandonment of the Purpose, and in any event upon the disclosing party’s request, the receiving party will promptly return to the disclosing party all tangible items and embodiments containing or consisting of the disclosing party’s Confidential Information and all copies thereof (including electronic copies), and any notes, analyses, compilations, studies, interpretations, memoranda or other documents (regardless of the form thereof) prepared by or on behalf of the receiving party that contain or are based upon the disclosing party’s Confidential Information .', doc_id='fc1eb0a3-2217-4da9-98d6-9661408f9a7a', embedding=None, doc_hash='96228459e30933cfb4caef7bf622a8d69d1e2ad81a5bab80b437d476064d180e', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:TheCompletion/docset:ReturnofConfidentialInformation-section/docset:ReturnofConfidentialInformation', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'ReturnofConfidentialInformation'}),\n",
|
||
" Document(text='6. No Obligations . Each party retains the right to determine whether to disclose any Confidential Information to the other party.', doc_id='6c2ecdec-0060-4f18-8eb6-8f3ce3da6a65', embedding=None, doc_hash='ec8a72aec1f9b3c79b75320d0791b57cc4ad6477b5736f8dd7d412601a045de0', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:NoObligations/docset:NoObligations-section/docset:NoObligations[2]', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'NoObligations'}),\n",
|
||
" Document(text='7. No Warranty. ALL CONFIDENTIAL INFORMATION IS PROVIDED BY THE DISCLOSING PARTY “AS IS ”.', doc_id='22520ae9-8c08-404e-afe1-8a2f491ee406', embedding=None, doc_hash='6ec95c44359ab768933cd504cef2995a968fd0b2c492ec9e86feca828bada420', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:NoWarranty/docset:NoWarranty-section/docset:NoWarranty[2]', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'NoWarranty'}),\n",
|
||
" Document(text='8. Term. This Agreement will remain in effect for a period of seven ( 7 ) years from the date of last disclosure of Confidential Information by either party, at which time it will terminate.', doc_id='7f82e397-58e4-4ba7-b5a3-f1c06456f3b9', embedding=None, doc_hash='0433e3ad32d54390ef1a56f71737ec1022ea503f69154fea86f7412ab06be4e4', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:ThisAgreement/docset:Term-section/docset:Term', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'Term'}),\n",
|
||
" Document(text='9. Equitable Relief . Each party acknowledges that the unauthorized use or disclosure of the disclosing party’s Confidential Information may cause the disclosing party to incur irreparable harm and significant damages, the degree of which may be difficult to ascertain. Accordingly, each party agrees that the disclosing party will have the right to seek immediate equitable relief to enjoin any unauthorized use or disclosure of its Confidential Information , in addition to any other rights and remedies that it may have at law or otherwise.', doc_id='76441299-44bb-4906-9032-b2beb92ba18d', embedding=None, doc_hash='644d87541f4a44c2aa5fa8507178e85198fcaec1649e43202b00f5309322909a', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:EquitableRelief/docset:EquitableRelief-section/docset:EquitableRelief[2]', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'EquitableRelief'}),\n",
|
||
" Document(text='10. Non-compete. To the maximum extent permitted by applicable law, during the Term of this Agreement and for a period of one ( 1 ) year thereafter, Caleb Divine may not market software products or do business that directly or indirectly competes with Docugami software products .', doc_id='791efbee-b23e-4939-bb2c-ed22b9bcbf56', embedding=None, doc_hash='fbe53faf86b169b8eff8493aa195dfc93bdb23b49b634852d61a713ea70b89c5', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:TheMaximumExtent/docset:Non-compete-section/docset:Non-compete', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'Non-compete'}),\n",
|
||
" Document(text='11. Miscellaneous. This Agreement will be governed and construed in accordance with the laws of the State of Washington , excluding its body of law controlling conflict of laws. This Agreement is the complete and exclusive understanding and agreement between the parties regarding the subject matter of this Agreement and supersedes all prior agreements, understandings and communications, oral or written, between the parties regarding the subject matter of this Agreement . If any provision of this Agreement is held invalid or unenforceable by a court of competent jurisdiction, that provision of this Agreement will be enforced to the maximum extent permissible and the other provisions of this Agreement will remain in full force and effect. Neither party may assign this Agreement , in whole or in part, by operation of law or otherwise, without the other party’s prior written consent, and any attempted assignment without such consent will be void. This Agreement may be executed in counterparts, each of which will be deemed an original, but all of which together will constitute one and the same instrument.', doc_id='7a56c334-c26e-46d2-9a00-ce41b6183729', embedding=None, doc_hash='3a77702016956a88bdb283d44959e69ffac34b85aa9517f0690daf7f66ad23c0', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:Accordance/docset:Miscellaneous-section/docset:Miscellaneous', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'Miscellaneous'}),\n",
|
||
" Document(text='[SIGNATURE PAGE FOLLOWS] IN WITNESS WHEREOF, the parties hereto have executed this Mutual Non-Disclosure Agreement by their duly authorized officers or representatives as of the date first set forth above.', doc_id='0e95c0c2-8163-4706-b196-d96ccff34b94', embedding=None, doc_hash='39fd4861c450f4aa99db25846744bb1c85524dc093e8bf2f9c9e872c1040594c', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:Witness/docset:TheParties/docset:TheParties', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'TheParties'}),\n",
|
||
" Document(text='DOCUGAMI INC . : \\n\\n Caleb Divine : \\n\\n Signature: Signature: Name: \\n\\n Jean Paoli Name: Title: \\n\\n CEO Title:', doc_id='b19489b8-c024-4abb-b715-f876a0dc4d0f', embedding=None, doc_hash='e311d3c0a8be4ae9f3543e2586bad04cb321ab2613a025422e4b320e3771232b', extra_info={'xpath': '/docset:MutualNon-disclosure/docset:Witness/docset:TheParties/docset:DocugamiInc/docset:DocugamiInc/xhtml:table', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': '', 'tag': 'table'}),\n",
|
||
" Document(text='MUTUAL NON-DISCLOSURE AGREEMENT This Mutual Non-Disclosure Agreement (this “Agreement’) is entered into and made effective as of 2/4/2018 between Docugami Inc. , a Delaware corporation , whose address is 150 Lake Street South , Suite 221 , Kirkland , Washington 98033 , and Leonarda Hosler , an individual, whose address is 374 William S Canning Blvd , Fall River MA 2721 .', doc_id='d04341d0-d8c9-4089-9df7-4148b3e4aa56', embedding=None, doc_hash='348c40a6fef0b79ee94c35d1ea6722717afb473dbf9fe97cae7ea73ad9a9f6f2', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ThisMutualNon-disclosureAgreement/docset:ThisMutualNon-disclosureAgreement', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'p', 'tag': 'ThisMutualNon-disclosureAgreement'}),\n",
|
||
" Document(text='The above named parties desire to engage in discussions regarding a potential agreement or other transaction between the parties (the “ Purpose’). In connection with such discussions, it may be necessary for the parties to disclose to each other certain confidential information or materials to enable them to evaluate whether to enter into such agreement or transaction.', doc_id='0593c518-acf5-4133-8037-048aebc0c663', embedding=None, doc_hash='bf0d4bf957e57f052949cae510d3a6a012a908edc9e83fe9186c98e5b8229f53', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ThisMutualNon-disclosureAgreement/docset:Discussions', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'p', 'tag': 'Discussions'}),\n",
|
||
" Document(text='In consideration of the foregoing, the parties agree as follows:', doc_id='a35bcd82-1c70-43c4-9f5f-b86f629e4d33', embedding=None, doc_hash='e96bfc5a92ebedb78c5ead071be8a1c94cd54fc3ad8a6c3fc9359ceeec7ca5e2', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ThisMutualNon-disclosureAgreement/docset:Consideration/docset:Consideration', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'p', 'tag': 'Consideration'}),\n",
|
||
" Document(text='iL. Confidential Information . For purposes of this Agreement , “ Confidential Information ” means any information or materials disclosed by one party to the other party that: (i) if disclosed in writing or in the form of tangible materials, is marked “confidential” or “proprietary” at the time of such disclosure; (ii) if disclosed orally or by visual presentation, is identified as “confidential” or “proprietary” at the time of such disclosure, and is summarized in a writing sent by the disclosing party to the receiving party within thirty ( 30 ) days after any such disclosure; or (iii) due to its nature or the circumstances of its disclosure, a person exercising reasonable business judgment would understand to be confidential or proprietary.', doc_id='d98e14d0-a258-4abd-8c6b-ab13777fd6e0', embedding=None, doc_hash='d234322b083398877c5fde4e8d8e208d2f8853041d8bb36c285d3f7fd922984b', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ThisMutualNon-disclosureAgreement/docset:Consideration/dg:chunk/docset:IlConfidentialInformation/docset:ConfidentialInformation-section/docset:ConfidentialInformation[2]', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'div', 'tag': 'ConfidentialInformation'}),\n",
|
||
" Document(text=\"Ze Obligations and Restrictions . Each party agrees: (i) to maintain the other party's Confidential Information in strict confidence; (ii) not to disclose such Confidential Information to any third party; and ( iii ) not to use such Confidential Information for any purpose except for the Purpose. Each party may disclose the other party’s Confidential Information to its employees and consultants who have a bona fide need to know such Confidential Information for the Purpose, but solely to the extent necessary to pursue the Purpose and for no other purpose; provided, that each such employee and consultant first executes a written agreement (or is otherwise already bound by a written agreement) that contains use and nondisclosure restrictions at least as protective of the other party’s Confidential Information as those set forth in this Agreement .\", doc_id='5b172bd9-8a65-4e07-8a5e-7e7cf7c44dd6', embedding=None, doc_hash='72cb89c8632ae4c6e6a70a744b4b80c6c654dcbcc19fa6685b3cce76621d0ac5', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ZeObligationsAndRestrictions-section/docset:ZeObligationsAndRestrictions/docset:StrictConfidence/docset:StrictConfidence', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'div', 'tag': 'StrictConfidence'}),\n",
|
||
" Document(text='is Exceptions. The obligations and restrictions in Section 2 will not apply to any information or materials that:', doc_id='58ca8833-6f92-4851-96f3-7a51113adedb', embedding=None, doc_hash='209d94e5c657f32d408683f633ae6365e64933a5f573da42ac00aa5f28a4e8ed', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ZeObligationsAndRestrictions-section/docset:ZeObligationsAndRestrictions/docset:Exceptions/docset:Exceptions-section/docset:Exceptions/docset:TheObligations', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'p', 'tag': 'TheObligations'}),\n",
|
||
" Document(text='(i) were, at the date of disclosure, or have subsequently become, generally known or available to the public through no act or failure to act by the receiving party;', doc_id='2ec0323a-64e5-4e79-8e77-752c7b380f15', embedding=None, doc_hash='6874cd19a59835e3088539c2f030a7a48e161144f3027aa998e9a1e4e6d97e55', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ZeObligationsAndRestrictions-section/docset:ZeObligationsAndRestrictions/docset:Exceptions/docset:Exceptions-section/docset:Exceptions/docset:TheDate/docset:TheDate/docset:TheDate', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'p', 'tag': 'TheDate'}),\n",
|
||
" Document(text='(ii) were rightfully known by the receiving party prior to receiving such information or materials from the disclosing party;', doc_id='2416b30b-1114-4e26-9303-7eccd9b84bac', embedding=None, doc_hash='45b2b8b3c690f1740cfb9d107a7aac93957558657f23fb33de1d5c1a3d9766d5', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ZeObligationsAndRestrictions-section/docset:ZeObligationsAndRestrictions/docset:Exceptions/docset:Exceptions-section/docset:Exceptions/docset:TheDate/docset:SuchInformation/docset:TheReceivingParty', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'p', 'tag': 'TheReceivingParty'}),\n",
|
||
" Document(text='(iii) are rightfully acquired by the receiving party from a third party who has the right to disclose such information or materials without breach of any confidentiality obligation to the disclosing party; or', doc_id='dffa7159-9047-4610-9876-71698ba79fd4', embedding=None, doc_hash='c5619393062d7e158772d63dc65e69cc1e0307001e94e7fa95c8ddef0af995ae', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ZeObligationsAndRestrictions-section/docset:ZeObligationsAndRestrictions/docset:Exceptions/docset:Exceptions-section/docset:Exceptions/docset:TheDate/docset:TheReceivingParty[1]/docset:TheReceivingParty', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'p', 'tag': 'TheReceivingParty'}),\n",
|
||
" Document(text='(iv) are independently developed by the receiving party without access to any Confidential Information of the disclosing party.', doc_id='d4a600a3-9d1a-41fa-be2b-f56f4f61850b', embedding=None, doc_hash='ae9f256f6d6c0eced35325f4581324e5d7c62d015b399dc6d53c422a1f7299f6', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ZeObligationsAndRestrictions-section/docset:ZeObligationsAndRestrictions/docset:Exceptions/docset:Exceptions-section/docset:Exceptions/docset:TheDate/docset:TheReceivingParty[2]/docset:TheReceivingParty', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'p', 'tag': 'TheReceivingParty'}),\n",
|
||
" Document(text='4. Compelled Disclosure . Nothing in this Agreement will be deemed to restrict a party from disclosing the other party’s Confidential Information to the extent required by any order, subpoena, law, statute or regulation; provided, that the party required to make such a disclosure uses reasonable efforts to give the other party reasonable advance notice of such required disclosure in order to enable the other party to prevent or limit such disclosure.', doc_id='e6fb4243-0253-495a-b665-f22f5bc10417', embedding=None, doc_hash='05a777dce696dde4b471bb89e39c811d431b0094678a1aa43d54375e883971b2', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ZeObligationsAndRestrictions-section/docset:ZeObligationsAndRestrictions/docset:Exceptions/docset:Exceptions-section/docset:Exceptions/docset:TheDate/docset:Disclosure/docset:CompelledDisclosure-section/docset:CompelledDisclosure', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'div', 'tag': 'CompelledDisclosure'}),\n",
|
||
" Document(text='5. Return of Confidential Information . Upon the completion or abandonment of the Purpose, and in any event upon the disclosing party’s request, the receiving party will promptly return to the disclosing party all tangible items and embodiments containing or consisting of the disclosing party’s Confidential Information and all copies thereof (including electronic copies), and any notes, analyses, compilations, studies, interpretations, memoranda or other documents (regardless of the form thereof) prepared by or on behalf of the receiving party that contain or are based upon the disclosing party’s Confidential Information .', doc_id='7e8f2217-c28e-4d85-b8c6-9be8eaeddf75', embedding=None, doc_hash='362a60349e7398655df172684ddd398718b40111ec44f3a4b3766286277398ec', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ZeObligationsAndRestrictions-section/docset:ZeObligationsAndRestrictions/docset:Exceptions/docset:Exceptions-section/docset:Exceptions/docset:TheDate/docset:TheCompletion/docset:ReturnofConfidentialInformation-section/docset:ReturnofConfidentialInformation', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'div', 'tag': 'ReturnofConfidentialInformation'}),\n",
|
||
" Document(text='6. No Obligations . Each party retains the right, in its sole discretion, to determine whether to disclose any Confidential Information to the other party. Neither party will be required to negotiate nor enter into any other agreements or arrangements with the other party, whether or not related to the Purpose.', doc_id='de4c2867-e641-4b1d-9a4e-021150852f84', embedding=None, doc_hash='3ba55f7b677f0eb25b628b31fa943f62ee192afe8b34c3ef76712f67c7cf9489', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ZeObligationsAndRestrictions-section/docset:ZeObligationsAndRestrictions/docset:Exceptions/docset:Exceptions-section/docset:Exceptions/docset:TheDate/docset:NoObligations/docset:NoObligations-section/docset:NoObligations[2]', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'div', 'tag': 'NoObligations'}),\n",
|
||
" Document(text='ie No License . All Confidential Information remains the sole and exclusive property of the disclosing party. Each party acknowledges and agrees that nothing in this Agreement will be construed as granting any rights to the receiving party, by license or otherwise, in or to any Confidential Information of the disclosing party, or any patent, copyright or other intellectual property or proprietary rights of the disclosing party, except as specified in this Agreement .', doc_id='4e95c5fe-376b-47a4-91d4-a1a909c6e05f', embedding=None, doc_hash='137345886cee3712d74ff75fba0e2143d33b82f7b3cf1b70883719b412a37e1c', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ZeObligationsAndRestrictions-section/docset:ZeObligationsAndRestrictions/docset:Exceptions/docset:IeNoLicense-section/docset:IeNoLicense[2]/docset:TheSoleAndExclusiveProperty/docset:TheSoleAndExclusiveProperty', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'p', 'tag': 'TheSoleAndExclusiveProperty'}),\n",
|
||
" Document(text='8. No Warranty. ALL CONFIDENTIAL INFORMATION IS PROVIDED BY THE DISCLOSING PARTY “AS IS ”.', doc_id='a7e24ffc-4978-40c9-892e-55c572be340e', embedding=None, doc_hash='1e12c1c70bca5303929648afd4bf2240fb0540572f8c1de37668e5f8d4928667', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ZeObligationsAndRestrictions-section/docset:ZeObligationsAndRestrictions/docset:Exceptions/docset:IeNoLicense-section/docset:IeNoLicense[2]/docset:TheSoleAndExclusiveProperty/docset:NoWarranty/docset:NoWarranty/docset:NoWarranty-section/docset:NoWarranty[2]', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'div', 'tag': 'NoWarranty'}),\n",
|
||
" Document(text='9. Term. This Agreement will remain in effect for a period of five ( 5 ) years from the date of last disclosure of Confidential Information by either party, at which time it will terminate.', doc_id='384a4ba6-24a8-4a99-ac2e-0b11e45ecab5', embedding=None, doc_hash='5ac55d08549f7d427f14fc7c2e35ad192b84a86784cafe120e139ad8fd4ad216', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ZeObligationsAndRestrictions-section/docset:ZeObligationsAndRestrictions/docset:Exceptions/docset:IeNoLicense-section/docset:IeNoLicense[2]/docset:TheSoleAndExclusiveProperty/docset:NoWarranty/docset:ThisAgreement/docset:Term-section/docset:Term', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'div', 'tag': 'Term'}),\n",
|
||
" Document(text='10. Equitable Relief . Each party acknowledges that the unauthorized use or disclosure of the disclosing party’s Confidential Information may cause the disclosing party to incur irreparable harm and significant damages, the degree of which may be difficult to ascertain. Accordingly, each party agrees that the disclosing party will have the right to seek immediate equitable relief to enjoin any unauthorized use or disclosure of its Confidential Information , in addition to any other rights and remedies that it may have at law or otherwise.', doc_id='48d223e5-8260-4d13-bf07-13cc1ef3ff0e', embedding=None, doc_hash='f5c840f17e99e16816b1b1263b4062b382001a9a8467cca43c3624da4cb357c5', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ZeObligationsAndRestrictions-section/docset:ZeObligationsAndRestrictions/docset:Exceptions/docset:IeNoLicense-section/docset:IeNoLicense[2]/docset:TheSoleAndExclusiveProperty/docset:NoWarranty/docset:EquitableRelief/docset:EquitableRelief-section/docset:EquitableRelief[2]', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'div', 'tag': 'EquitableRelief'}),\n",
|
||
" Document(text='11. Miscellaneous. This Agreement will be governed and construed in accordance with the laws of the State of Washington , excluding its body of law controlling conflict of laws. This Agreement is the complete and exclusive understanding and agreement between the parties regarding the subject matter of this Agreement and supersedes all prior agreements, understandings and communications, oral or written, between the parties regarding the subject matter of this Agreement . If any provision of this Agreement is held invalid or unenforceable by a court of competent jurisdiction, that provision of this Agreement will be enforced to the maximum extent permissible and the other provisions of this Agreement will remain in full force and effect. Neither party may assign this Agreement , in whole or in part, by operation of law or otherwise, without the other party’s prior written consent, and any attempted assignment without such consent will be void. This Agreement may be executed in counterparts, each of which will be deemed an original, but all of which together will constitute one and the same instrument.', doc_id='c4094ceb-2b83-45b8-ae7a-54f5b825d245', embedding=None, doc_hash='cdb200445df5b1577492f4c03e1f643d6a6195e7bdf794e0a77d6eb63c99ccad', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ZeObligationsAndRestrictions-section/docset:ZeObligationsAndRestrictions/docset:Exceptions/docset:IeNoLicense-section/docset:IeNoLicense[2]/docset:TheSoleAndExclusiveProperty/docset:NoWarranty/docset:Accordance/docset:MiscellaneousThisAgreementWillBeGovernedAndConstruedinAccordancewithT/docset:MiscellaneousThisAgreementWillBeGovernedAndConstruedinAccordancewithT', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'div', 'tag': 'MiscellaneousThisAgreementWillBeGovernedAndConstruedinAccordancewithT'}),\n",
|
||
" Document(text='[SIGNATURE PAGE FOLLows] IN WITNESS WHEREOF, the parties hereto have executed this Mutual Non-Disclosure Agreement by their duly authorized officers or representatives as of the date first set forth above. DOCUGAMI INC . INC .: Leonarda Hosler : Name: Name: Title: Title:', doc_id='0eb79084-683b-46e6-908a-5ae3eeea83da', embedding=None, doc_hash='2c9caed694c0786e86562840dbd946d23c3e5c36c30718204d0d7e0986d84d9d', extra_info={'xpath': '/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ZeObligationsAndRestrictions-section/docset:ZeObligationsAndRestrictions/docset:Exceptions/docset:IeNoLicense-section/docset:IeNoLicense[2]/docset:SIGNATUREPAGEFOLLOWS-section/docset:SIGNATUREPAGEFOLLOWS/docset:INWITNESSWHEREOF/docset:TheParties', 'id': 'bpc1vibyeke2', 'name': 'NDA simple layout nicely scanned.pdf', 'structure': 'p', 'tag': 'TheParties'})]"
|
||
]
|
||
},
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"from llama_index import download_loader\n",
|
||
"\n",
|
||
"DocugamiReader = download_loader('DocugamiReader')\n",
|
||
"\n",
|
||
"docset_id=\"ecxqpipcoe2p\"\n",
|
||
"document_ids=[\"43rj0ds7s0ur\", \"bpc1vibyeke2\"]\n",
|
||
"\n",
|
||
"loader = DocugamiReader()\n",
|
||
"documents = loader.load_data(docset_id=docset_id, document_ids=document_ids)\n",
|
||
"documents"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"The `extra_info` for each `Document` (really, a chunk of an actual PDF, DOC or DOCX) contains some useful additional information:\n",
|
||
"\n",
|
||
"1. **id and name:** ID and Name of the file (PDF, DOC or DOCX) the chunk is sourced from within Docugami.\n",
|
||
"2. **xpath:** XPath inside the XML representation of the document, for the chunk. Useful for source citations directly to the actual chunk inside the document XML.\n",
|
||
"3. **structure:** Structural attributes of the chunk, e.g. h1, h2, div, table, td, etc. Useful to filter out certain kinds of chunks if needed by the caller.\n",
|
||
"4. **tag:** Semantic tag for the chunk, using various generative and extractive techniques. More details here: https://github.com/docugami/DFM-benchmarks"
|
||
]
|
||
},
|
||
{
|
||
"attachments": {},
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Basic Use: Docugami Loader for Document QA\n",
|
||
"\n",
|
||
"You can use the Docugami Loader like a standard loader for Document QA over multiple docs, albeit with much better chunks that follow the natural contours of the document. There are many great tutorials on how to do this, e.g. [this one](https://gpt-index.readthedocs.io/en/latest/getting_started/starter_example.html). We can just use the same code, but use the `DocugamiReader` for better chunking, instead of loading text or PDF files directly with basic splitting techniques."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from llama_index import GPTVectorStoreIndex\n",
|
||
"\n",
|
||
"DocugamiReader = download_loader('DocugamiReader')\n",
|
||
"\n",
|
||
"# For this example, we already have a processed docset for a set of lease documents\n",
|
||
"docset_id=\"wh2kned25uqm\"\n",
|
||
"documents = loader.load_data(docset_id=docset_id)"
|
||
]
|
||
},
|
||
{
|
||
"attachments": {},
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"The documents returned by the loader are already split into chunks. Optionally, we can use the metadata on each chunk, for example the structure or tag attributes, to do any post-processing we want.\n",
|
||
"\n",
|
||
"We will just use the output of the `DocugamiReader` as-is to set up a query engine the usual way."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"index = GPTVectorStoreIndex.from_documents(documents)\n",
|
||
"query_engine = index.as_query_engine(similarity_top_k=5)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\n",
|
||
"Tenants can place or attach signs (digital or otherwise) or other forms of identification to their properties after receiving written permission from the landlord. Any signs or other forms of identification must conform to all applicable laws, ordinances, etc. governing the same. Tenants must also have any window or glass identification completely removed and cleaned at their expense promptly upon vacating the premises.\n",
|
||
"NodeWithScore(node=Node(text='Signage. Tenant may place or attach to the Premises signs (digital or otherwise) or other such identification as needed after receiving written permission from the Landlord , which permission shall not be unreasonably withheld. Any damage caused to the Premises by the Tenant ’s erecting or removing such signs shall be repaired promptly by the Tenant at the Tenant ’s expense . Any signs or other form of identification allowed must conform to all applicable laws, ordinances, etc. governing the same. Tenant also agrees to have any window or glass identification completely removed and cleaned at its expense promptly upon vacating the Premises. \\n\\n ARTICLE VII UTILITIES 7.01', doc_id='1e89f5bf-0cb6-491a-acf6-8be9e6dc6ffb', embedding=None, doc_hash='50e3892892d18199d6b6db4d6205beb327f09b031539afc9e9b239548639a89d', extra_info={'xpath': '/docset:OFFICELEASEAGREEMENT-section/docset:OFFICELEASEAGREEMENT/docset:ThisOFFICELEASEAGREEMENTThis/docset:ArticleIBasic/docset:ArticleIiiUseAndCareOf/docset:ARTICLEIIIUSEANDCAREOFPREMISES-section/docset:ARTICLEIIIUSEANDCAREOFPREMISES/docset:NoOtherPurposes/docset:TenantsResponsibility/dg:chunk', 'id': 'g2fvhekmltza', 'name': 'TruTone Lane 6.pdf', 'structure': 'lim', 'tag': 'chunk'}, node_info={'start': 0, 'end': 747}, relationships={<DocumentRelationship.SOURCE: '1'>: '84779dc3-a104-4bff-bced-f7e2dde58cc1'}), score=0.8617797232715348)\n",
|
||
"NodeWithScore(node=Node(text=\"24. SIGNS . No signage shall be placed by Tenant on any portion of the Project . However, Tenant shall be permitted to place a sign bearing its name in a location approved by Landlord near the entrance to the Premises (at Tenant's cost ) and will be furnished a single listing of its name in the Building's directory (at Landlord 's cost ), all in accordance with the criteria adopted from time to time by Landlord for the Project . Any changes or additional listings in the directory shall be furnished (subject to availability of space) for the then Building Standard charge .\", doc_id='ac44b4fe-551d-4b17-9100-0889c4842f5f', embedding=None, doc_hash='d383b8792e586979e3082ebd4f9e06121f663a53ffd6a712c5622f5cec65bba5', extra_info={'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:GrossRentCreditTheRentCredit-section/docset:GrossRentCreditTheRentCredit/docset:Period/docset:ApplicableSalesTax/docset:PercentageRent/docset:TheTerms/docset:Indemnification/docset:INDEMNIFICATION-section/docset:INDEMNIFICATION/docset:Waiver/docset:Waiver/docset:Signs/docset:SIGNS-section/docset:SIGNS', 'id': 'qkn9cyqsiuch', 'name': 'Shorebucks LLC_AZ.pdf', 'structure': 'div', 'tag': 'SIGNS'}, node_info={'start': 0, 'end': 597}, relationships={<DocumentRelationship.SOURCE: '1'>: 'eccd7773-5fcf-4064-8f62-67f45c724ecd'}), score=0.8508437736864953)\n",
|
||
"NodeWithScore(node=Node(text='ARTICLE VI SIGNAGE 6.01 Signage . Tenant may place or attach to the Premises signs (digital or otherwise) or other such identification as needed after receiving written permission from the Landlord , which permission shall not be unreasonably withheld. Any damage caused to the Premises by the Tenant ’s erecting or removing such signs shall be repaired promptly by the Tenant at the Tenant ’s expense . Any signs or other form of identification allowed must conform to all applicable laws, ordinances, etc. governing the same. Tenant also agrees to have any window or glass identification completely removed and cleaned at its expense promptly upon vacating the Premises.', doc_id='7aa86f41-d711-42bd-94ed-fc99f7c90443', embedding=None, doc_hash='9cf87806118da7fa99be843c9f926302b5ccf1716ceec2fa2352b5f8726182c1', extra_info={'xpath': '/docset:OFFICELEASEAGREEMENT-section/docset:OFFICELEASEAGREEMENT/docset:Article/docset:ARTICLEVISIGNAGE-section/docset:_601Signage-section/docset:_601Signage', 'id': 'v1bvgaozfkak', 'name': 'TruTone Lane 2.docx', 'structure': 'div', 'tag': '_601Signage'}, node_info={'start': 0, 'end': 684}, relationships={<DocumentRelationship.SOURCE: '1'>: '1afd38c9-900b-4e5d-902a-020f0b824751'}), score=0.8491465492763234)\n",
|
||
"NodeWithScore(node=Node(text=\"44 . Signs And Exterior Appearance Tenant agrees that all signs, awnings, protective gates, security devices and other installations visible from the exterior of the Premises shall be subject to Landlord 's prior written approval , shall be subject to the prior approval of the Landmarks Preservation Commission of the City of New York , if required, and shall not interfere with or block either of the adjacent stores, provided, however, that Landlord shall not unreasonably withhold consent for signs that Tenant desires to install. Tenant agrees that any permitted signs, awnings, protective gates, security devices, and other installations shall be installed at Tenant ’s sole cost and expense professionally prepared and dignified and subject to Landlord 's prior written approval , which shall not be unreasonably withheld, delayed or conditioned, and subject to such reasonable rules and restrictions as Landlord from time to time may impose. Tenant shall submit to Landlord drawings of the proposed signs and other installations, showing the size, color, illumination and general appearance thereof, together with a statement of the manner in which the same are to be affixed to the Premises. Tenant shall not commence the installation of the proposed signs and other installations unless and until Landlord shall have approved the same in writing. . Tenant shall not install any neon sign. The aforesaid signs shall be used solely for the purpose of identifying Tenant 's business . No changes shall be made in the signs and other installations without first obtaining Landlord 's prior written consent thereto, which consent shall not be unreasonably withheld, delayed or conditioned. 
Tenant shall, at its own cost and expense, obtain and exhibit to Landlord such permits or certificates of approval as Tenant may be required to obtain from any and all City , State and other authorities having jurisdiction covering the erection, installation, maintenance or use of said signs or other installations, and Tenant shall maintain the said signs and other installations together with any appurtenances thereto in good order and\", doc_id='df1def90-2c7e-449b-96f1-4c8b62b44e74', embedding=None, doc_hash='b5b03c69d554cba1efa555a76d44ebc099877484f788d748b1892a9622a1de1a', extra_info={'xpath': '/docset:Rider/docset:RIDERTOLEASE-section/docset:RIDERTOLEASE/docset:FixedRent/docset:TermYearPeriod/docset:Lease/docset:_42hSmokingProhibitedTenant/docset:TenantsEmployees/docset:TheArea/docset:_44SignsAndExteriorAppearance-section/docset:_44SignsAndExteriorAppearance/docset:TheExterior/docset:TheExterior', 'id': 'omvs4mysdk6b', 'name': 'TruTone Lane 1.docx', 'structure': 'p', 'tag': 'TheExterior'}, node_info={'start': 0, 'end': 2181}, relationships={<DocumentRelationship.SOURCE: '1'>: '063cb174-4593-461a-8afe-1bec0190cecd'}), score=0.8484529479796804)\n",
|
||
"NodeWithScore(node=Node(text=\"24. SIGNS . No signage shall be placed by Tenant on any portion of the Project . However, Tenant shall be permitted to place a sign bearing its name in a location approved by Landlord near the entrance to the Premises (at Tenant's cost ) and will be furnished a single listing of its name in the Building's directory (at Landlord 's cost ), all in accordance with the criteria adopted from time to time by Landlord for the Project . Any changes or additional listings in the directory shall be furnished (subject to availability of space) for the then Building Standard charge .\", doc_id='87672346-8373-4c19-a1e3-5fe55410c561', embedding=None, doc_hash='6f90f6b2ac80947c072d4fbfcab6824f68af7b74ab3b284b6e65d30ce3ed6f4c', extra_info={'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:GrossRentCreditTheRentCredit-section/docset:GrossRentCreditTheRentCredit/docset:Guaranty-section/docset:Guaranty[2]/docset:TheTransfer/docset:TheTerms/docset:Indemnification/docset:INDEMNIFICATION-section/docset:INDEMNIFICATION/docset:Waiver/docset:Waiver/docset:Signs/docset:SIGNS-section/docset:SIGNS', 'id': 'md8rieecquyv', 'name': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'SIGNS'}, node_info={'start': 0, 'end': 597}, relationships={<DocumentRelationship.SOURCE: '1'>: '942fd7ed-4303-4b8e-8877-b198e8bb80bb'}), score=0.8460398975408094)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Try out the query engine with example query\n",
|
||
"response = query_engine.query(\"What can tenants do with signage on their properties?\")\n",
|
||
"print(response.response)\n",
|
||
"for node in response.source_nodes:\n",
|
||
" print(node)"
|
||
]
|
||
},
|
||
{
|
||
"attachments": {},
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Using Docugami to Add Metadata to Chunks for High Accuracy Document QA\n",
|
||
"\n",
|
||
"One issue with large documents is that the correct answer to your question may depend on chunks that are far apart in the document. Typical chunking techniques, even with overlap, will struggle to provide the LLM sufficient context to answer such questions. With upcoming very large context LLMs, it may be possible to stuff a lot of tokens, perhaps even entire documents, inside the context but this will still hit limits at some point with very long documents, or a lot of documents.\n",
|
||
"\n",
|
||
"For example, if we ask a more complex question that requires the LLM to draw on chunks from different parts of the document, even OpenAI's powerful LLM is unable to answer correctly."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\n",
|
||
"The security deposit for the property owned by Birch Street is not specified in the context information provided.\n",
|
||
"Shorebucks LLC_CO.pdf\n",
|
||
"1.12 Security Deposit . As of the Date of this Lease , there is no Security Deposit .\n",
|
||
"Shorebucks LLC_AZ.pdf\n",
|
||
"22. SECURITY DEPOSIT . The Security Deposit shall be held by Landlord as security for Tenant 's full and faithful performance of this Lease including the payment of Rent . Tenant grants Landlord a security interest in the Security Deposit . The Security Deposit may be commingled with other funds of Landlord and Landlord shall have no liability for payment of any interest on the Security Deposit . Landlord may apply the Security Deposit to the extent required to cure any default by Tenant . If Landlord so applies the Security Deposit , Tenant shall deliver to Landlord the amount necessary to replenish the Security Deposit to its original sum within five days after notice from Landlord . The Security Deposit shall not be deemed an advance payment of Rent or a measure of damages for any default by Tenant , nor shall it be a defense to any action that Landlord may bring against Tenant .\n",
|
||
"Shorebucks LLC_NJ.pdf\n",
|
||
"22. SECURITY DEPOSIT . The Security Deposit shall be held by Landlord as security for Tenant 's full and faithful performance of this Lease including the payment of Rent . Tenant grants Landlord a security interest in the Security Deposit . The Security Deposit may be commingled with other funds of Landlord and Landlord shall have no liability for payment of any interest on the Security Deposit . Landlord may apply the Security Deposit to the extent required to cure any default by Tenant . If Landlord so applies the Security Deposit , Tenant shall deliver to Landlord the amount necessary to replenish the Security Deposit to its original sum within five days after notice from Landlord . The Security Deposit shall not be deemed an advance payment of Rent or a measure of damages for any default by Tenant , nor shall it be a defense to any action that Landlord may bring against Tenant .\n",
|
||
"Shorebucks LLC_CO.pdf\n",
|
||
"22. SECURITY DEPOSIT . The Security Deposit shall be held by Landlord as security for Tenant 's full and faithful performance of this Lease including the payment of Rent . Tenant grants Landlord a security interest in the Security Deposit . The Security Deposit may be commingled with other funds of Landlord and Landlord shall have no liability for payment of any interest on the Security Deposit . Landlord may apply the Security Deposit to the extent required to cure any default by Tenant . If Landlord so applies the Security Deposit , Tenant shall deliver to Landlord the amount necessary to replenish the Security Deposit to its original sum within five days after notice from Landlord . The Security Deposit shall not be deemed an advance payment of Rent or a measure of damages for any default by Tenant , nor shall it be a defense to any action that Landlord may bring against Tenant .\n",
|
||
"Shorebucks LLC_NJ.pdf\n",
|
||
"1.12 Security Deposit . As of the Date of this Lease , there is no Security Deposit .\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/usr/local/lib/python3.10/dist-packages/llama_index/data_structs/node.py:181: UserWarning: .extra_info is deprecated, use .node.extra_info instead\n",
|
||
" warnings.warn(\".extra_info is deprecated, use .node.extra_info instead\")\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"response = query_engine.query(\"What is the security deposit for the property owned by Birch Street?\")\n",
|
||
"print(response.response) # the correct answer should be $78,000\n",
|
||
"for node in response.source_nodes:\n",
|
||
" print(node.extra_info[\"name\"])\n",
|
||
" print(node.node.text)"
|
||
]
|
||
},
|
||
{
|
||
"attachments": {},
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"At first glance the answer may seem reasonable, but if you review the source chunks carefully for this answer, you will see that the chunking of the document did not end up putting the Landlord name and the security deposit in the same context, since they are far apart in the document. The query engine therefore ends up finding unrelated chunks from other documents not even related to the **Birch Street** landlord. That landlord happens to be mentioned on the first page of the file **TruTone Lane 1.docx**, but none of the source chunks used by the query engine contain the correct answer (**$78,000**), and the answer is therefore incorrect."
|
||
]
|
||
},
|
||
{
|
||
"attachments": {},
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Docugami can help here. Chunks are annotated with additional metadata created using different techniques if a user has been [using Docugami](https://help.docugami.com/home/reports). More technical approaches will be added later.\n",
|
||
"\n",
|
||
"Specifically, let's look at the additional metadata on the documents returned by Docugami after some additional use, in the form of some simple key/value pairs on all the text chunks:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'xpath': '/docset:OFFICELEASEAGREEMENT-section/docset:OFFICELEASEAGREEMENT/docset:ThisOfficeLeaseAgreement',\n",
|
||
" 'id': 'v1bvgaozfkak',\n",
|
||
" 'name': 'TruTone Lane 2.docx',\n",
|
||
" 'structure': 'p',\n",
|
||
" 'tag': 'ThisOfficeLeaseAgreement',\n",
|
||
" 'Landlord': 'BUBBA CENTER PARTNERSHIP',\n",
|
||
" 'Tenant': 'Truetone Lane LLC'}"
|
||
]
|
||
},
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"docset_id=\"wh2kned25uqm\"\n",
|
||
"documents = loader.load_data(docset_id=docset_id)\n",
|
||
"documents[0].extra_info"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"index = GPTVectorStoreIndex.from_documents(documents)\n",
|
||
"query_engine = index.as_query_engine(similarity_top_k=5)"
|
||
]
|
||
},
|
||
{
|
||
"attachments": {},
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Let's run the same question again. It returns the correct result since all the chunks have metadata key/value pairs on them carrying key information about the document, even if this information is physically very far away from the source chunk used to generate the answer."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\n",
|
||
"The security deposit for the property owned by Birch Street is $78,000.\n",
|
||
"TruTone Lane 1.docx\n",
|
||
"NodeWithScore(node=Node(text='$ 20,023.78 of the Security to the Tenant and the Security obligation shall be $ 31,976.72 and remain until the expiration or earlier termination of this Lease .', doc_id='d34995dc-cbe2-4f70-a248-ca0e8c937d7b', embedding=None, doc_hash='84ec2102e9e9cc07487556772b8f97aa14e01d6f763ba1315e0ae2132d67691c', extra_info={'xpath': '/docset:Rider/docset:RIDERTOLEASE-section/docset:RIDERTOLEASE/docset:FixedRent/docset:TermYearPeriod/docset:Lease/docset:_42hSmokingProhibitedTenant/docset:TenantsEmployees/docset:TheArea/docset:_56SecurityDeposit-section/docset:_56SecurityDeposit/docset:TheForegoing/docset:TheSecurity', 'id': 'omvs4mysdk6b', 'name': 'TruTone Lane 1.docx', 'structure': 'p', 'tag': 'TheSecurity', 'Landlord': 'BIRCH STREET , LLC', 'Tenant': 'Trutone Lane LLC'}, node_info={'start': 0, 'end': 171}, relationships={<DocumentRelationship.SOURCE: '1'>: '659e354f-b749-4938-967f-638fea177fa0'}), score=0.8289222268861388)\n",
|
||
"TruTone Lane 1.docx\n",
|
||
"NodeWithScore(node=Node(text='The Security being held pursuant to this Article shall at all times be an amount equal to \\n\\n\\n\\n\\n\\n three ( 3 ) times the monthly fixed rent then reserved under Article 40 of this Lease . On the first day of the month following each anniversary of the Rent Commencement Date of this Lease , Tenant shall pay to Landlord funds sufficient so that the un-applied Security held by Landlord shall at all times equal three times the monthly fixed rent then reserved under Article 40 of this Lease .', doc_id='f0d27e80-90b8-4436-85eb-f0deaa485b77', embedding=None, doc_hash='a0fcdc9cd2dc6dc9f9f97423f8d76494af80b500c5c7bdbefc2c05aea9085d89', extra_info={'xpath': '/docset:Rider/docset:RIDERTOLEASE-section/docset:RIDERTOLEASE/docset:FixedRent/docset:TermYearPeriod/docset:Lease/docset:_42hSmokingProhibitedTenant/docset:TenantsEmployees/docset:TheArea/docset:_56SecurityDeposit-section/docset:_56SecurityDeposit/docset:TheEvent/docset:TheSecurity', 'id': 'omvs4mysdk6b', 'name': 'TruTone Lane 1.docx', 'structure': 'p', 'tag': 'TheSecurity', 'Landlord': 'BIRCH STREET , LLC', 'Tenant': 'Trutone Lane LLC'}, node_info={'start': 0, 'end': 517}, relationships={<DocumentRelationship.SOURCE: '1'>: 'a97b9f2a-2e01-4d65-bfd3-89aa18fca942'}), score=0.8227364343224219)\n",
|
||
"TruTone Lane 1.docx\n",
|
||
"NodeWithScore(node=Node(text=\"56 . Security Deposit Upon execution of this Lease , Tenant has deposited with Landlord the sum of $ 78,000.00 in good funds as security for the full and faithful performance and observance by Tenant of the terms, covenants and conditions of this Lease (the “Security”). If Tenant defaults in the performance or observance of any term, covenant or condition of this Lease , including without limitation the obligation of Tenant to pay any rent or other sum required hereunder, Landlord may use, after 10 days written notice to Tenant ,apply, or retain, without any application to any court or tribunal, the whole or any part of the Security so deposited to the extent required for the payment of any rent or any other sum as to which Tenant is in default or for any sum which Landlord may expend or may be required to expend by reason of Tenant 's default , including without limitation any damages or deficiency accrued before or after summary proceedings or other re-entry by Landlord . Such use, application, or retention by the Landlord shall be without prejudice to Landlord ’s rights to seek any and all additional rent and/or damages that may have accrued. 
If Tenant shall fully and faithfully observe and perform all of the terms, covenants, and conditions of this Lease , the Security , shall be returned to Tenant after the end of the term of this Lease or at permissible early termination as provided herein and the delivery of possession of the demised Premises to Landlord .\", doc_id='5456d727-13b5-4197-9070-b6acad549f58', embedding=None, doc_hash='3ae3541e4750e005e58bd6a9c8379f548309eadc5559b6fd9d0636fea6909fc0', extra_info={'xpath': '/docset:Rider/docset:RIDERTOLEASE-section/docset:RIDERTOLEASE/docset:FixedRent/docset:TermYearPeriod/docset:Lease/docset:_42hSmokingProhibitedTenant/docset:TenantsEmployees/docset:TheArea/docset:_56SecurityDeposit-section/docset:_56SecurityDeposit/docset:Execution', 'id': 'omvs4mysdk6b', 'name': 'TruTone Lane 1.docx', 'structure': 'p', 'tag': 'Execution', 'Landlord': 'BIRCH STREET , LLC', 'Tenant': 'Trutone Lane LLC'}, node_info={'start': 0, 'end': 1533}, relationships={<DocumentRelationship.SOURCE: '1'>: '9a9d71ca-c0a3-4ab4-ab58-cf5cd611a53c'}), score=0.8225535679622072)\n",
|
||
"Shorebucks LLC_CO.pdf\n",
|
||
"NodeWithScore(node=Node(text='1.12 Security Deposit . As of the Date of this Lease , there is no Security Deposit .', doc_id='418f110b-c0fd-4813-9649-2003a0c47504', embedding=None, doc_hash='6344b5840d282172b1bcb82b4e29a74e524b011c1f73dfd26d5563dfc796193b', extra_info={'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:GrossRentCreditTheRentCredit-section/docset:GrossRentCreditTheRentCredit/docset:First/docset:ApplicableSalesTax/docset:PercentageRent/docset:SecurityDeposit/docset:SecurityDeposit-section/docset:SecurityDeposit[2]', 'id': 'dsyfhh4vpeyf', 'name': 'Shorebucks LLC_CO.pdf', 'structure': 'div', 'tag': 'SecurityDeposit', 'Landlord': 'Perry & Blair LLC', 'Tenant': 'Shorebucks LLC'}, node_info={'start': 0, 'end': 87}, relationships={<DocumentRelationship.SOURCE: '1'>: '04ab648a-18d9-473f-83cc-ea0a872a1049'}), score=0.8222174185648468)\n",
|
||
"TruTone Lane 1.docx\n",
|
||
"NodeWithScore(node=Node(text='Notwithstanding the foregoing, provided Tenant is not then in default of this Lease , on March 15 , 2022 , Landlord shall return $ 26,000 of the Security to the Tenant and the Security obligation shall be $ 52,000 . In the event Tenant continues to comply with all of the terms and conditions of this Lease , and provided Tenant is not then in default of this Lease , on March 15 , 2022 , Landlord shall return', doc_id='738bf4d8-cf83-43da-9083-49434954f8f3', embedding=None, doc_hash='20e4e9257ce3e8a2072eb0d4973160af6362a290c0e4fac16be6195356f97898', extra_info={'xpath': '/docset:Rider/docset:RIDERTOLEASE-section/docset:RIDERTOLEASE/docset:FixedRent/docset:TermYearPeriod/docset:Lease/docset:_42hSmokingProhibitedTenant/docset:TenantsEmployees/docset:TheArea/docset:_56SecurityDeposit-section/docset:_56SecurityDeposit/docset:TheForegoing/docset:TheForegoing', 'id': 'omvs4mysdk6b', 'name': 'TruTone Lane 1.docx', 'structure': 'p', 'tag': 'TheForegoing', 'Landlord': 'BIRCH STREET , LLC', 'Tenant': 'Trutone Lane LLC'}, node_info={'start': 0, 'end': 438}, relationships={<DocumentRelationship.SOURCE: '1'>: '7248de1e-0140-4e59-b324-ee5df7065ceb'}), score=0.8159128793979528)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"response = query_engine.query(\"What is the security deposit for the property owned by Birch Street?\")\n",
|
||
"print(response.response) # the correct answer should be $78,000\n",
|
||
"for node in response.source_nodes:\n",
|
||
" print(node.extra_info[\"name\"])\n",
|
||
" print(node)"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.6"
|
||
},
|
||
"orig_nbformat": 4
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|