diff --git a/CHANGELOG.md b/CHANGELOG.md index 26414ce65..7029840db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,11 @@ -## 0.10.17-dev6 +## 0.10.17-dev7 ### Enhancements * **Adds data source properties to SharePoint, Outlook, Onedrive, Reddit, and Slack connectors** These properties (date_created, date_modified, version, source_url, record_locator) are written to element metadata during ingest, mapping elements to information about the document source from which they derive. This functionality enables downstream applications to reveal source document applications, e.g. a link to a GDrive doc, Salesforce record, etc. * **Add functionality to save embedded images in PDF's separately as images** This allows users to save embedded images in PDF's separately as images, given some directory path. The saved image path is written to the metadata for the Image element. Downstream applications may benefit by providing users with image links from relevant "hits." * **Azure Cognite Search destination connector** New Azure Cognitive Search destination connector added to ingest CLI. Users may now use `unstructured-ingest` to write partitioned data from over 20 data sources (so far) to an Azure Cognitive Search index. +* **Improves Salesforce partitioning** Partitions Salesforce data as xml instead of text for improved detail and flexibility. Partitions htmlbody instead of textbody for Salesforce emails. Importance: Allows all Salesforce fields to be ingested and gives Salesforce emails more detailed partitioning. 
### Features diff --git a/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9EIAU.json b/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9EIAU.json index b873afc4f..13a10d93c 100644 --- a/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9EIAU.json +++ b/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9EIAU.json @@ -1,28 +1,99 @@ [ + { + "type": "Title", + "element_id": "aa611cb13dc326fcc963f235dd44fe19", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "attributes.type: Campaign" + }, + { + "type": "Title", + "element_id": "f8870b3af3522dbf19218c2da5fea3a6", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "attributes.url: /services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU" + }, { "type": "UncategorizedText", "element_id": "d6a8689a12ad0cd0314b04e1c2cee3c9", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Id: 701Hu000001eX9EIAU" }, + { + "type": "NarrativeText", + "element_id": "d5c5abf10d418d9ead0dc394cfce036f", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": 
"2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "IsDeleted: False" + }, { "type": "Title", "element_id": "69d9d94f0bc4b8d425fa99dce2b78311", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Name: GC Product Webinar - Jan 7, 2002" }, + { + "type": "Title", + "element_id": "fe183f24b67677f16a0243a4ddc33e26", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "ParentId: None" + }, { "type": "Title", "element_id": "f80d0033c7e5a8d6ae66778815e33f35", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Type: Webinar" }, @@ -30,8 +101,13 @@ "type": "Title", "element_id": "ad1b8a8ebbde05c57a773f60045de6f6", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Status: Completed" }, @@ -39,8 +115,13 @@ "type": "UncategorizedText", "element_id": "08336889c7ebb4ba297a396eb072d83c", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": 
"/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "StartDate: 2023-01-29" }, @@ -48,17 +129,41 @@ "type": "UncategorizedText", "element_id": "887de29c98087cc9d07b242432cff930", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "EndDate: 2023-01-29" }, + { + "type": "Title", + "element_id": "a5ee71fd9f08bb6b06fa5431b5f7f05f", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "ExpectedRevenue: 3000000.0" + }, { "type": "Title", "element_id": "ffe62693f34276315c62d28b06005bcf", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "BudgetedCost: 10000.0" }, @@ -66,17 +171,69 @@ "type": "Title", "element_id": "5739187b01834fedcc2362b5d3841d07", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "ActualCost: 11400.0" }, + { + "type": "Title", + 
"element_id": "270ecd02e255a8bfd922acf3f2d8b1ac", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "ExpectedResponse: 10.0" + }, + { + "type": "Title", + "element_id": "fd2a3392a4ae4eec64c5e089b629d17c", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberSent: 5000.0" + }, + { + "type": "Title", + "element_id": "26a526fe045853810a4e7fe8b770b26e", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "IsActive: True" + }, { "type": "Title", "element_id": "8750ec1f6f59b282d104b919c7ffab0f", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Description: None" }, @@ -84,8 +241,13 @@ "type": "Title", "element_id": "54e16b100bf2118d4c7c18c3f93e2223", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "NumberOfLeads: 0" }, @@ -93,9 
+255,238 @@ "type": "Title", "element_id": "db1ed63f2ed83e51f75619047c417e49", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "NumberOfConvertedLeads: 0" + }, + { + "type": "Title", + "element_id": "9dba8c11c1bf3f3e308445ae74489cc1", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberOfContacts: 0" + }, + { + "type": "Title", + "element_id": "142cc644728caa77d46cb28875d8ce87", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberOfResponses: 0" + }, + { + "type": "Title", + "element_id": "98ac718353f4a43493b23c7e9b492e7f", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberOfOpportunities: 0" + }, + { + "type": "Title", + "element_id": "dcb99328bf342d4a482a0a49c6de6141", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberOfWonOpportunities: 0" + }, + { 
+ "type": "Title", + "element_id": "e25dfbf0a1c88cdf6656e608124a768f", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "AmountAllOpportunities: 0.0" + }, + { + "type": "Title", + "element_id": "b9df436714f2480e46280cb066f7f56e", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "AmountWonOpportunities: 0.0" + }, + { + "type": "Title", + "element_id": "7d5637770b00a3a3aa72ee14bfd23d24", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "OwnerId: 005Hu00000Owz9uIAB" + }, + { + "type": "UncategorizedText", + "element_id": "47f5c9e49e63fd1cd28b6e807ddab95a", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "CreatedDate: 2023-08-18T11:48:08.000+0000" + }, + { + "type": "Title", + "element_id": "ca6b08cabe001c2e8926a751b8dc0a6e", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "CreatedById: 005Hu00000Owz9uIAB" + }, + { 
+ "type": "UncategorizedText", + "element_id": "5ca9d0a0ae1c9a4e10ba2750b17e136c", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastModifiedDate: 2023-08-18T11:48:08.000+0000" + }, + { + "type": "Title", + "element_id": "6830685f5e226de71ad73b18f844fb9b", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastModifiedById: 005Hu00000Owz9uIAB" + }, + { + "type": "UncategorizedText", + "element_id": "a0e91d7714e8a62170ac1073cd7befd9", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "SystemModstamp: 2023-08-18T11:48:08.000+0000" + }, + { + "type": "Title", + "element_id": "dc125e8de9d761a3eeea22aa8973baa8", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastActivityDate: None" + }, + { + "type": "Title", + "element_id": "531505f0a6d181a8f21e57c60dc7a55b", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": 
"LastViewedDate: None" + }, + { + "type": "Title", + "element_id": "c9a06c69fe4b81e0f8b9a055f5d9b03f", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastReferencedDate: None" + }, + { + "type": "Title", + "element_id": "d35500dc646c6d074c74d5170fba1e95", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "CampaignMemberRecordTypeId: None" } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9FIAU.json b/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9FIAU.json index 89519a9e6..348448455 100644 --- a/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9FIAU.json +++ b/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9FIAU.json @@ -1,28 +1,99 @@ [ + { + "type": "Title", + "element_id": "aa611cb13dc326fcc963f235dd44fe19", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "attributes.type: Campaign" + }, + { + "type": "Title", + "element_id": "ef2f4870487db654c7d2b57f829814b5", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": 
"2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "attributes.url: /services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU" + }, { "type": "UncategorizedText", "element_id": "582957afcab0b1f0df8e414c601b679a", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Id: 701Hu000001eX9FIAU" }, + { + "type": "NarrativeText", + "element_id": "d5c5abf10d418d9ead0dc394cfce036f", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "IsDeleted: False" + }, { "type": "Title", "element_id": "b1097935922e3ea926a35ace5fe68a61", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Name: User Conference - Jun 17-19, 2002" }, + { + "type": "Title", + "element_id": "fe183f24b67677f16a0243a4ddc33e26", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "ParentId: None" + }, { "type": "Title", "element_id": "0c29c88f4b31c6f5caf8b36885c4c1c6", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": 
"/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Type: Conference" }, @@ -30,8 +101,13 @@ "type": "Title", "element_id": "b537e10ee8e78ec7a18792eaa76ce0e4", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Status: Planned" }, @@ -39,8 +115,13 @@ "type": "UncategorizedText", "element_id": "6cbe5bf59dabd290307e79547b2a86f2", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "StartDate: 2023-07-09" }, @@ -48,17 +129,41 @@ "type": "UncategorizedText", "element_id": "6cb3c2919b2c47be4b187d17c316a539", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "EndDate: 2023-07-11" }, + { + "type": "Title", + "element_id": "cdf407dd243fc297a308f721260c21a8", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "ExpectedRevenue: 5500000.0" + }, { "type": 
"Title", "element_id": "3062c8ae2aae8afe4e38d8fa2a6ea248", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "BudgetedCost: 100000.0" }, @@ -66,17 +171,69 @@ "type": "Title", "element_id": "754630afa9bba639e59a8a80785f2766", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "ActualCost: None" }, + { + "type": "Title", + "element_id": "ef26c11c41150d99466a68eeec51b640", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "ExpectedResponse: 15.0" + }, + { + "type": "Title", + "element_id": "7fd5b7e9536d536d0eba5047b11a5cb8", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberSent: 40000.0" + }, + { + "type": "Title", + "element_id": "26a526fe045853810a4e7fe8b770b26e", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "IsActive: True" 
+ }, { "type": "Title", "element_id": "8750ec1f6f59b282d104b919c7ffab0f", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Description: None" }, @@ -84,8 +241,13 @@ "type": "Title", "element_id": "54e16b100bf2118d4c7c18c3f93e2223", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "NumberOfLeads: 0" }, @@ -93,9 +255,238 @@ "type": "Title", "element_id": "db1ed63f2ed83e51f75619047c417e49", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "NumberOfConvertedLeads: 0" + }, + { + "type": "Title", + "element_id": "9dba8c11c1bf3f3e308445ae74489cc1", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberOfContacts: 0" + }, + { + "type": "Title", + "element_id": "142cc644728caa77d46cb28875d8ce87", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + 
}, + "filetype": "application/xml" + }, + "text": "NumberOfResponses: 0" + }, + { + "type": "Title", + "element_id": "98ac718353f4a43493b23c7e9b492e7f", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberOfOpportunities: 0" + }, + { + "type": "Title", + "element_id": "dcb99328bf342d4a482a0a49c6de6141", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberOfWonOpportunities: 0" + }, + { + "type": "Title", + "element_id": "e25dfbf0a1c88cdf6656e608124a768f", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "AmountAllOpportunities: 0.0" + }, + { + "type": "Title", + "element_id": "b9df436714f2480e46280cb066f7f56e", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "AmountWonOpportunities: 0.0" + }, + { + "type": "Title", + "element_id": "7d5637770b00a3a3aa72ee14bfd23d24", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, 
+ "text": "OwnerId: 005Hu00000Owz9uIAB" + }, + { + "type": "UncategorizedText", + "element_id": "47f5c9e49e63fd1cd28b6e807ddab95a", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "CreatedDate: 2023-08-18T11:48:08.000+0000" + }, + { + "type": "Title", + "element_id": "ca6b08cabe001c2e8926a751b8dc0a6e", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "CreatedById: 005Hu00000Owz9uIAB" + }, + { + "type": "UncategorizedText", + "element_id": "5ca9d0a0ae1c9a4e10ba2750b17e136c", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastModifiedDate: 2023-08-18T11:48:08.000+0000" + }, + { + "type": "Title", + "element_id": "6830685f5e226de71ad73b18f844fb9b", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastModifiedById: 005Hu00000Owz9uIAB" + }, + { + "type": "UncategorizedText", + "element_id": "a0e91d7714e8a62170ac1073cd7befd9", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": 
"2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "SystemModstamp: 2023-08-18T11:48:08.000+0000" + }, + { + "type": "Title", + "element_id": "dc125e8de9d761a3eeea22aa8973baa8", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastActivityDate: None" + }, + { + "type": "Title", + "element_id": "531505f0a6d181a8f21e57c60dc7a55b", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastViewedDate: None" + }, + { + "type": "Title", + "element_id": "c9a06c69fe4b81e0f8b9a055f5d9b03f", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastReferencedDate: None" + }, + { + "type": "Title", + "element_id": "d35500dc646c6d074c74d5170fba1e95", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "CampaignMemberRecordTypeId: None" } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9GIAU.json b/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9GIAU.json index d87da8362..6303c6076 100644 --- 
a/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9GIAU.json +++ b/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9GIAU.json @@ -1,28 +1,99 @@ [ + { + "type": "Title", + "element_id": "aa611cb13dc326fcc963f235dd44fe19", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "attributes.type: Campaign" + }, + { + "type": "Title", + "element_id": "cd41ba564c2035ae1d02f5e3650f39e1", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "attributes.url: /services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU" + }, { "type": "UncategorizedText", "element_id": "153e1fa63953e7e19f9004e0253eab68", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Id: 701Hu000001eX9GIAU" }, + { + "type": "NarrativeText", + "element_id": "d5c5abf10d418d9ead0dc394cfce036f", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "IsDeleted: False" + }, { "type": "Title", "element_id": "9b9c5e71eff6a483e85da52d1a1f1005", "metadata": { - "data_source": {}, - "filetype": 
"text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Name: DM Campaign to Top Customers - Nov 12-23, 2001" }, + { + "type": "Title", + "element_id": "fe183f24b67677f16a0243a4ddc33e26", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "ParentId: None" + }, { "type": "Title", "element_id": "28fa7658294d152358d23d8bde3c9e56", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Type: Direct Mail" }, @@ -30,8 +101,13 @@ "type": "Title", "element_id": "ad1b8a8ebbde05c57a773f60045de6f6", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Status: Completed" }, @@ -39,8 +115,13 @@ "type": "UncategorizedText", "element_id": "59b06bd535fc60e446ce4f6db6392a8d", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": 
"StartDate: 2022-12-04" }, @@ -48,17 +129,41 @@ "type": "UncategorizedText", "element_id": "a7caa29a82d158a422b901babed10321", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "EndDate: 2022-12-15" }, + { + "type": "Title", + "element_id": "16c237251889480d8e0e5234cf198e2b", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "ExpectedRevenue: 2500000.0" + }, { "type": "Title", "element_id": "a830028696ccdc6c73f26e2f5f0b3e0d", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "BudgetedCost: 25000.0" }, @@ -66,17 +171,69 @@ "type": "Title", "element_id": "f393ef1a67e6b638d6825e00ffa85b5e", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "ActualCost: 23500.0" }, + { + "type": "Title", + "element_id": "7dbe784238042675737de400c3112644", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + 
"date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "ExpectedResponse: 7.0" + }, + { + "type": "Title", + "element_id": "fd49829c9a39ed51c7cf5dab24ec4e88", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberSent: 15000.0" + }, + { + "type": "Title", + "element_id": "26a526fe045853810a4e7fe8b770b26e", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "IsActive: True" + }, { "type": "Title", "element_id": "8750ec1f6f59b282d104b919c7ffab0f", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Description: None" }, @@ -84,8 +241,13 @@ "type": "Title", "element_id": "54e16b100bf2118d4c7c18c3f93e2223", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "NumberOfLeads: 0" }, @@ -93,9 +255,238 @@ "type": "Title", "element_id": "db1ed63f2ed83e51f75619047c417e49", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": 
"2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "NumberOfConvertedLeads: 0" + }, + { + "type": "Title", + "element_id": "9dba8c11c1bf3f3e308445ae74489cc1", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberOfContacts: 0" + }, + { + "type": "Title", + "element_id": "142cc644728caa77d46cb28875d8ce87", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberOfResponses: 0" + }, + { + "type": "Title", + "element_id": "98ac718353f4a43493b23c7e9b492e7f", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberOfOpportunities: 0" + }, + { + "type": "Title", + "element_id": "dcb99328bf342d4a482a0a49c6de6141", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberOfWonOpportunities: 0" + }, + { + "type": "Title", + "element_id": "e25dfbf0a1c88cdf6656e608124a768f", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": 
"2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "AmountAllOpportunities: 0.0" + }, + { + "type": "Title", + "element_id": "b9df436714f2480e46280cb066f7f56e", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "AmountWonOpportunities: 0.0" + }, + { + "type": "Title", + "element_id": "7d5637770b00a3a3aa72ee14bfd23d24", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "OwnerId: 005Hu00000Owz9uIAB" + }, + { + "type": "UncategorizedText", + "element_id": "47f5c9e49e63fd1cd28b6e807ddab95a", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "CreatedDate: 2023-08-18T11:48:08.000+0000" + }, + { + "type": "Title", + "element_id": "ca6b08cabe001c2e8926a751b8dc0a6e", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "CreatedById: 005Hu00000Owz9uIAB" + }, + { + "type": "UncategorizedText", + "element_id": "5ca9d0a0ae1c9a4e10ba2750b17e136c", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + 
"date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastModifiedDate: 2023-08-18T11:48:08.000+0000" + }, + { + "type": "Title", + "element_id": "6830685f5e226de71ad73b18f844fb9b", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastModifiedById: 005Hu00000Owz9uIAB" + }, + { + "type": "UncategorizedText", + "element_id": "a0e91d7714e8a62170ac1073cd7befd9", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "SystemModstamp: 2023-08-18T11:48:08.000+0000" + }, + { + "type": "Title", + "element_id": "dc125e8de9d761a3eeea22aa8973baa8", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastActivityDate: None" + }, + { + "type": "Title", + "element_id": "531505f0a6d181a8f21e57c60dc7a55b", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastViewedDate: None" + }, + { + "type": "Title", + "element_id": "c9a06c69fe4b81e0f8b9a055f5d9b03f", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": 
"2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastReferencedDate: None" + }, + { + "type": "Title", + "element_id": "d35500dc646c6d074c74d5170fba1e95", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "CampaignMemberRecordTypeId: None" } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9HIAU.json b/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9HIAU.json index 36753980b..afd222d7b 100644 --- a/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9HIAU.json +++ b/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9HIAU.json @@ -1,28 +1,99 @@ [ + { + "type": "Title", + "element_id": "aa611cb13dc326fcc963f235dd44fe19", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "attributes.type: Campaign" + }, + { + "type": "Title", + "element_id": "25bfa76752b584b3234d689bcd20bb82", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "attributes.url: /services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU" + }, { "type": "UncategorizedText", "element_id": 
"697cb5681a4f17c6cb712dfce64ae2d1", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Id: 701Hu000001eX9HIAU" }, + { + "type": "NarrativeText", + "element_id": "d5c5abf10d418d9ead0dc394cfce036f", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "IsDeleted: False" + }, { "type": "Title", "element_id": "37c6c6fcf92fd42c703ad967a4691a32", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Name: International Electrical Engineers Association Trade Show - Mar 4-5, 2002" }, + { + "type": "Title", + "element_id": "fe183f24b67677f16a0243a4ddc33e26", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "ParentId: None" + }, { "type": "Title", "element_id": "a4b5a79024228eb84bcefe4bfe8bce47", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + 
"filetype": "application/xml" }, "text": "Type: Trade Show" }, @@ -30,8 +101,13 @@ "type": "Title", "element_id": "b537e10ee8e78ec7a18792eaa76ce0e4", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Status: Planned" }, @@ -39,8 +115,13 @@ "type": "UncategorizedText", "element_id": "5d68899808565a0eb340e7ce9a42c981", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "StartDate: 2023-03-26" }, @@ -48,17 +129,41 @@ "type": "UncategorizedText", "element_id": "eb23b79d3e286bef615fa4bf7bbe6c6d", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "EndDate: 2023-03-27" }, + { + "type": "Title", + "element_id": "1614a2aec5fd66fd65f6b06ab5164043", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "ExpectedRevenue: 8500000.0" + }, { "type": "Title", "element_id": "2bb64865cdfea5db0f000dde162fc372", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": 
"/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "BudgetedCost: 50000.0" }, @@ -66,17 +171,69 @@ "type": "Title", "element_id": "754630afa9bba639e59a8a80785f2766", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "ActualCost: None" }, + { + "type": "Title", + "element_id": "bb7195d3ff60f4ab8878cc91c69c0963", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "ExpectedResponse: 0.0" + }, + { + "type": "Title", + "element_id": "cb20be3dba5cef028c220c4592939f47", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberSent: 0.0" + }, + { + "type": "Title", + "element_id": "26a526fe045853810a4e7fe8b770b26e", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "IsActive: True" + }, { "type": "Title", "element_id": "8750ec1f6f59b282d104b919c7ffab0f", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { 
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "Description: None" }, @@ -84,8 +241,13 @@ "type": "Title", "element_id": "54e16b100bf2118d4c7c18c3f93e2223", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "NumberOfLeads: 0" }, @@ -93,9 +255,238 @@ "type": "Title", "element_id": "db1ed63f2ed83e51f75619047c417e49", "metadata": { - "data_source": {}, - "filetype": "text/plain" + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" }, "text": "NumberOfConvertedLeads: 0" + }, + { + "type": "Title", + "element_id": "9dba8c11c1bf3f3e308445ae74489cc1", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberOfContacts: 0" + }, + { + "type": "Title", + "element_id": "142cc644728caa77d46cb28875d8ce87", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberOfResponses: 0" + }, + { + "type": "Title", + "element_id": "98ac718353f4a43493b23c7e9b492e7f", + 
"metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberOfOpportunities: 0" + }, + { + "type": "Title", + "element_id": "dcb99328bf342d4a482a0a49c6de6141", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "NumberOfWonOpportunities: 0" + }, + { + "type": "Title", + "element_id": "e25dfbf0a1c88cdf6656e608124a768f", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "AmountAllOpportunities: 0.0" + }, + { + "type": "Title", + "element_id": "b9df436714f2480e46280cb066f7f56e", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "AmountWonOpportunities: 0.0" + }, + { + "type": "Title", + "element_id": "7d5637770b00a3a3aa72ee14bfd23d24", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "OwnerId: 005Hu00000Owz9uIAB" + }, + { + "type": "UncategorizedText", + "element_id": "47f5c9e49e63fd1cd28b6e807ddab95a", + "metadata": { + 
"data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "CreatedDate: 2023-08-18T11:48:08.000+0000" + }, + { + "type": "Title", + "element_id": "ca6b08cabe001c2e8926a751b8dc0a6e", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "CreatedById: 005Hu00000Owz9uIAB" + }, + { + "type": "UncategorizedText", + "element_id": "5ca9d0a0ae1c9a4e10ba2750b17e136c", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastModifiedDate: 2023-08-18T11:48:08.000+0000" + }, + { + "type": "Title", + "element_id": "6830685f5e226de71ad73b18f844fb9b", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastModifiedById: 005Hu00000Owz9uIAB" + }, + { + "type": "UncategorizedText", + "element_id": "a0e91d7714e8a62170ac1073cd7befd9", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "SystemModstamp: 2023-08-18T11:48:08.000+0000" + }, + { + "type": "Title", + 
"element_id": "dc125e8de9d761a3eeea22aa8973baa8", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastActivityDate: None" + }, + { + "type": "Title", + "element_id": "531505f0a6d181a8f21e57c60dc7a55b", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastViewedDate: None" + }, + { + "type": "Title", + "element_id": "c9a06c69fe4b81e0f8b9a055f5d9b03f", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "LastReferencedDate: None" + }, + { + "type": "Title", + "element_id": "d35500dc646c6d074c74d5170fba1e95", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU", + "version": "2023-08-18T11:48:08.000+0000", + "date_created": "2023-08-18T11:48:08", + "date_modified": "2023-08-18T11:48:08" + }, + "filetype": "application/xml" + }, + "text": "CampaignMemberRecordTypeId: None" } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/salesforce/EmailMessage/02sHu00001efErPIAU.json b/test_unstructured_ingest/expected-structured-output/salesforce/EmailMessage/02sHu00001efErPIAU.json index 3c3dd2f35..1583be1ac 100644 --- a/test_unstructured_ingest/expected-structured-output/salesforce/EmailMessage/02sHu00001efErPIAU.json +++ 
b/test_unstructured_ingest/expected-structured-output/salesforce/EmailMessage/02sHu00001efErPIAU.json @@ -1,9 +1,14 @@ [ { "type": "NarrativeText", - "element_id": "d954fa8e82ded23ebde30b2d53d5f81d", + "element_id": "3c0a5d71e07c028ce79bf3fd1a04e0ab", "metadata": { - "data_source": {}, + "data_source": { + "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErPIAU", + "version": "2023-09-14T11:40:03.000+0000", + "date_created": "2023-08-20T14:34:16", + "date_modified": "2023-08-21T14:34:16" + }, "filetype": "message/rfc822", "sent_from": [ "devops+salesforce-connector@unstructured.io" @@ -13,6 +18,69 @@ ], "subject": "Test of email 1" }, - "text": "Jane. This is a test of sending you an email from Salesforce!\n\n_____________________________________________________________________\nPowered by Salesforce\nhttp://www.salesforce.com/" + "text": "Jane. This is a test of sending you an email from Salesforce!" + }, + { + "type": "UncategorizedText", + "element_id": "ada246e54712b8cb04cf9eeed042d9c8", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErPIAU", + "version": "2023-09-14T11:40:03.000+0000", + "date_created": "2023-08-20T14:34:16", + "date_modified": "2023-08-21T14:34:16" + }, + "filetype": "message/rfc822", + "sent_from": [ + "devops+salesforce-connector@unstructured.io" + ], + "sent_to": [ + "jane_gray@uoa.edu" + ], + "subject": "Test of email 1" + }, + "text": "_____________________________________________________________________" + }, + { + "type": "Title", + "element_id": "b5cfb3867874279548fa1feb3b688171", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErPIAU", + "version": "2023-09-14T11:40:03.000+0000", + "date_created": "2023-08-20T14:34:16", + "date_modified": "2023-08-21T14:34:16" + }, + "filetype": "message/rfc822", + "sent_from": [ + "devops+salesforce-connector@unstructured.io" + ], + "sent_to": [ + "jane_gray@uoa.edu" + ], + 
"subject": "Test of email 1" + }, + "text": "Powered by Salesforce" + }, + { + "type": "Title", + "element_id": "f3a436e93d3660eedd11ac4c7de5d8cf", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErPIAU", + "version": "2023-09-14T11:40:03.000+0000", + "date_created": "2023-08-20T14:34:16", + "date_modified": "2023-08-21T14:34:16" + }, + "filetype": "message/rfc822", + "sent_from": [ + "devops+salesforce-connector@unstructured.io" + ], + "sent_to": [ + "jane_gray@uoa.edu" + ], + "subject": "Test of email 1" + }, + "text": "http://www.salesforce.com/" } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/salesforce/EmailMessage/02sHu00001efErQIAU.json b/test_unstructured_ingest/expected-structured-output/salesforce/EmailMessage/02sHu00001efErQIAU.json index 903312d7d..4e6303b00 100644 --- a/test_unstructured_ingest/expected-structured-output/salesforce/EmailMessage/02sHu00001efErQIAU.json +++ b/test_unstructured_ingest/expected-structured-output/salesforce/EmailMessage/02sHu00001efErQIAU.json @@ -1,9 +1,14 @@ [ { - "type": "NarrativeText", - "element_id": "f5ac98aa9002453f536877714c5eb88d", + "type": "Title", + "element_id": "b4e8db308de7be5daf941cdfe9ea77cd", "metadata": { - "data_source": {}, + "data_source": { + "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErQIAU", + "version": "2023-09-14T11:40:03.000+0000", + "date_created": "2023-08-20T14:35:49", + "date_modified": "2023-08-20T14:35:55" + }, "filetype": "message/rfc822", "sent_from": [ "devops+salesforce-connector@unstructured.io" @@ -13,6 +18,132 @@ ], "subject": "Test of Salesforce 2" }, - "text": "Hey Sean.\n\nTesting email parsing here.\nType: email\n\nJust testing the email system\n\n_____________________________________________________________________\nPowered by Salesforce\nhttp://www.salesforce.com/" + "text": "Hey Sean." 
+ }, + { + "type": "NarrativeText", + "element_id": "383058971777c314cff4cc10960ee84f", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErQIAU", + "version": "2023-09-14T11:40:03.000+0000", + "date_created": "2023-08-20T14:35:49", + "date_modified": "2023-08-20T14:35:55" + }, + "filetype": "message/rfc822", + "sent_from": [ + "devops+salesforce-connector@unstructured.io" + ], + "sent_to": [ + "sean@edge.com" + ], + "subject": "Test of Salesforce 2" + }, + "text": "Testing email parsing here." + }, + { + "type": "Title", + "element_id": "37e9e141c34a359d036d4760368cf88b", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErQIAU", + "version": "2023-09-14T11:40:03.000+0000", + "date_created": "2023-08-20T14:35:49", + "date_modified": "2023-08-20T14:35:55" + }, + "filetype": "message/rfc822", + "sent_from": [ + "devops+salesforce-connector@unstructured.io" + ], + "sent_to": [ + "sean@edge.com" + ], + "subject": "Test of Salesforce 2" + }, + "text": "Type: email" + }, + { + "type": "NarrativeText", + "element_id": "36a9f06f033f03c18d12b8318e018b65", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErQIAU", + "version": "2023-09-14T11:40:03.000+0000", + "date_created": "2023-08-20T14:35:49", + "date_modified": "2023-08-20T14:35:55" + }, + "filetype": "message/rfc822", + "sent_from": [ + "devops+salesforce-connector@unstructured.io" + ], + "sent_to": [ + "sean@edge.com" + ], + "subject": "Test of Salesforce 2" + }, + "text": "Just testing the email system" + }, + { + "type": "UncategorizedText", + "element_id": "ada246e54712b8cb04cf9eeed042d9c8", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErQIAU", + "version": "2023-09-14T11:40:03.000+0000", + "date_created": "2023-08-20T14:35:49", + "date_modified": "2023-08-20T14:35:55" + }, + "filetype": "message/rfc822", + 
"sent_from": [ + "devops+salesforce-connector@unstructured.io" + ], + "sent_to": [ + "sean@edge.com" + ], + "subject": "Test of Salesforce 2" + }, + "text": "_____________________________________________________________________" + }, + { + "type": "Title", + "element_id": "b5cfb3867874279548fa1feb3b688171", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErQIAU", + "version": "2023-09-14T11:40:03.000+0000", + "date_created": "2023-08-20T14:35:49", + "date_modified": "2023-08-20T14:35:55" + }, + "filetype": "message/rfc822", + "sent_from": [ + "devops+salesforce-connector@unstructured.io" + ], + "sent_to": [ + "sean@edge.com" + ], + "subject": "Test of Salesforce 2" + }, + "text": "Powered by Salesforce" + }, + { + "type": "Title", + "element_id": "f3a436e93d3660eedd11ac4c7de5d8cf", + "metadata": { + "data_source": { + "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErQIAU", + "version": "2023-09-14T11:40:03.000+0000", + "date_created": "2023-08-20T14:35:49", + "date_modified": "2023-08-20T14:35:55" + }, + "filetype": "message/rfc822", + "sent_from": [ + "devops+salesforce-connector@unstructured.io" + ], + "sent_to": [ + "sean@edge.com" + ], + "subject": "Test of Salesforce 2" + }, + "text": "http://www.salesforce.com/" } ] \ No newline at end of file diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 42ab343e2..86af595d8 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.10.17-dev6" # pragma: no cover +__version__ = "0.10.17-dev7" # pragma: no cover diff --git a/unstructured/ingest/connector/salesforce.py b/unstructured/ingest/connector/salesforce.py index a68d3db8e..35c92a43b 100644 --- a/unstructured/ingest/connector/salesforce.py +++ b/unstructured/ingest/connector/salesforce.py @@ -9,7 +9,9 @@ https://developer.salesforce.com/docs/atlas.en-us.sfdx_dev.meta/sfdx_dev/sfdx_de """ import os import typing as t +from 
collections import OrderedDict from dataclasses import dataclass +from datetime import datetime from email.utils import formatdate from pathlib import Path from string import Template @@ -17,12 +19,14 @@ from textwrap import dedent from dateutil import parser # type: ignore +from unstructured.ingest.error import SourceConnectionError from unstructured.ingest.interfaces import ( BaseConnectorConfig, BaseIngestDoc, BaseSourceConnector, IngestDocCleanupMixin, SourceConnectorCleanupMixin, + SourceMetadata, ) from unstructured.ingest.logger import logger from unstructured.utils import requires_dependencies @@ -32,8 +36,9 @@ class MissingCategoryError(Exception): """There are no categories with that name.""" -ACCEPTED_CATEGORIES = ["Account", "Case", "Campaign", "EmailMessage", "Lead"] +SALESFORCE_API_VERSION = "57.0" +ACCEPTED_CATEGORIES = ["Account", "Case", "Campaign", "EmailMessage", "Lead"] EMAIL_TEMPLATE = Template( """MIME-Version: 1.0 @@ -48,73 +53,11 @@ Content-Type: text/plain; charset="UTF-8" $textbody --00000000000095c9b205eff92630 Content-Type: text/html; charset="UTF-8" -$textbody +$htmlbody --00000000000095c9b205eff92630-- """, ) -ACCOUNT_TEMPLATE = Template( - """Id: $id -Name: $name -Type: $account_type -Phone: $phone -AccountNumber: $account_number -Website: $website -Industry: $industry -AnnualRevenue: $annual_revenue -NumberOfEmployees: $number_employees -Ownership: $ownership -TickerSymbol: $ticker_symbol -Description: $description -Rating: $rating -DandbCompanyId: $dnb_id -""", -) - -LEAD_TEMPLATE = Template( - """Id: $id -Name: $name -Title: $title -Company: $company -Phone: $phone -Email: $email -Website: $website -Description: $description -LeadSource: $lead_source -Rating: $rating -Status: $status -Industry: $industry -""", -) - -CASE_TEMPLATE = Template( - """Id: $id -Type: $type -Status: $status -Reason: $reason -Origin: $origin -Subject: $subject -Priority: $priority -Description: $description -Comments: $comments -""", -) - 
-CAMPAIGN_TEMPLATE = Template( - """Id: $id -Name: $name -Type: $type -Status: $status -StartDate: $start_date -EndDate: $end_date -BudgetedCost: $budgeted_cost -ActualCost: $actual_cost -Description: $description -NumberOfLeads: $number_of_leads -NumberOfConvertedLeads: $number_of_converted_leads -""", -) - @dataclass class SimpleSalesforceConfig(BaseConnectorConfig): @@ -134,6 +77,7 @@ class SimpleSalesforceConfig(BaseConnectorConfig): username=self.username, consumer_key=self.consumer_key, privatekey_file=self.private_key_path, + version=SALESFORCE_API_VERSION, ) @@ -148,7 +92,7 @@ class SalesforceIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): if self.record_type == "EmailMessage": record_file = self.record_id + ".eml" elif self.record_type in ["Account", "Lead", "Case", "Campaign"]: - record_file = self.record_id + ".txt" + record_file = self.record_id + ".xml" else: raise MissingCategoryError( f"There are no categories with the name: {self.record_type}", @@ -163,77 +107,25 @@ class SalesforceIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): def _create_full_tmp_dir_path(self): self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - def create_account(self, account_json: t.Dict[str, t.Any]) -> str: - """Creates partitionable account file""" - account = ACCOUNT_TEMPLATE.substitute( - id=account_json.get("Id"), - name=account_json.get("Name"), - account_type=account_json.get("Type"), - phone=account_json.get("Phone"), - account_number=account_json.get("AccountNumber"), - website=account_json.get("Website"), - industry=account_json.get("Industry"), - annual_revenue=account_json.get("AnnualRevenue"), - number_employees=account_json.get("NumberOfEmployees"), - ownership=account_json.get("Ownership"), - ticker_symbol=account_json.get("TickerSymbol"), - description=account_json.get("Description"), - rating=account_json.get("Rating"), - dnb_id=account_json.get("DandbCompanyId"), - ) - return dedent(account) + def _xml_for_record(self, record: 
OrderedDict) -> str: + """Creates partitionable xml file from a record""" + import xml.etree.ElementTree as ET - def create_lead(self, lead_json: t.Dict[str, t.Any]) -> str: - """Creates partitionable lead file""" - lead = LEAD_TEMPLATE.substitute( - id=lead_json.get("Id"), - name=lead_json.get("Name"), - title=lead_json.get("Title"), - company=lead_json.get("Company"), - phone=lead_json.get("Phone"), - email=lead_json.get("Email"), - website=lead_json.get("Website"), - description=lead_json.get("Description"), - lead_source=lead_json.get("LeadSource"), - rating=lead_json.get("Rating"), - status=lead_json.get("Status"), - industry=lead_json.get("Industry"), - ) - return dedent(lead) + def flatten_dict(data, parent, prefix=""): + for key, value in data.items(): + if isinstance(value, OrderedDict): + flatten_dict(value, parent, prefix=f"{prefix}{key}.") + else: + item = ET.Element("item") + item.text = f"{prefix}{key}: {value}" + parent.append(item) - def create_case(self, case_json: t.Dict[str, t.Any]) -> str: - """Creates partitionable case file""" - case = CASE_TEMPLATE.substitute( - id=case_json.get("Id"), - type=case_json.get("Type"), - status=case_json.get("Status"), - reason=case_json.get("Reason"), - origin=case_json.get("Origin"), - subject=case_json.get("Subject"), - priority=case_json.get("Priority"), - description=case_json.get("Description"), - comments=case_json.get("Comments"), - ) - return dedent(case) + root = ET.Element("root") + flatten_dict(record, root) + xml_string = ET.tostring(root, encoding="utf-8", xml_declaration=True).decode() + return xml_string - def create_campaign(self, campaign_json: t.Dict[str, t.Any]) -> str: - """Creates partitionable campaign file""" - campaign = CAMPAIGN_TEMPLATE.substitute( - id=campaign_json.get("Id"), - name=campaign_json.get("Name"), - type=campaign_json.get("Type"), - status=campaign_json.get("Status"), - start_date=campaign_json.get("StartDate"), - end_date=campaign_json.get("EndDate"), - 
budgeted_cost=campaign_json.get("BudgetedCost"), - actual_cost=campaign_json.get("ActualCost"), - description=campaign_json.get("Description"), - number_of_leads=campaign_json.get("NumberOfLeads"), - number_of_converted_leads=campaign_json.get("NumberOfConvertedLeads"), - ) - return dedent(campaign) - - def create_eml(self, email_json: t.Dict[str, t.Any]) -> str: + def _eml_for_record(self, email_json: t.Dict[str, t.Any]) -> str: """Recreates standard expected .eml format using template.""" eml = EMAIL_TEMPLATE.substitute( date=formatdate(parser.parse(email_json.get("MessageDate")).timestamp()), @@ -242,9 +134,29 @@ class SalesforceIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): from_email=email_json.get("FromAddress"), to_email=email_json.get("ToAddress"), textbody=email_json.get("TextBody"), + # TODO: This is a hack to get emails to process correctly. + # The HTML partitioner seems to have issues with
and text without tags like

+ htmlbody=email_json.get("HtmlBody", "") # "" because you can't .replace None + .replace("
", "

") + .replace(" None: # type: ignore + date_format = "%Y-%m-%dT%H:%M:%S.000+0000" + self.source_metadata = SourceMetadata( + date_created=datetime.strptime(record_json["CreatedDate"], date_format).isoformat(), + date_modified=datetime.strptime( + record_json["LastModifiedDate"], + date_format, + ).isoformat(), + # SystemModstamp is Timestamp if record has been modified by person or automated system + version=record_json.get("SystemModstamp"), + source_url=record_json["attributes"].get("url"), + exists=True, + ) + + @SourceConnectionError.wrap @BaseIngestDoc.skip_if_file_exists def get_file(self): """Saves individual json records locally.""" @@ -258,22 +170,16 @@ class SalesforceIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): f"select FIELDS(STANDARD) from {self.record_type} where Id='{self.record_id}'", )["records"][0] + self.update_source_metadata(record) + try: if self.record_type == "EmailMessage": - formatted_record = self.create_eml(record) - elif self.record_type == "Account": - formatted_record = self.create_account(record) - elif self.record_type == "Lead": - formatted_record = self.create_lead(record) - elif self.record_type == "Case": - formatted_record = self.create_case(record) - elif self.record_type == "Campaign": - formatted_record = self.create_campaign(record) + document = self._eml_for_record(record) else: - raise ValueError(f"record type not recognized: {self.record_type}") + document = self._xml_for_record(record) with open(self._tmp_download_file(), "w") as page_file: - page_file.write(formatted_record) + page_file.write(document) except Exception as e: logger.error( diff --git a/unstructured/ingest/processor.py b/unstructured/ingest/processor.py index 8d075e6b3..a133b7273 100644 --- a/unstructured/ingest/processor.py +++ b/unstructured/ingest/processor.py @@ -74,9 +74,10 @@ class Processor: if not docs: return - # Debugging tip: use the below line and comment out the mp.Pool loop + # Debugging tip: use the below lines and comment out the 
mp.Pool loop # block to remain in single process - # self.doc_processor_fn(docs[0]) + # json_docs = [doc.to_json() for doc in docs] + # self.doc_processor_fn(json_docs[0]) logger.info(f"Processing {len(docs)} docs") json_docs = [doc.to_json() for doc in docs] with mp.Pool(