diff --git a/CHANGELOG.md b/CHANGELOG.md
index 26414ce65..7029840db 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,11 @@
-## 0.10.17-dev6
+## 0.10.17-dev7
### Enhancements
* **Adds data source properties to SharePoint, Outlook, Onedrive, Reddit, and Slack connectors** These properties (date_created, date_modified, version, source_url, record_locator) are written to element metadata during ingest, mapping elements to information about the document source from which they derive. This functionality enables downstream applications to reveal source document applications, e.g. a link to a GDrive doc, Salesforce record, etc.
* **Add functionality to save embedded images in PDF's separately as images** This allows users to save embedded images in PDF's separately as images, given some directory path. The saved image path is written to the metadata for the Image element. Downstream applications may benefit by providing users with image links from relevant "hits."
* **Azure Cognite Search destination connector** New Azure Cognitive Search destination connector added to ingest CLI. Users may now use `unstructured-ingest` to write partitioned data from over 20 data sources (so far) to an Azure Cognitive Search index.
+* **Improves Salesforce partitioning** Partitions Salesforce data as XML instead of text for improved detail and flexibility. Partitions htmlbody instead of textbody for Salesforce emails. Importance: Allows all Salesforce fields to be ingested and gives Salesforce emails more detailed partitioning.
### Features
diff --git a/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9EIAU.json b/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9EIAU.json
index b873afc4f..13a10d93c 100644
--- a/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9EIAU.json
+++ b/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9EIAU.json
@@ -1,28 +1,99 @@
[
+ {
+ "type": "Title",
+ "element_id": "aa611cb13dc326fcc963f235dd44fe19",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "attributes.type: Campaign"
+ },
+ {
+ "type": "Title",
+ "element_id": "f8870b3af3522dbf19218c2da5fea3a6",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "attributes.url: /services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU"
+ },
{
"type": "UncategorizedText",
"element_id": "d6a8689a12ad0cd0314b04e1c2cee3c9",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Id: 701Hu000001eX9EIAU"
},
+ {
+ "type": "NarrativeText",
+ "element_id": "d5c5abf10d418d9ead0dc394cfce036f",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "IsDeleted: False"
+ },
{
"type": "Title",
"element_id": "69d9d94f0bc4b8d425fa99dce2b78311",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Name: GC Product Webinar - Jan 7, 2002"
},
+ {
+ "type": "Title",
+ "element_id": "fe183f24b67677f16a0243a4ddc33e26",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "ParentId: None"
+ },
{
"type": "Title",
"element_id": "f80d0033c7e5a8d6ae66778815e33f35",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Type: Webinar"
},
@@ -30,8 +101,13 @@
"type": "Title",
"element_id": "ad1b8a8ebbde05c57a773f60045de6f6",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Status: Completed"
},
@@ -39,8 +115,13 @@
"type": "UncategorizedText",
"element_id": "08336889c7ebb4ba297a396eb072d83c",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "StartDate: 2023-01-29"
},
@@ -48,17 +129,41 @@
"type": "UncategorizedText",
"element_id": "887de29c98087cc9d07b242432cff930",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "EndDate: 2023-01-29"
},
+ {
+ "type": "Title",
+ "element_id": "a5ee71fd9f08bb6b06fa5431b5f7f05f",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "ExpectedRevenue: 3000000.0"
+ },
{
"type": "Title",
"element_id": "ffe62693f34276315c62d28b06005bcf",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "BudgetedCost: 10000.0"
},
@@ -66,17 +171,69 @@
"type": "Title",
"element_id": "5739187b01834fedcc2362b5d3841d07",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "ActualCost: 11400.0"
},
+ {
+ "type": "Title",
+ "element_id": "270ecd02e255a8bfd922acf3f2d8b1ac",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "ExpectedResponse: 10.0"
+ },
+ {
+ "type": "Title",
+ "element_id": "fd2a3392a4ae4eec64c5e089b629d17c",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberSent: 5000.0"
+ },
+ {
+ "type": "Title",
+ "element_id": "26a526fe045853810a4e7fe8b770b26e",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "IsActive: True"
+ },
{
"type": "Title",
"element_id": "8750ec1f6f59b282d104b919c7ffab0f",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Description: None"
},
@@ -84,8 +241,13 @@
"type": "Title",
"element_id": "54e16b100bf2118d4c7c18c3f93e2223",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "NumberOfLeads: 0"
},
@@ -93,9 +255,238 @@
"type": "Title",
"element_id": "db1ed63f2ed83e51f75619047c417e49",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "NumberOfConvertedLeads: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "9dba8c11c1bf3f3e308445ae74489cc1",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberOfContacts: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "142cc644728caa77d46cb28875d8ce87",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberOfResponses: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "98ac718353f4a43493b23c7e9b492e7f",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberOfOpportunities: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "dcb99328bf342d4a482a0a49c6de6141",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberOfWonOpportunities: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "e25dfbf0a1c88cdf6656e608124a768f",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "AmountAllOpportunities: 0.0"
+ },
+ {
+ "type": "Title",
+ "element_id": "b9df436714f2480e46280cb066f7f56e",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "AmountWonOpportunities: 0.0"
+ },
+ {
+ "type": "Title",
+ "element_id": "7d5637770b00a3a3aa72ee14bfd23d24",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "OwnerId: 005Hu00000Owz9uIAB"
+ },
+ {
+ "type": "UncategorizedText",
+ "element_id": "47f5c9e49e63fd1cd28b6e807ddab95a",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "CreatedDate: 2023-08-18T11:48:08.000+0000"
+ },
+ {
+ "type": "Title",
+ "element_id": "ca6b08cabe001c2e8926a751b8dc0a6e",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "CreatedById: 005Hu00000Owz9uIAB"
+ },
+ {
+ "type": "UncategorizedText",
+ "element_id": "5ca9d0a0ae1c9a4e10ba2750b17e136c",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastModifiedDate: 2023-08-18T11:48:08.000+0000"
+ },
+ {
+ "type": "Title",
+ "element_id": "6830685f5e226de71ad73b18f844fb9b",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastModifiedById: 005Hu00000Owz9uIAB"
+ },
+ {
+ "type": "UncategorizedText",
+ "element_id": "a0e91d7714e8a62170ac1073cd7befd9",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "SystemModstamp: 2023-08-18T11:48:08.000+0000"
+ },
+ {
+ "type": "Title",
+ "element_id": "dc125e8de9d761a3eeea22aa8973baa8",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastActivityDate: None"
+ },
+ {
+ "type": "Title",
+ "element_id": "531505f0a6d181a8f21e57c60dc7a55b",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastViewedDate: None"
+ },
+ {
+ "type": "Title",
+ "element_id": "c9a06c69fe4b81e0f8b9a055f5d9b03f",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastReferencedDate: None"
+ },
+ {
+ "type": "Title",
+ "element_id": "d35500dc646c6d074c74d5170fba1e95",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9EIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "CampaignMemberRecordTypeId: None"
}
]
\ No newline at end of file
diff --git a/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9FIAU.json b/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9FIAU.json
index 89519a9e6..348448455 100644
--- a/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9FIAU.json
+++ b/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9FIAU.json
@@ -1,28 +1,99 @@
[
+ {
+ "type": "Title",
+ "element_id": "aa611cb13dc326fcc963f235dd44fe19",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "attributes.type: Campaign"
+ },
+ {
+ "type": "Title",
+ "element_id": "ef2f4870487db654c7d2b57f829814b5",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "attributes.url: /services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU"
+ },
{
"type": "UncategorizedText",
"element_id": "582957afcab0b1f0df8e414c601b679a",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Id: 701Hu000001eX9FIAU"
},
+ {
+ "type": "NarrativeText",
+ "element_id": "d5c5abf10d418d9ead0dc394cfce036f",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "IsDeleted: False"
+ },
{
"type": "Title",
"element_id": "b1097935922e3ea926a35ace5fe68a61",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Name: User Conference - Jun 17-19, 2002"
},
+ {
+ "type": "Title",
+ "element_id": "fe183f24b67677f16a0243a4ddc33e26",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "ParentId: None"
+ },
{
"type": "Title",
"element_id": "0c29c88f4b31c6f5caf8b36885c4c1c6",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Type: Conference"
},
@@ -30,8 +101,13 @@
"type": "Title",
"element_id": "b537e10ee8e78ec7a18792eaa76ce0e4",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Status: Planned"
},
@@ -39,8 +115,13 @@
"type": "UncategorizedText",
"element_id": "6cbe5bf59dabd290307e79547b2a86f2",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "StartDate: 2023-07-09"
},
@@ -48,17 +129,41 @@
"type": "UncategorizedText",
"element_id": "6cb3c2919b2c47be4b187d17c316a539",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "EndDate: 2023-07-11"
},
+ {
+ "type": "Title",
+ "element_id": "cdf407dd243fc297a308f721260c21a8",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "ExpectedRevenue: 5500000.0"
+ },
{
"type": "Title",
"element_id": "3062c8ae2aae8afe4e38d8fa2a6ea248",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "BudgetedCost: 100000.0"
},
@@ -66,17 +171,69 @@
"type": "Title",
"element_id": "754630afa9bba639e59a8a80785f2766",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "ActualCost: None"
},
+ {
+ "type": "Title",
+ "element_id": "ef26c11c41150d99466a68eeec51b640",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "ExpectedResponse: 15.0"
+ },
+ {
+ "type": "Title",
+ "element_id": "7fd5b7e9536d536d0eba5047b11a5cb8",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberSent: 40000.0"
+ },
+ {
+ "type": "Title",
+ "element_id": "26a526fe045853810a4e7fe8b770b26e",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "IsActive: True"
+ },
{
"type": "Title",
"element_id": "8750ec1f6f59b282d104b919c7ffab0f",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Description: None"
},
@@ -84,8 +241,13 @@
"type": "Title",
"element_id": "54e16b100bf2118d4c7c18c3f93e2223",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "NumberOfLeads: 0"
},
@@ -93,9 +255,238 @@
"type": "Title",
"element_id": "db1ed63f2ed83e51f75619047c417e49",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "NumberOfConvertedLeads: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "9dba8c11c1bf3f3e308445ae74489cc1",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberOfContacts: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "142cc644728caa77d46cb28875d8ce87",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberOfResponses: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "98ac718353f4a43493b23c7e9b492e7f",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberOfOpportunities: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "dcb99328bf342d4a482a0a49c6de6141",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberOfWonOpportunities: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "e25dfbf0a1c88cdf6656e608124a768f",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "AmountAllOpportunities: 0.0"
+ },
+ {
+ "type": "Title",
+ "element_id": "b9df436714f2480e46280cb066f7f56e",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "AmountWonOpportunities: 0.0"
+ },
+ {
+ "type": "Title",
+ "element_id": "7d5637770b00a3a3aa72ee14bfd23d24",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "OwnerId: 005Hu00000Owz9uIAB"
+ },
+ {
+ "type": "UncategorizedText",
+ "element_id": "47f5c9e49e63fd1cd28b6e807ddab95a",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "CreatedDate: 2023-08-18T11:48:08.000+0000"
+ },
+ {
+ "type": "Title",
+ "element_id": "ca6b08cabe001c2e8926a751b8dc0a6e",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "CreatedById: 005Hu00000Owz9uIAB"
+ },
+ {
+ "type": "UncategorizedText",
+ "element_id": "5ca9d0a0ae1c9a4e10ba2750b17e136c",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastModifiedDate: 2023-08-18T11:48:08.000+0000"
+ },
+ {
+ "type": "Title",
+ "element_id": "6830685f5e226de71ad73b18f844fb9b",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastModifiedById: 005Hu00000Owz9uIAB"
+ },
+ {
+ "type": "UncategorizedText",
+ "element_id": "a0e91d7714e8a62170ac1073cd7befd9",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "SystemModstamp: 2023-08-18T11:48:08.000+0000"
+ },
+ {
+ "type": "Title",
+ "element_id": "dc125e8de9d761a3eeea22aa8973baa8",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastActivityDate: None"
+ },
+ {
+ "type": "Title",
+ "element_id": "531505f0a6d181a8f21e57c60dc7a55b",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastViewedDate: None"
+ },
+ {
+ "type": "Title",
+ "element_id": "c9a06c69fe4b81e0f8b9a055f5d9b03f",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastReferencedDate: None"
+ },
+ {
+ "type": "Title",
+ "element_id": "d35500dc646c6d074c74d5170fba1e95",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9FIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "CampaignMemberRecordTypeId: None"
}
]
\ No newline at end of file
diff --git a/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9GIAU.json b/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9GIAU.json
index d87da8362..6303c6076 100644
--- a/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9GIAU.json
+++ b/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9GIAU.json
@@ -1,28 +1,99 @@
[
+ {
+ "type": "Title",
+ "element_id": "aa611cb13dc326fcc963f235dd44fe19",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "attributes.type: Campaign"
+ },
+ {
+ "type": "Title",
+ "element_id": "cd41ba564c2035ae1d02f5e3650f39e1",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "attributes.url: /services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU"
+ },
{
"type": "UncategorizedText",
"element_id": "153e1fa63953e7e19f9004e0253eab68",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Id: 701Hu000001eX9GIAU"
},
+ {
+ "type": "NarrativeText",
+ "element_id": "d5c5abf10d418d9ead0dc394cfce036f",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "IsDeleted: False"
+ },
{
"type": "Title",
"element_id": "9b9c5e71eff6a483e85da52d1a1f1005",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Name: DM Campaign to Top Customers - Nov 12-23, 2001"
},
+ {
+ "type": "Title",
+ "element_id": "fe183f24b67677f16a0243a4ddc33e26",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "ParentId: None"
+ },
{
"type": "Title",
"element_id": "28fa7658294d152358d23d8bde3c9e56",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Type: Direct Mail"
},
@@ -30,8 +101,13 @@
"type": "Title",
"element_id": "ad1b8a8ebbde05c57a773f60045de6f6",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Status: Completed"
},
@@ -39,8 +115,13 @@
"type": "UncategorizedText",
"element_id": "59b06bd535fc60e446ce4f6db6392a8d",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "StartDate: 2022-12-04"
},
@@ -48,17 +129,41 @@
"type": "UncategorizedText",
"element_id": "a7caa29a82d158a422b901babed10321",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "EndDate: 2022-12-15"
},
+ {
+ "type": "Title",
+ "element_id": "16c237251889480d8e0e5234cf198e2b",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "ExpectedRevenue: 2500000.0"
+ },
{
"type": "Title",
"element_id": "a830028696ccdc6c73f26e2f5f0b3e0d",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "BudgetedCost: 25000.0"
},
@@ -66,17 +171,69 @@
"type": "Title",
"element_id": "f393ef1a67e6b638d6825e00ffa85b5e",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "ActualCost: 23500.0"
},
+ {
+ "type": "Title",
+ "element_id": "7dbe784238042675737de400c3112644",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "ExpectedResponse: 7.0"
+ },
+ {
+ "type": "Title",
+ "element_id": "fd49829c9a39ed51c7cf5dab24ec4e88",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberSent: 15000.0"
+ },
+ {
+ "type": "Title",
+ "element_id": "26a526fe045853810a4e7fe8b770b26e",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "IsActive: True"
+ },
{
"type": "Title",
"element_id": "8750ec1f6f59b282d104b919c7ffab0f",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Description: None"
},
@@ -84,8 +241,13 @@
"type": "Title",
"element_id": "54e16b100bf2118d4c7c18c3f93e2223",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "NumberOfLeads: 0"
},
@@ -93,9 +255,238 @@
"type": "Title",
"element_id": "db1ed63f2ed83e51f75619047c417e49",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "NumberOfConvertedLeads: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "9dba8c11c1bf3f3e308445ae74489cc1",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberOfContacts: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "142cc644728caa77d46cb28875d8ce87",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberOfResponses: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "98ac718353f4a43493b23c7e9b492e7f",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberOfOpportunities: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "dcb99328bf342d4a482a0a49c6de6141",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberOfWonOpportunities: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "e25dfbf0a1c88cdf6656e608124a768f",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "AmountAllOpportunities: 0.0"
+ },
+ {
+ "type": "Title",
+ "element_id": "b9df436714f2480e46280cb066f7f56e",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "AmountWonOpportunities: 0.0"
+ },
+ {
+ "type": "Title",
+ "element_id": "7d5637770b00a3a3aa72ee14bfd23d24",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "OwnerId: 005Hu00000Owz9uIAB"
+ },
+ {
+ "type": "UncategorizedText",
+ "element_id": "47f5c9e49e63fd1cd28b6e807ddab95a",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "CreatedDate: 2023-08-18T11:48:08.000+0000"
+ },
+ {
+ "type": "Title",
+ "element_id": "ca6b08cabe001c2e8926a751b8dc0a6e",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "CreatedById: 005Hu00000Owz9uIAB"
+ },
+ {
+ "type": "UncategorizedText",
+ "element_id": "5ca9d0a0ae1c9a4e10ba2750b17e136c",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastModifiedDate: 2023-08-18T11:48:08.000+0000"
+ },
+ {
+ "type": "Title",
+ "element_id": "6830685f5e226de71ad73b18f844fb9b",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastModifiedById: 005Hu00000Owz9uIAB"
+ },
+ {
+ "type": "UncategorizedText",
+ "element_id": "a0e91d7714e8a62170ac1073cd7befd9",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "SystemModstamp: 2023-08-18T11:48:08.000+0000"
+ },
+ {
+ "type": "Title",
+ "element_id": "dc125e8de9d761a3eeea22aa8973baa8",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastActivityDate: None"
+ },
+ {
+ "type": "Title",
+ "element_id": "531505f0a6d181a8f21e57c60dc7a55b",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastViewedDate: None"
+ },
+ {
+ "type": "Title",
+ "element_id": "c9a06c69fe4b81e0f8b9a055f5d9b03f",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastReferencedDate: None"
+ },
+ {
+ "type": "Title",
+ "element_id": "d35500dc646c6d074c74d5170fba1e95",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9GIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "CampaignMemberRecordTypeId: None"
}
]
\ No newline at end of file
diff --git a/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9HIAU.json b/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9HIAU.json
index 36753980b..afd222d7b 100644
--- a/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9HIAU.json
+++ b/test_unstructured_ingest/expected-structured-output/salesforce/Campaign/701Hu000001eX9HIAU.json
@@ -1,28 +1,99 @@
[
+ {
+ "type": "Title",
+ "element_id": "aa611cb13dc326fcc963f235dd44fe19",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "attributes.type: Campaign"
+ },
+ {
+ "type": "Title",
+ "element_id": "25bfa76752b584b3234d689bcd20bb82",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "attributes.url: /services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU"
+ },
{
"type": "UncategorizedText",
"element_id": "697cb5681a4f17c6cb712dfce64ae2d1",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Id: 701Hu000001eX9HIAU"
},
+ {
+ "type": "NarrativeText",
+ "element_id": "d5c5abf10d418d9ead0dc394cfce036f",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "IsDeleted: False"
+ },
{
"type": "Title",
"element_id": "37c6c6fcf92fd42c703ad967a4691a32",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Name: International Electrical Engineers Association Trade Show - Mar 4-5, 2002"
},
+ {
+ "type": "Title",
+ "element_id": "fe183f24b67677f16a0243a4ddc33e26",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "ParentId: None"
+ },
{
"type": "Title",
"element_id": "a4b5a79024228eb84bcefe4bfe8bce47",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Type: Trade Show"
},
@@ -30,8 +101,13 @@
"type": "Title",
"element_id": "b537e10ee8e78ec7a18792eaa76ce0e4",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Status: Planned"
},
@@ -39,8 +115,13 @@
"type": "UncategorizedText",
"element_id": "5d68899808565a0eb340e7ce9a42c981",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "StartDate: 2023-03-26"
},
@@ -48,17 +129,41 @@
"type": "UncategorizedText",
"element_id": "eb23b79d3e286bef615fa4bf7bbe6c6d",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "EndDate: 2023-03-27"
},
+ {
+ "type": "Title",
+ "element_id": "1614a2aec5fd66fd65f6b06ab5164043",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "ExpectedRevenue: 8500000.0"
+ },
{
"type": "Title",
"element_id": "2bb64865cdfea5db0f000dde162fc372",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "BudgetedCost: 50000.0"
},
@@ -66,17 +171,69 @@
"type": "Title",
"element_id": "754630afa9bba639e59a8a80785f2766",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "ActualCost: None"
},
+ {
+ "type": "Title",
+ "element_id": "bb7195d3ff60f4ab8878cc91c69c0963",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "ExpectedResponse: 0.0"
+ },
+ {
+ "type": "Title",
+ "element_id": "cb20be3dba5cef028c220c4592939f47",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberSent: 0.0"
+ },
+ {
+ "type": "Title",
+ "element_id": "26a526fe045853810a4e7fe8b770b26e",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "IsActive: True"
+ },
{
"type": "Title",
"element_id": "8750ec1f6f59b282d104b919c7ffab0f",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "Description: None"
},
@@ -84,8 +241,13 @@
"type": "Title",
"element_id": "54e16b100bf2118d4c7c18c3f93e2223",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "NumberOfLeads: 0"
},
@@ -93,9 +255,238 @@
"type": "Title",
"element_id": "db1ed63f2ed83e51f75619047c417e49",
"metadata": {
- "data_source": {},
- "filetype": "text/plain"
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
},
"text": "NumberOfConvertedLeads: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "9dba8c11c1bf3f3e308445ae74489cc1",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberOfContacts: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "142cc644728caa77d46cb28875d8ce87",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberOfResponses: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "98ac718353f4a43493b23c7e9b492e7f",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberOfOpportunities: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "dcb99328bf342d4a482a0a49c6de6141",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "NumberOfWonOpportunities: 0"
+ },
+ {
+ "type": "Title",
+ "element_id": "e25dfbf0a1c88cdf6656e608124a768f",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "AmountAllOpportunities: 0.0"
+ },
+ {
+ "type": "Title",
+ "element_id": "b9df436714f2480e46280cb066f7f56e",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "AmountWonOpportunities: 0.0"
+ },
+ {
+ "type": "Title",
+ "element_id": "7d5637770b00a3a3aa72ee14bfd23d24",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "OwnerId: 005Hu00000Owz9uIAB"
+ },
+ {
+ "type": "UncategorizedText",
+ "element_id": "47f5c9e49e63fd1cd28b6e807ddab95a",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "CreatedDate: 2023-08-18T11:48:08.000+0000"
+ },
+ {
+ "type": "Title",
+ "element_id": "ca6b08cabe001c2e8926a751b8dc0a6e",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "CreatedById: 005Hu00000Owz9uIAB"
+ },
+ {
+ "type": "UncategorizedText",
+ "element_id": "5ca9d0a0ae1c9a4e10ba2750b17e136c",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastModifiedDate: 2023-08-18T11:48:08.000+0000"
+ },
+ {
+ "type": "Title",
+ "element_id": "6830685f5e226de71ad73b18f844fb9b",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastModifiedById: 005Hu00000Owz9uIAB"
+ },
+ {
+ "type": "UncategorizedText",
+ "element_id": "a0e91d7714e8a62170ac1073cd7befd9",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "SystemModstamp: 2023-08-18T11:48:08.000+0000"
+ },
+ {
+ "type": "Title",
+ "element_id": "dc125e8de9d761a3eeea22aa8973baa8",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastActivityDate: None"
+ },
+ {
+ "type": "Title",
+ "element_id": "531505f0a6d181a8f21e57c60dc7a55b",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastViewedDate: None"
+ },
+ {
+ "type": "Title",
+ "element_id": "c9a06c69fe4b81e0f8b9a055f5d9b03f",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "LastReferencedDate: None"
+ },
+ {
+ "type": "Title",
+ "element_id": "d35500dc646c6d074c74d5170fba1e95",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/Campaign/701Hu000001eX9HIAU",
+ "version": "2023-08-18T11:48:08.000+0000",
+ "date_created": "2023-08-18T11:48:08",
+ "date_modified": "2023-08-18T11:48:08"
+ },
+ "filetype": "application/xml"
+ },
+ "text": "CampaignMemberRecordTypeId: None"
}
]
\ No newline at end of file
diff --git a/test_unstructured_ingest/expected-structured-output/salesforce/EmailMessage/02sHu00001efErPIAU.json b/test_unstructured_ingest/expected-structured-output/salesforce/EmailMessage/02sHu00001efErPIAU.json
index 3c3dd2f35..1583be1ac 100644
--- a/test_unstructured_ingest/expected-structured-output/salesforce/EmailMessage/02sHu00001efErPIAU.json
+++ b/test_unstructured_ingest/expected-structured-output/salesforce/EmailMessage/02sHu00001efErPIAU.json
@@ -1,9 +1,14 @@
[
{
"type": "NarrativeText",
- "element_id": "d954fa8e82ded23ebde30b2d53d5f81d",
+ "element_id": "3c0a5d71e07c028ce79bf3fd1a04e0ab",
"metadata": {
- "data_source": {},
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErPIAU",
+ "version": "2023-09-14T11:40:03.000+0000",
+ "date_created": "2023-08-20T14:34:16",
+ "date_modified": "2023-08-21T14:34:16"
+ },
"filetype": "message/rfc822",
"sent_from": [
"devops+salesforce-connector@unstructured.io"
@@ -13,6 +18,69 @@
],
"subject": "Test of email 1"
},
- "text": "Jane. This is a test of sending you an email from Salesforce!\n\n_____________________________________________________________________\nPowered by Salesforce\nhttp://www.salesforce.com/"
+ "text": "Jane. This is a test of sending you an email from Salesforce!"
+ },
+ {
+ "type": "UncategorizedText",
+ "element_id": "ada246e54712b8cb04cf9eeed042d9c8",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErPIAU",
+ "version": "2023-09-14T11:40:03.000+0000",
+ "date_created": "2023-08-20T14:34:16",
+ "date_modified": "2023-08-21T14:34:16"
+ },
+ "filetype": "message/rfc822",
+ "sent_from": [
+ "devops+salesforce-connector@unstructured.io"
+ ],
+ "sent_to": [
+ "jane_gray@uoa.edu"
+ ],
+ "subject": "Test of email 1"
+ },
+ "text": "_____________________________________________________________________"
+ },
+ {
+ "type": "Title",
+ "element_id": "b5cfb3867874279548fa1feb3b688171",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErPIAU",
+ "version": "2023-09-14T11:40:03.000+0000",
+ "date_created": "2023-08-20T14:34:16",
+ "date_modified": "2023-08-21T14:34:16"
+ },
+ "filetype": "message/rfc822",
+ "sent_from": [
+ "devops+salesforce-connector@unstructured.io"
+ ],
+ "sent_to": [
+ "jane_gray@uoa.edu"
+ ],
+ "subject": "Test of email 1"
+ },
+ "text": "Powered by Salesforce"
+ },
+ {
+ "type": "Title",
+ "element_id": "f3a436e93d3660eedd11ac4c7de5d8cf",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErPIAU",
+ "version": "2023-09-14T11:40:03.000+0000",
+ "date_created": "2023-08-20T14:34:16",
+ "date_modified": "2023-08-21T14:34:16"
+ },
+ "filetype": "message/rfc822",
+ "sent_from": [
+ "devops+salesforce-connector@unstructured.io"
+ ],
+ "sent_to": [
+ "jane_gray@uoa.edu"
+ ],
+ "subject": "Test of email 1"
+ },
+ "text": "http://www.salesforce.com/"
}
]
\ No newline at end of file
diff --git a/test_unstructured_ingest/expected-structured-output/salesforce/EmailMessage/02sHu00001efErQIAU.json b/test_unstructured_ingest/expected-structured-output/salesforce/EmailMessage/02sHu00001efErQIAU.json
index 903312d7d..4e6303b00 100644
--- a/test_unstructured_ingest/expected-structured-output/salesforce/EmailMessage/02sHu00001efErQIAU.json
+++ b/test_unstructured_ingest/expected-structured-output/salesforce/EmailMessage/02sHu00001efErQIAU.json
@@ -1,9 +1,14 @@
[
{
- "type": "NarrativeText",
- "element_id": "f5ac98aa9002453f536877714c5eb88d",
+ "type": "Title",
+ "element_id": "b4e8db308de7be5daf941cdfe9ea77cd",
"metadata": {
- "data_source": {},
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErQIAU",
+ "version": "2023-09-14T11:40:03.000+0000",
+ "date_created": "2023-08-20T14:35:49",
+ "date_modified": "2023-08-20T14:35:55"
+ },
"filetype": "message/rfc822",
"sent_from": [
"devops+salesforce-connector@unstructured.io"
@@ -13,6 +18,132 @@
],
"subject": "Test of Salesforce 2"
},
- "text": "Hey Sean.\n\nTesting email parsing here.\nType: email\n\nJust testing the email system\n\n_____________________________________________________________________\nPowered by Salesforce\nhttp://www.salesforce.com/"
+ "text": "Hey Sean."
+ },
+ {
+ "type": "NarrativeText",
+ "element_id": "383058971777c314cff4cc10960ee84f",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErQIAU",
+ "version": "2023-09-14T11:40:03.000+0000",
+ "date_created": "2023-08-20T14:35:49",
+ "date_modified": "2023-08-20T14:35:55"
+ },
+ "filetype": "message/rfc822",
+ "sent_from": [
+ "devops+salesforce-connector@unstructured.io"
+ ],
+ "sent_to": [
+ "sean@edge.com"
+ ],
+ "subject": "Test of Salesforce 2"
+ },
+ "text": "Testing email parsing here."
+ },
+ {
+ "type": "Title",
+ "element_id": "37e9e141c34a359d036d4760368cf88b",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErQIAU",
+ "version": "2023-09-14T11:40:03.000+0000",
+ "date_created": "2023-08-20T14:35:49",
+ "date_modified": "2023-08-20T14:35:55"
+ },
+ "filetype": "message/rfc822",
+ "sent_from": [
+ "devops+salesforce-connector@unstructured.io"
+ ],
+ "sent_to": [
+ "sean@edge.com"
+ ],
+ "subject": "Test of Salesforce 2"
+ },
+ "text": "Type: email"
+ },
+ {
+ "type": "NarrativeText",
+ "element_id": "36a9f06f033f03c18d12b8318e018b65",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErQIAU",
+ "version": "2023-09-14T11:40:03.000+0000",
+ "date_created": "2023-08-20T14:35:49",
+ "date_modified": "2023-08-20T14:35:55"
+ },
+ "filetype": "message/rfc822",
+ "sent_from": [
+ "devops+salesforce-connector@unstructured.io"
+ ],
+ "sent_to": [
+ "sean@edge.com"
+ ],
+ "subject": "Test of Salesforce 2"
+ },
+ "text": "Just testing the email system"
+ },
+ {
+ "type": "UncategorizedText",
+ "element_id": "ada246e54712b8cb04cf9eeed042d9c8",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErQIAU",
+ "version": "2023-09-14T11:40:03.000+0000",
+ "date_created": "2023-08-20T14:35:49",
+ "date_modified": "2023-08-20T14:35:55"
+ },
+ "filetype": "message/rfc822",
+ "sent_from": [
+ "devops+salesforce-connector@unstructured.io"
+ ],
+ "sent_to": [
+ "sean@edge.com"
+ ],
+ "subject": "Test of Salesforce 2"
+ },
+ "text": "_____________________________________________________________________"
+ },
+ {
+ "type": "Title",
+ "element_id": "b5cfb3867874279548fa1feb3b688171",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErQIAU",
+ "version": "2023-09-14T11:40:03.000+0000",
+ "date_created": "2023-08-20T14:35:49",
+ "date_modified": "2023-08-20T14:35:55"
+ },
+ "filetype": "message/rfc822",
+ "sent_from": [
+ "devops+salesforce-connector@unstructured.io"
+ ],
+ "sent_to": [
+ "sean@edge.com"
+ ],
+ "subject": "Test of Salesforce 2"
+ },
+ "text": "Powered by Salesforce"
+ },
+ {
+ "type": "Title",
+ "element_id": "f3a436e93d3660eedd11ac4c7de5d8cf",
+ "metadata": {
+ "data_source": {
+ "url": "/services/data/v57.0/sobjects/EmailMessage/02sHu00001efErQIAU",
+ "version": "2023-09-14T11:40:03.000+0000",
+ "date_created": "2023-08-20T14:35:49",
+ "date_modified": "2023-08-20T14:35:55"
+ },
+ "filetype": "message/rfc822",
+ "sent_from": [
+ "devops+salesforce-connector@unstructured.io"
+ ],
+ "sent_to": [
+ "sean@edge.com"
+ ],
+ "subject": "Test of Salesforce 2"
+ },
+ "text": "http://www.salesforce.com/"
}
]
\ No newline at end of file
diff --git a/unstructured/__version__.py b/unstructured/__version__.py
index 42ab343e2..86af595d8 100644
--- a/unstructured/__version__.py
+++ b/unstructured/__version__.py
@@ -1 +1 @@
-__version__ = "0.10.17-dev6" # pragma: no cover
+__version__ = "0.10.17-dev7" # pragma: no cover
diff --git a/unstructured/ingest/connector/salesforce.py b/unstructured/ingest/connector/salesforce.py
index a68d3db8e..35c92a43b 100644
--- a/unstructured/ingest/connector/salesforce.py
+++ b/unstructured/ingest/connector/salesforce.py
@@ -9,7 +9,9 @@ https://developer.salesforce.com/docs/atlas.en-us.sfdx_dev.meta/sfdx_dev/sfdx_de
"""
import os
import typing as t
+from collections import OrderedDict
from dataclasses import dataclass
+from datetime import datetime
from email.utils import formatdate
from pathlib import Path
from string import Template
@@ -17,12 +19,14 @@ from textwrap import dedent
from dateutil import parser # type: ignore
+from unstructured.ingest.error import SourceConnectionError
from unstructured.ingest.interfaces import (
BaseConnectorConfig,
BaseIngestDoc,
BaseSourceConnector,
IngestDocCleanupMixin,
SourceConnectorCleanupMixin,
+ SourceMetadata,
)
from unstructured.ingest.logger import logger
from unstructured.utils import requires_dependencies
@@ -32,8 +36,9 @@ class MissingCategoryError(Exception):
"""There are no categories with that name."""
-ACCEPTED_CATEGORIES = ["Account", "Case", "Campaign", "EmailMessage", "Lead"]
+SALESFORCE_API_VERSION = "57.0"  # forwarded as version= by SimpleSalesforceConfig
+ACCEPTED_CATEGORIES = ["Account", "Case", "Campaign", "EmailMessage", "Lead"]
EMAIL_TEMPLATE = Template(
"""MIME-Version: 1.0
@@ -48,73 +53,11 @@ Content-Type: text/plain; charset="UTF-8"
$textbody
--00000000000095c9b205eff92630
Content-Type: text/html; charset="UTF-8"
-$textbody
+$htmlbody
--00000000000095c9b205eff92630--
""",
)
-ACCOUNT_TEMPLATE = Template(
- """Id: $id
-Name: $name
-Type: $account_type
-Phone: $phone
-AccountNumber: $account_number
-Website: $website
-Industry: $industry
-AnnualRevenue: $annual_revenue
-NumberOfEmployees: $number_employees
-Ownership: $ownership
-TickerSymbol: $ticker_symbol
-Description: $description
-Rating: $rating
-DandbCompanyId: $dnb_id
-""",
-)
-
-LEAD_TEMPLATE = Template(
- """Id: $id
-Name: $name
-Title: $title
-Company: $company
-Phone: $phone
-Email: $email
-Website: $website
-Description: $description
-LeadSource: $lead_source
-Rating: $rating
-Status: $status
-Industry: $industry
-""",
-)
-
-CASE_TEMPLATE = Template(
- """Id: $id
-Type: $type
-Status: $status
-Reason: $reason
-Origin: $origin
-Subject: $subject
-Priority: $priority
-Description: $description
-Comments: $comments
-""",
-)
-
-CAMPAIGN_TEMPLATE = Template(
- """Id: $id
-Name: $name
-Type: $type
-Status: $status
-StartDate: $start_date
-EndDate: $end_date
-BudgetedCost: $budgeted_cost
-ActualCost: $actual_cost
-Description: $description
-NumberOfLeads: $number_of_leads
-NumberOfConvertedLeads: $number_of_converted_leads
-""",
-)
-
@dataclass
class SimpleSalesforceConfig(BaseConnectorConfig):
@@ -134,6 +77,7 @@ class SimpleSalesforceConfig(BaseConnectorConfig):
username=self.username,
consumer_key=self.consumer_key,
privatekey_file=self.private_key_path,
+ version=SALESFORCE_API_VERSION,
)
@@ -148,7 +92,7 @@ class SalesforceIngestDoc(IngestDocCleanupMixin, BaseIngestDoc):
if self.record_type == "EmailMessage":
record_file = self.record_id + ".eml"
elif self.record_type in ["Account", "Lead", "Case", "Campaign"]:
- record_file = self.record_id + ".txt"
+ record_file = self.record_id + ".xml"
else:
raise MissingCategoryError(
f"There are no categories with the name: {self.record_type}",
@@ -163,77 +107,25 @@ class SalesforceIngestDoc(IngestDocCleanupMixin, BaseIngestDoc):
def _create_full_tmp_dir_path(self):
self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True)
- def create_account(self, account_json: t.Dict[str, t.Any]) -> str:
- """Creates partitionable account file"""
- account = ACCOUNT_TEMPLATE.substitute(
- id=account_json.get("Id"),
- name=account_json.get("Name"),
- account_type=account_json.get("Type"),
- phone=account_json.get("Phone"),
- account_number=account_json.get("AccountNumber"),
- website=account_json.get("Website"),
- industry=account_json.get("Industry"),
- annual_revenue=account_json.get("AnnualRevenue"),
- number_employees=account_json.get("NumberOfEmployees"),
- ownership=account_json.get("Ownership"),
- ticker_symbol=account_json.get("TickerSymbol"),
- description=account_json.get("Description"),
- rating=account_json.get("Rating"),
- dnb_id=account_json.get("DandbCompanyId"),
- )
- return dedent(account)
+ def _xml_for_record(self, record: OrderedDict) -> str:
+ """Return the record as XML text: one item element per field, nested keys dot-joined."""
+ import xml.etree.ElementTree as ET
- def create_lead(self, lead_json: t.Dict[str, t.Any]) -> str:
- """Creates partitionable lead file"""
- lead = LEAD_TEMPLATE.substitute(
- id=lead_json.get("Id"),
- name=lead_json.get("Name"),
- title=lead_json.get("Title"),
- company=lead_json.get("Company"),
- phone=lead_json.get("Phone"),
- email=lead_json.get("Email"),
- website=lead_json.get("Website"),
- description=lead_json.get("Description"),
- lead_source=lead_json.get("LeadSource"),
- rating=lead_json.get("Rating"),
- status=lead_json.get("Status"),
- industry=lead_json.get("Industry"),
- )
- return dedent(lead)
+ def flatten_dict(data, parent, prefix=""):  # appends one item per leaf value to parent
+ for key, value in data.items():
+ if isinstance(value, OrderedDict):  # nested record: recurse, extending the dotted key path
+ flatten_dict(value, parent, prefix=f"{prefix}{key}.")
+ else:
+ item = ET.Element("item")
+ item.text = f"{prefix}{key}: {value}"  # f-string formatting, so None renders as "None"
+ parent.append(item)
- def create_case(self, case_json: t.Dict[str, t.Any]) -> str:
- """Creates partitionable case file"""
- case = CASE_TEMPLATE.substitute(
- id=case_json.get("Id"),
- type=case_json.get("Type"),
- status=case_json.get("Status"),
- reason=case_json.get("Reason"),
- origin=case_json.get("Origin"),
- subject=case_json.get("Subject"),
- priority=case_json.get("Priority"),
- description=case_json.get("Description"),
- comments=case_json.get("Comments"),
- )
- return dedent(case)
+ root = ET.Element("root")
+ flatten_dict(record, root)
+ xml_string = ET.tostring(root, encoding="utf-8", xml_declaration=True).decode()
+ return xml_string  # str, not bytes: the tostring() output was decoded above
- def create_campaign(self, campaign_json: t.Dict[str, t.Any]) -> str:
- """Creates partitionable campaign file"""
- campaign = CAMPAIGN_TEMPLATE.substitute(
- id=campaign_json.get("Id"),
- name=campaign_json.get("Name"),
- type=campaign_json.get("Type"),
- status=campaign_json.get("Status"),
- start_date=campaign_json.get("StartDate"),
- end_date=campaign_json.get("EndDate"),
- budgeted_cost=campaign_json.get("BudgetedCost"),
- actual_cost=campaign_json.get("ActualCost"),
- description=campaign_json.get("Description"),
- number_of_leads=campaign_json.get("NumberOfLeads"),
- number_of_converted_leads=campaign_json.get("NumberOfConvertedLeads"),
- )
- return dedent(campaign)
-
- def create_eml(self, email_json: t.Dict[str, t.Any]) -> str:
+ def _eml_for_record(self, email_json: t.Dict[str, t.Any]) -> str:
"""Recreates standard expected .eml format using template."""
eml = EMAIL_TEMPLATE.substitute(
date=formatdate(parser.parse(email_json.get("MessageDate")).timestamp()),
@@ -242,9 +134,29 @@ class SalesforceIngestDoc(IngestDocCleanupMixin, BaseIngestDoc):
from_email=email_json.get("FromAddress"),
to_email=email_json.get("ToAddress"),
textbody=email_json.get("TextBody"),
+ # TODO: This is a hack to get emails to process correctly.
+ # The HTML partitioner seems to have issues with <br> and text without tags like <p>
+ htmlbody=email_json.get("HtmlBody", "") # "" because you can't .replace None
+ .replace("
", "
") + .replace("
None: # type: ignore + date_format = "%Y-%m-%dT%H:%M:%S.000+0000" + self.source_metadata = SourceMetadata( + date_created=datetime.strptime(record_json["CreatedDate"], date_format).isoformat(), + date_modified=datetime.strptime( + record_json["LastModifiedDate"], + date_format, + ).isoformat(), + # SystemModstamp is Timestamp if record has been modified by person or automated system + version=record_json.get("SystemModstamp"), + source_url=record_json["attributes"].get("url"), + exists=True, + ) + + @SourceConnectionError.wrap @BaseIngestDoc.skip_if_file_exists def get_file(self): """Saves individual json records locally.""" @@ -258,22 +170,16 @@ class SalesforceIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): f"select FIELDS(STANDARD) from {self.record_type} where Id='{self.record_id}'", )["records"][0] + self.update_source_metadata(record) + try: if self.record_type == "EmailMessage": - formatted_record = self.create_eml(record) - elif self.record_type == "Account": - formatted_record = self.create_account(record) - elif self.record_type == "Lead": - formatted_record = self.create_lead(record) - elif self.record_type == "Case": - formatted_record = self.create_case(record) - elif self.record_type == "Campaign": - formatted_record = self.create_campaign(record) + document = self._eml_for_record(record) else: - raise ValueError(f"record type not recognized: {self.record_type}") + document = self._xml_for_record(record) with open(self._tmp_download_file(), "w") as page_file: - page_file.write(formatted_record) + page_file.write(document) except Exception as e: logger.error( diff --git a/unstructured/ingest/processor.py b/unstructured/ingest/processor.py index 8d075e6b3..a133b7273 100644 --- a/unstructured/ingest/processor.py +++ b/unstructured/ingest/processor.py @@ -74,9 +74,10 @@ class Processor: if not docs: return - # Debugging tip: use the below line and comment out the mp.Pool loop + # Debugging tip: use the below lines and comment out the mp.Pool loop # block 
to remain in single process - # self.doc_processor_fn(docs[0]) + # json_docs = [doc.to_json() for doc in docs] + # self.doc_processor_fn(json_docs[0]) logger.info(f"Processing {len(docs)} docs") json_docs = [doc.to_json() for doc in docs] with mp.Pool(