mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-11-02 02:53:31 +00:00
Solution to issue https://github.com/Unstructured-IO/unstructured/issues/2321. The simple_salesforce API allows passing either a private key path or the key value. This PR introduces this support for the Ingest connector. The Salesforce parameter "private-key-path" has been renamed to "private-key". It can contain one of the following: - a path to a PEM-encoded key file (as a string) - the key contents (a PEM-encoded string). If the provided value cannot be parsed as a PEM-encoded private key, then the existence of a file at that path is checked. This way the private key contents are not exposed to unnecessary underlying function calls.
This commit is contained in:
parent
5581e6a4c4
commit
411aa98bbf
@ -4,6 +4,7 @@
|
||||
|
||||
* **Add "basic" chunking strategy.** Add baseline chunking strategy that includes all shared chunking behaviors without breaking chunks on section or page boundaries.
|
||||
* **Add overlap option for chunking.** Add option to overlap chunks. Intra-chunk and inter-chunk overlap are requested separately. Intra-chunk overlap is applied only to the second and later chunks formed by text-splitting an oversized chunk. Inter-chunk overlap may also be specified; this applies overlap between "normal" (not-oversized) chunks.
|
||||
* **Salesforce connector accepts private key path or value.** The Salesforce parameter `private-key-path` has been renamed to `private-key`. The private key can be provided either as a path to a key file or as the key contents.
|
||||
|
||||
### Features
|
||||
|
||||
|
||||
@ -4,7 +4,7 @@ unstructured-ingest \
|
||||
salesforce \
|
||||
--username "$SALESFORCE_USERNAME" \
|
||||
--consumer-key "$SALESFORCE_CONSUMER_KEY" \
|
||||
--private-key-path "$SALESFORCE_PRIVATE_KEY_PATH" \
|
||||
--private-key "$SALESFORCE_PRIVATE_KEY_PATH" \
|
||||
--categories "EmailMessage,Account,Lead,Case,Campaign" \
|
||||
--output-dir salesforce-output \
|
||||
--num-processes 2 \
|
||||
|
||||
@ -4,7 +4,7 @@ unstructured-ingest \
|
||||
salesforce \
|
||||
--username "$SALESFORCE_USERNAME" \
|
||||
--consumer-key "$SALESFORCE_CONSUMER_KEY" \
|
||||
--private-key-path "$SALESFORCE_PRIVATE_KEY_PATH" \
|
||||
--private-key "$SALESFORCE_PRIVATE_KEY_PATH" \
|
||||
--categories "EmailMessage,Account,Lead,Case,Campaign" \
|
||||
--output-dir salesforce-output \
|
||||
--num-processes 2 \
|
||||
|
||||
@ -21,7 +21,7 @@ if __name__ == "__main__":
|
||||
consumer_key=os.getenv("SALESFORCE_CONSUMER_KEY"),
|
||||
),
|
||||
username=os.getenv("SALESFORCE_USERNAME"),
|
||||
private_key_path=os.getenv("SALESFORCE_PRIVATE_KEY_PATH"),
|
||||
private_key=os.getenv("SALESFORCE_PRIVATE_KEY_PATH"),
|
||||
categories=["EmailMessage", "Account", "Lead", "Case", "Campaign"],
|
||||
recursive=True,
|
||||
),
|
||||
|
||||
@ -11,7 +11,7 @@
|
||||
# https://developer.salesforce.com/docs/atlas.en-us.sfdx_dev.meta/sfdx_dev/sfdx_dev_auth_key_and_cert.htm
|
||||
# https://developer.salesforce.com/docs/atlas.en-us.sfdx_dev.meta/sfdx_dev/sfdx_dev_auth_connected_app.htm
|
||||
|
||||
# private-key-path is the path to the key file
|
||||
# private-key is the path to the key file or key contents
|
||||
|
||||
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
|
||||
cd "$SCRIPT_DIR"/../../.. || exit 1
|
||||
@ -20,7 +20,7 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
salesforce \
|
||||
--username "$SALESFORCE_USERNAME" \
|
||||
--consumer-key "$SALESFORCE_CONSUMER_KEY" \
|
||||
--private-key-path "$SALESFORCE_PRIVATE_KEY_PATH" \
|
||||
--private-key "$SALESFORCE_PRIVATE_KEY_PATH" \
|
||||
--categories "EmailMessage,Account,Lead,Case,Campaign" \
|
||||
--output-dir salesforce-output \
|
||||
--preserve-downloads \
|
||||
|
||||
@ -50,7 +50,7 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \
|
||||
--download-dir "$DOWNLOAD_DIR" \
|
||||
--username "$SALESFORCE_USERNAME" \
|
||||
--consumer-key "$SALESFORCE_CONSUMER_KEY" \
|
||||
--private-key-path "$SALESFORCE_PRIVATE_KEY_PATH" \
|
||||
--private-key "$SALESFORCE_PRIVATE_KEY_PATH" \
|
||||
--metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \
|
||||
--num-processes "$max_processes" \
|
||||
--preserve-downloads \
|
||||
|
||||
57
test_unstructured_ingest/unit/test_salesforce_connector.py
Normal file
57
test_unstructured_ingest/unit/test_salesforce_connector.py
Normal file
@ -0,0 +1,57 @@
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
from cryptography.hazmat.primitives import serialization
|
||||
from cryptography.hazmat.primitives.asymmetric import dsa, ec, rsa
|
||||
|
||||
from unstructured.ingest.connector.salesforce import SalesforceAccessConfig
|
||||
|
||||
|
||||
def pkey_to_str(key) -> str:
    """Serialize a private key object to an unencrypted PKCS8 PEM string.

    `key` is expected to expose `private_bytes(...)` as the `cryptography`
    private key objects generated by the helpers in this module do.
    """
    pem_bytes = key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.PKCS8,
        encryption_algorithm=serialization.NoEncryption(),
    )
    return pem_bytes.decode("utf-8")
|
||||
|
||||
|
||||
def rsa_private_key() -> str:
    """Return a freshly generated RSA private key as a PEM string.

    A 2048-bit key is used because 512-bit keys are rejected by recent
    `cryptography` releases (and are far below any size accepted in practice).
    """
    key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
    return pkey_to_str(key)
|
||||
|
||||
|
||||
def brainpoolp512r1_private_key() -> str:
    """Return a freshly generated brainpoolP512r1 EC private key as a PEM string."""
    # generate_private_key() takes a curve *instance*, not the curve class.
    key = ec.generate_private_key(ec.BrainpoolP512R1())
    return pkey_to_str(key)
|
||||
|
||||
|
||||
def dsa_private_key() -> str:
    """Return a freshly generated 1024-bit DSA private key as a PEM string."""
    key = dsa.generate_private_key(key_size=1024)
    return pkey_to_str(key)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("private_key", "private_key_type"),
    [
        (rsa_private_key(), str),
        (brainpoolp512r1_private_key(), str),
        (dsa_private_key(), str),
        ("some_path/priv.key", Path),
    ],
)
def test_private_key_type(mocker, private_key, private_key_type):
    """PEM contents are reported as `str`; anything else that is a file as `Path`."""
    # Pretend every path exists so the non-PEM case resolves to a key file.
    is_file_mock: MagicMock = mocker.patch("pathlib.Path.is_file", return_value=True)

    access_config = SalesforceAccessConfig(consumer_key="asdf", private_key=private_key)
    resolved_value, resolved_type = access_config.get_private_key_value_and_type()

    assert (resolved_type, resolved_value) == (private_key_type, private_key)
|
||||
|
||||
|
||||
def test_private_key_type_fail(mocker):
    """A value that is neither PEM contents nor an existing file raises ValueError."""
    mocked_isfile: MagicMock = mocker.patch("pathlib.Path.is_file")
    mocked_isfile.return_value = False

    given_nonexistent_path = "some_path/priv.key"
    # Build the config outside the `raises` block so that only the call under
    # test can satisfy the expectation; a ValueError from the constructor must
    # fail this test rather than make it pass.
    config = SalesforceAccessConfig(consumer_key="asdf", private_key=given_nonexistent_path)

    with pytest.raises(expected_exception=ValueError):
        config.get_private_key_value_and_type()
|
||||
@ -31,11 +31,11 @@ class SalesforceCliConfig(SimpleSalesforceConfig, CliConfig):
|
||||
help="For the Salesforce JWT auth. Found in Consumer Details.",
|
||||
),
|
||||
click.Option(
|
||||
["--private-key-path"],
|
||||
["--private-key"],
|
||||
required=True,
|
||||
type=click.Path(file_okay=True, exists=True, dir_okay=False),
|
||||
help="Path to the private key for the Salesforce JWT auth. "
|
||||
"Usually named server.key.",
|
||||
type=str,
|
||||
help="Path to the private key or its contents for the Salesforce JWT auth. "
|
||||
"Key file is usually named server.key.",
|
||||
),
|
||||
click.Option(
|
||||
["--categories"],
|
||||
|
||||
@ -63,6 +63,23 @@ $htmlbody
|
||||
@dataclass
class SalesforceAccessConfig(AccessConfig):
    """Sensitive credentials used for the Salesforce JWT authentication."""

    # Consumer key of the connected app used for the JWT auth.
    consumer_key: str = enhanced_field(sensitive=True)
    # Either the PEM-encoded private key contents or a path to the key file.
    private_key: str = enhanced_field(sensitive=True)

    @requires_dependencies(["cryptography"])
    def get_private_key_value_and_type(self) -> t.Tuple[str, t.Type]:
        """Classify `private_key` as PEM key contents or as a path to a key file.

        The value is first parsed as a PEM private key; only when that fails is
        it treated as a filesystem path. This ordering keeps real key material
        away from filesystem calls.

        Returns:
            A `(value, kind)` tuple where `value` is `self.private_key`
            unchanged and `kind` is `str` when the value is PEM key contents or
            `Path` when it names an existing file.

        Raises:
            ValueError: If the value is neither a parseable PEM private key nor
                the path of an existing file.
        """
        from cryptography.hazmat.primitives import serialization

        try:
            serialization.load_pem_private_key(data=self.private_key.encode("utf-8"), password=None)
        except ValueError:
            # Not parseable as PEM; fall through and try it as a path.
            pass
        else:
            return self.private_key, str

        try:
            is_key_file = Path(self.private_key).is_file()
        except OSError:
            # A malformed multi-line key used as a path can fail with e.g.
            # "File name too long"; that error message would contain the key
            # text, so treat it as "not a file" instead of letting it escape.
            is_key_file = False

        if is_key_file:
            return self.private_key, Path

        raise ValueError("private_key does not contain PEM private key or path")
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -72,17 +89,19 @@ class SimpleSalesforceConfig(BaseConnectorConfig):
|
||||
access_config: SalesforceAccessConfig
|
||||
categories: t.List[str]
|
||||
username: str
|
||||
private_key_path: str
|
||||
recursive: bool = False
|
||||
|
||||
@requires_dependencies(["simple_salesforce"], extras="salesforce")
|
||||
def get_client(self):
|
||||
from simple_salesforce import Salesforce
|
||||
|
||||
pkey_value, pkey_type = self.access_config.get_private_key_value_and_type()
|
||||
|
||||
return Salesforce(
|
||||
username=self.username,
|
||||
consumer_key=self.access_config.consumer_key,
|
||||
privatekey_file=self.private_key_path,
|
||||
privatekey_file=pkey_value if pkey_type is Path else None,
|
||||
privatekey=pkey_value if pkey_type is str else None,
|
||||
version=SALESFORCE_API_VERSION,
|
||||
)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user