ragflow/common/data_source/sharepoint_connector.py
Kevin Hu 3e5a39482e
Feat: Support multiple data sources synchronizations (#10954)
### What problem does this PR solve?
#10953

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2025-11-03 19:59:18 +08:00

121 lines
4.6 KiB
Python

"""SharePoint connector"""
from typing import Any
import msal
from office365.graph_client import GraphClient
from office365.runtime.client_request import ClientRequestException
from office365.sharepoint.client_context import ClientContext
from common.data_source.config import INDEX_BATCH_SIZE
from common.data_source.exceptions import ConnectorValidationError, ConnectorMissingCredentialError
from common.data_source.interfaces import (
CheckpointedConnectorWithPermSync,
SecondsSinceUnixEpoch,
SlimConnectorWithPermSync
)
from common.data_source.models import (
ConnectorCheckpoint
)
class SharePointConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync):
"""SharePoint connector for accessing SharePoint sites and documents"""
def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
self.batch_size = batch_size
self.sharepoint_client = None
self.graph_client = None
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
"""Load SharePoint credentials"""
try:
tenant_id = credentials.get("tenant_id")
client_id = credentials.get("client_id")
client_secret = credentials.get("client_secret")
site_url = credentials.get("site_url")
if not all([tenant_id, client_id, client_secret, site_url]):
raise ConnectorMissingCredentialError("SharePoint credentials are incomplete")
# Create MSAL confidential client
app = msal.ConfidentialClientApplication(
client_id=client_id,
client_credential=client_secret,
authority=f"https://login.microsoftonline.com/{tenant_id}"
)
# Get access token
result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
if "access_token" not in result:
raise ConnectorMissingCredentialError("Failed to acquire SharePoint access token")
# Create Graph client
self.graph_client = GraphClient(result["access_token"])
# Create SharePoint client context
self.sharepoint_client = ClientContext(site_url).with_access_token(result["access_token"])
return None
except Exception as e:
raise ConnectorMissingCredentialError(f"SharePoint: {e}")
def validate_connector_settings(self) -> None:
"""Validate SharePoint connector settings"""
if not self.sharepoint_client or not self.graph_client:
raise ConnectorMissingCredentialError("SharePoint")
try:
# Test connection by getting site info
site = self.sharepoint_client.site.get().execute_query()
if not site:
raise ConnectorValidationError("Failed to access SharePoint site")
except ClientRequestException as e:
if "401" in str(e) or "403" in str(e):
raise ConnectorValidationError("Invalid credentials or insufficient permissions")
else:
raise ConnectorValidationError(f"SharePoint validation error: {e}")
def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> Any:
"""Poll SharePoint for recent documents"""
# Simplified implementation - in production this would handle actual polling
return []
def load_from_checkpoint(
self,
start: SecondsSinceUnixEpoch,
end: SecondsSinceUnixEpoch,
checkpoint: ConnectorCheckpoint,
) -> Any:
"""Load documents from checkpoint"""
# Simplified implementation
return []
def load_from_checkpoint_with_perm_sync(
self,
start: SecondsSinceUnixEpoch,
end: SecondsSinceUnixEpoch,
checkpoint: ConnectorCheckpoint,
) -> Any:
"""Load documents from checkpoint with permission sync"""
# Simplified implementation
return []
def build_dummy_checkpoint(self) -> ConnectorCheckpoint:
"""Build dummy checkpoint"""
return ConnectorCheckpoint()
def validate_checkpoint_json(self, checkpoint_json: str) -> ConnectorCheckpoint:
"""Validate checkpoint JSON"""
# Simplified implementation
return ConnectorCheckpoint()
def retrieve_all_slim_docs_perm_sync(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: Any = None,
) -> Any:
"""Retrieve all simplified documents with permission sync"""
# Simplified implementation
return []