mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-06 11:57:23 +00:00
### What problem does this PR solve? #10953 ### Type of change - [x] New Feature (non-breaking change which adds functionality)
121 lines
4.6 KiB
Python
121 lines
4.6 KiB
Python
"""SharePoint connector"""
|
|
|
|
from typing import Any
|
|
import msal
|
|
from office365.graph_client import GraphClient
|
|
from office365.runtime.client_request import ClientRequestException
|
|
from office365.sharepoint.client_context import ClientContext
|
|
|
|
from common.data_source.config import INDEX_BATCH_SIZE
|
|
from common.data_source.exceptions import ConnectorValidationError, ConnectorMissingCredentialError
|
|
from common.data_source.interfaces import (
|
|
CheckpointedConnectorWithPermSync,
|
|
SecondsSinceUnixEpoch,
|
|
SlimConnectorWithPermSync
|
|
)
|
|
from common.data_source.models import (
|
|
ConnectorCheckpoint
|
|
)
|
|
|
|
|
|
class SharePointConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync):
|
|
"""SharePoint connector for accessing SharePoint sites and documents"""
|
|
|
|
def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
|
|
self.batch_size = batch_size
|
|
self.sharepoint_client = None
|
|
self.graph_client = None
|
|
|
|
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
|
|
"""Load SharePoint credentials"""
|
|
try:
|
|
tenant_id = credentials.get("tenant_id")
|
|
client_id = credentials.get("client_id")
|
|
client_secret = credentials.get("client_secret")
|
|
site_url = credentials.get("site_url")
|
|
|
|
if not all([tenant_id, client_id, client_secret, site_url]):
|
|
raise ConnectorMissingCredentialError("SharePoint credentials are incomplete")
|
|
|
|
# Create MSAL confidential client
|
|
app = msal.ConfidentialClientApplication(
|
|
client_id=client_id,
|
|
client_credential=client_secret,
|
|
authority=f"https://login.microsoftonline.com/{tenant_id}"
|
|
)
|
|
|
|
# Get access token
|
|
result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
|
|
|
|
if "access_token" not in result:
|
|
raise ConnectorMissingCredentialError("Failed to acquire SharePoint access token")
|
|
|
|
# Create Graph client
|
|
self.graph_client = GraphClient(result["access_token"])
|
|
|
|
# Create SharePoint client context
|
|
self.sharepoint_client = ClientContext(site_url).with_access_token(result["access_token"])
|
|
|
|
return None
|
|
except Exception as e:
|
|
raise ConnectorMissingCredentialError(f"SharePoint: {e}")
|
|
|
|
def validate_connector_settings(self) -> None:
|
|
"""Validate SharePoint connector settings"""
|
|
if not self.sharepoint_client or not self.graph_client:
|
|
raise ConnectorMissingCredentialError("SharePoint")
|
|
|
|
try:
|
|
# Test connection by getting site info
|
|
site = self.sharepoint_client.site.get().execute_query()
|
|
if not site:
|
|
raise ConnectorValidationError("Failed to access SharePoint site")
|
|
except ClientRequestException as e:
|
|
if "401" in str(e) or "403" in str(e):
|
|
raise ConnectorValidationError("Invalid credentials or insufficient permissions")
|
|
else:
|
|
raise ConnectorValidationError(f"SharePoint validation error: {e}")
|
|
|
|
def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> Any:
|
|
"""Poll SharePoint for recent documents"""
|
|
# Simplified implementation - in production this would handle actual polling
|
|
return []
|
|
|
|
def load_from_checkpoint(
|
|
self,
|
|
start: SecondsSinceUnixEpoch,
|
|
end: SecondsSinceUnixEpoch,
|
|
checkpoint: ConnectorCheckpoint,
|
|
) -> Any:
|
|
"""Load documents from checkpoint"""
|
|
# Simplified implementation
|
|
return []
|
|
|
|
def load_from_checkpoint_with_perm_sync(
|
|
self,
|
|
start: SecondsSinceUnixEpoch,
|
|
end: SecondsSinceUnixEpoch,
|
|
checkpoint: ConnectorCheckpoint,
|
|
) -> Any:
|
|
"""Load documents from checkpoint with permission sync"""
|
|
# Simplified implementation
|
|
return []
|
|
|
|
def build_dummy_checkpoint(self) -> ConnectorCheckpoint:
|
|
"""Build dummy checkpoint"""
|
|
return ConnectorCheckpoint()
|
|
|
|
def validate_checkpoint_json(self, checkpoint_json: str) -> ConnectorCheckpoint:
|
|
"""Validate checkpoint JSON"""
|
|
# Simplified implementation
|
|
return ConnectorCheckpoint()
|
|
|
|
def retrieve_all_slim_docs_perm_sync(
|
|
self,
|
|
start: SecondsSinceUnixEpoch | None = None,
|
|
end: SecondsSinceUnixEpoch | None = None,
|
|
callback: Any = None,
|
|
) -> Any:
|
|
"""Retrieve all simplified documents with permission sync"""
|
|
# Simplified implementation
|
|
return [] |