add old auth transform

jyong 2025-07-28 19:29:36 +08:00
parent 829e6f0d1a
commit 657e813c7f
9 changed files with 68 additions and 32 deletions

View File

@@ -14,7 +14,6 @@ from configs import dify_config
 from constants.languages import languages
 from core.helper import encrypter
 from core.plugin.entities.plugin import DatasourceProviderID, PluginInstallationSource, ToolProviderID
-from core.plugin.impl.datasource import PluginDatasourceManager
 from core.plugin.impl.plugin import PluginInstaller
 from core.rag.datasource.vdb.vector_factory import Vector
 from core.rag.datasource.vdb.vector_type import VectorType
@@ -38,7 +37,6 @@ from models.provider import Provider, ProviderModel
 from models.source import DataSourceApiKeyAuthBinding, DataSourceOauthBinding
 from models.tools import ToolOAuthSystemClient
 from services.account_service import AccountService, RegisterService, TenantService
-from services.auth import firecrawl
 from services.clear_free_plan_tenant_expired_logs import ClearFreePlanTenantExpiredLogs
 from services.plugin.data_migration import PluginDataMigration
 from services.plugin.plugin_migration import PluginMigration
@@ -1255,6 +1253,7 @@ def setup_datasource_oauth_client(provider, client_params):
     click.echo(click.style(f"params: {json.dumps(client_params_dict, indent=2, ensure_ascii=False)}", fg="green"))
     click.echo(click.style(f"Datasource oauth client setup successfully. id: {oauth_client.id}", fg="green"))
 
+
 @click.command("transform-datasource-credentials", help="Transform datasource credentials.")
 def transform_datasource_credentials():
     """
@@ -1273,7 +1272,6 @@ def transform_datasource_credentials():
     oauth_credential_type = CredentialType.OAUTH2
     api_key_credential_type = CredentialType.API_KEY
 
-
     # deal notion credentials
     deal_notion_count = 0
     notion_credentials = db.session.query(DataSourceOauthBinding).filter_by(provider="notion").all()
@@ -1429,5 +1427,7 @@ def transform_datasource_credentials():
                 click.echo(click.style(f"Error parsing client params: {str(e)}", fg="red"))
                 return
     click.echo(click.style(f"Transforming notion successfully. deal_notion_count: {deal_notion_count}", fg="green"))
-    click.echo(click.style(f"Transforming firecrawl successfully. deal_firecrawl_count: {deal_firecrawl_count}", fg="green"))
+    click.echo(
+        click.style(f"Transforming firecrawl successfully. deal_firecrawl_count: {deal_firecrawl_count}", fg="green")
+    )
     click.echo(click.style(f"Transforming jina successfully. deal_jina_count: {deal_jina_count}", fg="green"))

View File

@@ -1,5 +1,6 @@
 import json
-from typing import Generator, cast
+from collections.abc import Generator
+from typing import cast
 
 from flask import request
 from flask_login import current_user
@@ -20,7 +21,6 @@ from fields.data_source_fields import integrate_list_fields, integrate_notion_in
 from libs.datetime_utils import naive_utc_now
 from libs.login import login_required
 from models import DataSourceOauthBinding, Document
-from models.oauth import DatasourceProvider
 from services.dataset_service import DatasetService, DocumentService
 from services.datasource_provider_service import DatasourceProviderService
 from tasks.document_indexing_sync_task import document_indexing_sync_task

View File

@@ -10,6 +10,7 @@ class NotionInfo(BaseModel):
     """
     Notion import info.
     """
+
     credential_id: Optional[str] = None
     notion_workspace_id: str
     notion_obj_id: str

View File

@@ -1,4 +1,5 @@
 from typing import Optional
+
 from core.rag.extractor.extractor_base import BaseExtractor
 from core.rag.models.document import Document
 from services.website_service import WebsiteService
@@ -16,8 +17,15 @@ class FirecrawlWebExtractor(BaseExtractor):
         only_main_content: Only return the main content of the page excluding headers, navs, footers, etc.
     """
 
-    def __init__(self, url: str, job_id: str, tenant_id: str, mode: str = "crawl", only_main_content: bool = True,
-                 credential_id: Optional[str] = None):
+    def __init__(
+        self,
+        url: str,
+        job_id: str,
+        tenant_id: str,
+        mode: str = "crawl",
+        only_main_content: bool = True,
+        credential_id: Optional[str] = None,
+    ):
         """Initialize with url, api_key, base_url and mode."""
         self._url = url
         self.job_id = job_id
@@ -30,7 +38,9 @@ class FirecrawlWebExtractor(BaseExtractor):
         """Extract content from the URL."""
         documents = []
         if self.mode == "crawl":
-            crawl_data = WebsiteService.get_crawl_url_data(self.job_id, "firecrawl", self._url, self.tenant_id, self.credential_id)
+            crawl_data = WebsiteService.get_crawl_url_data(
+                self.job_id, "firecrawl", self._url, self.tenant_id, self.credential_id
+            )
             if crawl_data is None:
                 return []
             document = Document(
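All three extractor files in this commit (Firecrawl, Jina Reader, WaterCrawl) gain the same optional credential_id parameter and thread it through to WebsiteService.get_crawl_url_data. A hedged usage sketch; every argument value is illustrative:

    # Sketch (not from the commit): constructing an extractor with the new parameter.
    extractor = FirecrawlWebExtractor(
        url="https://example.com",  # illustrative values throughout
        job_id="job-123",
        tenant_id="tenant-456",
        mode="crawl",
        credential_id="cred-789",  # new: selects a stored credential for the crawl-data lookup
    )
    documents = extractor.extract()  # per the hunk above, returns [] when no crawl data is found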

View File

@@ -1,4 +1,5 @@
 from typing import Optional
+
 from core.rag.extractor.extractor_base import BaseExtractor
 from core.rag.models.document import Document
 from services.website_service import WebsiteService
@@ -9,8 +10,15 @@ class JinaReaderWebExtractor(BaseExtractor):
     Crawl and scrape websites and return content in clean llm-ready markdown.
     """
 
-    def __init__(self, url: str, job_id: str, tenant_id: str, mode: str = "crawl", only_main_content: bool = False,
-                 credential_id: Optional[str] = None):
+    def __init__(
+        self,
+        url: str,
+        job_id: str,
+        tenant_id: str,
+        mode: str = "crawl",
+        only_main_content: bool = False,
+        credential_id: Optional[str] = None,
+    ):
         """Initialize with url, api_key, base_url and mode."""
         self._url = url
         self.job_id = job_id
@@ -23,7 +31,9 @@ class JinaReaderWebExtractor(BaseExtractor):
         """Extract content from the URL."""
         documents = []
         if self.mode == "crawl":
-            crawl_data = WebsiteService.get_crawl_url_data(self.job_id, "jinareader", self._url, self.tenant_id, self.credential_id)
+            crawl_data = WebsiteService.get_crawl_url_data(
+                self.job_id, "jinareader", self._url, self.tenant_id, self.credential_id
+            )
             if crawl_data is None:
                 return []
             document = Document(

View File

@@ -9,8 +9,6 @@ from core.rag.extractor.extractor_base import BaseExtractor
 from core.rag.models.document import Document
 from extensions.ext_database import db
 from models.dataset import Document as DocumentModel
-from models.oauth import DatasourceProvider
-from models.source import DataSourceOauthBinding
 from services.datasource_provider_service import DatasourceProviderService
 
 logger = logging.getLogger(__name__)

View File

@@ -1,4 +1,5 @@
 from typing import Optional
+
 from core.rag.extractor.extractor_base import BaseExtractor
 from core.rag.models.document import Document
 from services.website_service import WebsiteService
@@ -17,8 +18,15 @@ class WaterCrawlWebExtractor(BaseExtractor):
         only_main_content: Only return the main content of the page excluding headers, navs, footers, etc.
     """
 
-    def __init__(self, url: str, job_id: str, tenant_id: str, mode: str = "crawl", only_main_content: bool = True,
-                 credential_id: Optional[str] = None):
+    def __init__(
+        self,
+        url: str,
+        job_id: str,
+        tenant_id: str,
+        mode: str = "crawl",
+        only_main_content: bool = True,
+        credential_id: Optional[str] = None,
+    ):
         """Initialize with url, api_key, base_url and mode."""
         self._url = url
         self.job_id = job_id
@@ -31,7 +39,9 @@ class WaterCrawlWebExtractor(BaseExtractor):
         """Extract content from the URL."""
         documents = []
         if self.mode == "crawl":
-            crawl_data = WebsiteService.get_crawl_url_data(self.job_id, "watercrawl", self._url, self.tenant_id, self.credential_id)
+            crawl_data = WebsiteService.get_crawl_url_data(
+                self.job_id, "watercrawl", self._url, self.tenant_id, self.credential_id
+            )
             if crawl_data is None:
                 return []
             document = Document(

View File

@@ -56,15 +56,15 @@ class DatasourceProviderService:
                 return {}
             return datasource_provider.encrypted_credentials
 
-    def get_real_credential_by_id(self, tenant_id: str, credential_id: str, provider: str, plugin_id: str) -> dict[str, Any]:
+    def get_real_credential_by_id(
+        self, tenant_id: str, credential_id: str, provider: str, plugin_id: str
+    ) -> dict[str, Any]:
         """
         get credential by id
         """
         with Session(db.engine) as session:
             datasource_provider = (
-                session.query(DatasourceProvider)
-                .filter_by(tenant_id=tenant_id, id=credential_id)
-                .first()
+                session.query(DatasourceProvider).filter_by(tenant_id=tenant_id, id=credential_id).first()
             )
             if not datasource_provider:
                 return {}
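A hedged call sketch for the reshaped lookup above; the identifiers are illustrative, and the empty-dict return for a missing row comes from the hunk itself:

    # Sketch (not from the commit): calling the reformatted credential lookup.
    service = DatasourceProviderService()
    credential = service.get_real_credential_by_id(
        tenant_id="tenant-456",  # illustrative IDs
        credential_id="cred-789",
        provider="firecrawl",
        plugin_id="example/firecrawl",  # hypothetical plugin id
    )
    if not credential:
        ...  # {} means no DatasourceProvider row matched tenant_id + credential_id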

View File

@@ -120,7 +120,9 @@ class WebsiteService:
     """Service class for website crawling operations using different providers."""
 
     @classmethod
-    def _get_credentials_and_config(cls, tenant_id: str, provider: str, credential_id: Optional[str] = None) -> tuple[Any, Any]:
+    def _get_credentials_and_config(
+        cls, tenant_id: str, provider: str, credential_id: Optional[str] = None
+    ) -> tuple[Any, Any]:
         """Get and validate credentials for a provider."""
         if credential_id:
             if provider == "firecrawl":
@@ -164,7 +166,9 @@
         """Crawl a URL using the specified provider with typed request."""
         request = api_request.to_crawl_request()
-        _, config = cls._get_credentials_and_config(current_user.current_tenant_id, request.provider, api_request.credential_id)
+        _, config = cls._get_credentials_and_config(
+            current_user.current_tenant_id, request.provider, api_request.credential_id
+        )
         if api_request.credential_id:
             api_key = _
         else:
@@ -258,9 +262,9 @@
     @classmethod
     def get_crawl_status_typed(cls, api_request: WebsiteCrawlStatusApiRequest) -> dict[str, Any]:
         """Get crawl status using typed request."""
-        _, config = cls._get_credentials_and_config(current_user.current_tenant_id,
-                                                    api_request.provider,
-                                                    api_request.credential_id)
+        _, config = cls._get_credentials_and_config(
+            current_user.current_tenant_id, api_request.provider, api_request.credential_id
+        )
         if api_request.credential_id:
             api_key = _
         else:
@@ -337,7 +341,9 @@
         return crawl_status_data
 
     @classmethod
-    def get_crawl_url_data(cls, job_id: str, provider: str, url: str, tenant_id: str, credential_id: Optional[str] = None) -> dict[str, Any] | None:
+    def get_crawl_url_data(
+        cls, job_id: str, provider: str, url: str, tenant_id: str, credential_id: Optional[str] = None
+    ) -> dict[str, Any] | None:
         _, config = cls._get_credentials_and_config(tenant_id, provider, credential_id)
         if credential_id:
             api_key = _
@@ -412,13 +418,14 @@
         return None
 
     @classmethod
-    def get_scrape_url_data(cls, provider: str, url: str, tenant_id: str, only_main_content: bool,
-                            credential_id: Optional[str] = None) -> dict[str, Any]:
+    def get_scrape_url_data(
+        cls, provider: str, url: str, tenant_id: str, only_main_content: bool, credential_id: Optional[str] = None
+    ) -> dict[str, Any]:
         request = ScrapeRequest(provider=provider, url=url, tenant_id=tenant_id, only_main_content=only_main_content)
-        _, config = cls._get_credentials_and_config(tenant_id=request.tenant_id,
-                                                    provider=request.provider,
-                                                    credential_id=credential_id)
+        _, config = cls._get_credentials_and_config(
+            tenant_id=request.tenant_id, provider=request.provider, credential_id=credential_id
+        )
         if credential_id:
             api_key = _
         else: