From a39d7e1f85ac336507923d74b582374e80ee4a15 Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Wed, 16 Jul 2025 19:26:33 +0800 Subject: [PATCH] r2 transform --- .../rag_pipeline_transform_service.py | 44 +++++++++++-------- .../transform/file-general-high-quality.yml | 2 +- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/api/services/rag_pipeline/rag_pipeline_transform_service.py b/api/services/rag_pipeline/rag_pipeline_transform_service.py index 40077ab0ef..613d351bc7 100644 --- a/api/services/rag_pipeline/rag_pipeline_transform_service.py +++ b/api/services/rag_pipeline/rag_pipeline_transform_service.py @@ -16,6 +16,7 @@ from factories import variable_factory from models.dataset import Dataset, Pipeline from models.workflow import Workflow, WorkflowType from services.entities.knowledge_entities.rag_pipeline_entities import KnowledgeConfiguration, RetrievalSetting +from services.plugin.plugin_migration import PluginMigration class RagPipelineTransformService: @@ -234,6 +235,8 @@ class RagPipelineTransformService: datasource_manager = PluginDatasourceManager() + plugin_migration = PluginMigration() + tool_manager = ToolManager() installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins] @@ -244,25 +247,28 @@ class RagPipelineTransformService: plugin_unique_identifier = dependency.get("value", {}).get("plugin_unique_identifier") plugin_id = plugin_unique_identifier.split(":")[0] if plugin_id not in installed_plugins_ids: - if plugin_id == "langgenius/notion_datasource": - datasource = datasource_manager.fetch_datasource_provider(tenant_id, f"{plugin_id}/notion") - need_install_plugin_unique_identifiers.append(datasource.plugin_unique_identifier) - elif plugin_id == "langgenius/firecrawl_datasource": - datasource = datasource_manager.fetch_datasource_provider(tenant_id, f"{plugin_id}/firecrawl") - need_install_plugin_unique_identifiers.append(datasource.plugin_unique_identifier) - elif plugin_id == "langgenius/jina_datasource": - datasource = datasource_manager.fetch_datasource_provider(tenant_id, f"{plugin_id}/jina") - need_install_plugin_unique_identifiers.append(datasource.plugin_unique_identifier) - elif plugin_id == "langgenius/dify_extractor": - tool = tool_manager.get_plugin_provider(f"{plugin_id}/dify_extractor", tenant_id) - need_install_plugin_unique_identifiers.append(tool.plugin_unique_identifier) - elif plugin_id == "langgenius/general_chunk": - tool = tool_manager.get_plugin_provider(f"{plugin_id}/general_chunk", tenant_id) - need_install_plugin_unique_identifiers.append(tool.plugin_unique_identifier) - elif plugin_id == "langgenius/parent_child_chunk": - tool = tool_manager.get_plugin_provider(f"{plugin_id}/parent_child_chunk", tenant_id) - need_install_plugin_unique_identifiers.append(tool.plugin_unique_identifier) - dependency["value"]["current_identifier"] = plugin_unique_identifier + # if plugin_id == "langgenius/notion_datasource": + # datasource = plugin_migration._fetch_plugin_unique_identifier(f"{plugin_id}/notion") + # need_install_plugin_unique_identifiers.append(datasource.plugin_unique_identifier) + # elif plugin_id == "langgenius/firecrawl_datasource": + # datasource = datasource_manager.fetch_datasource_provider(tenant_id, f"{plugin_id}/firecrawl") + # need_install_plugin_unique_identifiers.append(datasource.plugin_unique_identifier) + # elif plugin_id == "langgenius/jina_datasource": + # datasource = datasource_manager.fetch_datasource_provider(tenant_id, f"{plugin_id}/jina") + # need_install_plugin_unique_identifiers.append(datasource.plugin_unique_identifier) + # elif plugin_id == "langgenius/dify_extractor": + # tool = tool_manager.get_plugin_provider(f"{plugin_id}/dify_extractor", tenant_id) + # need_install_plugin_unique_identifiers.append(tool.plugin_unique_identifier) + # elif plugin_id == "langgenius/general_chunker": + # tool = tool_manager.get_plugin_provider(f"{plugin_id}/general_chunker", tenant_id) + # need_install_plugin_unique_identifiers.append(tool.plugin_unique_identifier) + # elif plugin_id == "langgenius/parentchild_chunker": + # tool = tool_manager.get_plugin_provider(f"{plugin_id}/parentchild_chunker", tenant_id) + # need_install_plugin_unique_identifiers.append(tool.plugin_unique_identifier) + # dependency["value"]["current_identifier"] = plugin_unique_identifier + plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(plugin_id) + if plugin_unique_identifier: + need_install_plugin_unique_identifiers.append(plugin_unique_identifier) if need_install_plugin_unique_identifiers: installer_manager.install_from_identifiers( tenant_id, diff --git a/api/services/rag_pipeline/transform/file-general-high-quality.yml b/api/services/rag_pipeline/transform/file-general-high-quality.yml index 7ab98fec65..946dd96709 100644 --- a/api/services/rag_pipeline/transform/file-general-high-quality.yml +++ b/api/services/rag_pipeline/transform/file-general-high-quality.yml @@ -2,7 +2,7 @@ dependencies: - current_identifier: null type: marketplace value: - plugin_unique_identifier: langgenius/qa_chunk:0.0.1@ef14ad7edce1d293ef52f14429a9acb39fa146a7b91d63a31cda905539908453 + plugin_unique_identifier: langgenius/general_chunker:0.0.1@e3da408b7277866404c3f884d599261f9d0b9003ea4ef7eb3b64489bdf39d18b - current_identifier: null type: marketplace value: