mirror of
https://github.com/langgenius/dify.git
synced 2025-07-31 05:18:40 +00:00
r2 transform
This commit is contained in:
parent
1ad73ccdc8
commit
2012ea3213
@ -46,6 +46,7 @@ from services.errors.llm import InvokeRateLimitError
|
||||
from services.rag_pipeline.pipeline_generate_service import PipelineGenerateService
|
||||
from services.rag_pipeline.rag_pipeline import RagPipelineService
|
||||
from services.rag_pipeline.rag_pipeline_manage_service import RagPipelineManageService
|
||||
from services.rag_pipeline.rag_pipeline_transform_service import RagPipelineTransformService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -946,6 +947,16 @@ class RagPipelineWorkflowLastRunApi(Resource):
|
||||
if node_exec is None:
|
||||
raise NotFound("last run not found")
|
||||
return node_exec
|
||||
|
||||
class RagPipelineTransformApi(Resource):
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
def post(self, dataset_id):
|
||||
dataset_id = str(dataset_id)
|
||||
rag_pipeline_transform_service = RagPipelineTransformService()
|
||||
rag_pipeline_transform_service.transform_dataset(dataset_id)
|
||||
return {"message": "success"}
|
||||
|
||||
|
||||
api.add_resource(
|
||||
@ -1056,3 +1067,7 @@ api.add_resource(
|
||||
RagPipelineWorkflowLastRunApi,
|
||||
"/rag/pipelines/<uuid:pipeline_id>/workflows/draft/nodes/<string:node_id>/last-run",
|
||||
)
|
||||
api.add_resource(
|
||||
RagPipelineTransformApi,
|
||||
"/rag/pipelines/transform/datasets/<uuid:dataset_id>",
|
||||
)
|
@ -87,8 +87,8 @@ class RetrievalSetting(BaseModel):
|
||||
top_k: int
|
||||
score_threshold: Optional[float] = 0.5
|
||||
score_threshold_enabled: bool = False
|
||||
reranking_mode: str = "reranking_model"
|
||||
reranking_enable: bool = True
|
||||
reranking_mode: Optional[str] = "reranking_model"
|
||||
reranking_enable: Optional[bool] = True
|
||||
reranking_model: Optional[RerankingModelConfig] = None
|
||||
weights: Optional[WeightedScoreConfig] = None
|
||||
|
||||
|
@ -54,7 +54,7 @@ from core.workflow.workflow_entry import WorkflowEntry
|
||||
from extensions.ext_database import db
|
||||
from libs.infinite_scroll_pagination import InfiniteScrollPagination
|
||||
from models.account import Account
|
||||
from models.dataset import Document, Pipeline, PipelineCustomizedTemplate # type: ignore
|
||||
from models.dataset import Dataset, Document, Pipeline, PipelineCustomizedTemplate # type: ignore
|
||||
from models.enums import WorkflowRunTriggeredFrom
|
||||
from models.model import EndUser
|
||||
from models.workflow import (
|
||||
@ -72,7 +72,7 @@ from services.entities.knowledge_entities.rag_pipeline_entities import (
|
||||
)
|
||||
from services.errors.app import WorkflowHashNotEqualError
|
||||
from services.rag_pipeline.pipeline_template.pipeline_template_factory import PipelineTemplateRetrievalFactory
|
||||
from services.workflow_draft_variable_service import DraftVarLoader, DraftVariableSaver
|
||||
from services.workflow_draft_variable_service import DraftVariableSaver, DraftVarLoader
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -233,6 +233,7 @@ class RagPipelineDslService:
|
||||
)
|
||||
dataset = pipeline.dataset
|
||||
if dataset:
|
||||
self._session.merge(dataset)
|
||||
dataset_name = dataset.name
|
||||
|
||||
# If major version mismatch, store import info in Redis
|
||||
@ -291,7 +292,7 @@ class RagPipelineDslService:
|
||||
if not dataset:
|
||||
dataset = Dataset(
|
||||
tenant_id=account.current_tenant_id,
|
||||
name=name,
|
||||
name=name + datetime.now(UTC).strftime("%Y%m%d%H%M%S%f"),
|
||||
description=description,
|
||||
icon_info={
|
||||
"type": icon_type,
|
||||
|
210
api/services/rag_pipeline/rag_pipeline_transform_service.py
Normal file
210
api/services/rag_pipeline/rag_pipeline_transform_service.py
Normal file
@ -0,0 +1,210 @@
|
||||
import json
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from uuid import uuid4
|
||||
|
||||
import yaml
|
||||
from flask_login import current_user
|
||||
|
||||
from constants import DOCUMENT_EXTENSIONS
|
||||
from extensions.ext_database import db
|
||||
from factories import variable_factory
|
||||
from models.dataset import Dataset, Pipeline
|
||||
from models.workflow import Workflow, WorkflowType
|
||||
from services.entities.knowledge_entities.rag_pipeline_entities import KnowledgeConfiguration, RetrievalSetting
|
||||
|
||||
|
||||
class RagPipelineTransformService:
|
||||
|
||||
|
||||
def transform_dataset(self, dataset_id: str):
|
||||
dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
|
||||
if not dataset:
|
||||
raise ValueError("Dataset not found")
|
||||
if dataset.pipeline_id and dataset.runtime_mode == "rag_pipeline":
|
||||
return
|
||||
if dataset.provider != "vendor":
|
||||
raise ValueError("External dataset is not supported")
|
||||
datasource_type = dataset.data_source_type
|
||||
indexing_technique = dataset.indexing_technique
|
||||
|
||||
if not datasource_type and not indexing_technique:
|
||||
return
|
||||
doc_form = dataset.doc_form
|
||||
if not doc_form:
|
||||
return
|
||||
retrieval_model = dataset.retrieval_model
|
||||
pipeline_yaml = self._get_transform_yaml(doc_form, datasource_type, indexing_technique)
|
||||
# Extract app data
|
||||
workflow_data = pipeline_yaml.get("workflow")
|
||||
graph = workflow_data.get("graph", {})
|
||||
nodes = graph.get("nodes", [])
|
||||
new_nodes = []
|
||||
|
||||
for node in nodes:
|
||||
if node.get("data", {}).get("type") == "datasource" and node.get("data", {}).get("provider_type") == "local_file":
|
||||
node = self._deal_file_extensions(node)
|
||||
if node.get("data", {}).get("type") == "knowledge-index":
|
||||
node = self._deal_knowledge_index(dataset, doc_form, indexing_technique, retrieval_model, node)
|
||||
new_nodes.append(node)
|
||||
if new_nodes:
|
||||
graph["nodes"] = new_nodes
|
||||
workflow_data["graph"] = graph
|
||||
pipeline_yaml["workflow"] = workflow_data
|
||||
# create pipeline
|
||||
pipeline = self._create_pipeline(pipeline_yaml)
|
||||
|
||||
# save chunk structure to dataset
|
||||
if doc_form == "hierarchical_model":
|
||||
dataset.chunk_structure = "hierarchical_model"
|
||||
elif doc_form == "text_model":
|
||||
dataset.chunk_structure = "text_model"
|
||||
else:
|
||||
raise ValueError("Unsupported doc form")
|
||||
|
||||
dataset.runtime_mode = "rag_pipeline"
|
||||
dataset.pipeline_id = pipeline.id
|
||||
|
||||
db.session.commit()
|
||||
|
||||
def _get_transform_yaml(self, doc_form: str, datasource_type: str, indexing_technique: str):
|
||||
if doc_form == "text_model":
|
||||
match datasource_type:
|
||||
case "upload_file":
|
||||
if indexing_technique == "high_quality":
|
||||
# get graph from transform.file-general-high-quality.yml
|
||||
with open(f"{Path(__file__).parent}/transform/file-general-high-quality.yml", "r") as f:
|
||||
pipeline_yaml = yaml.safe_load(f)
|
||||
if indexing_technique == "economy":
|
||||
# get graph from transform.file-general-economy.yml
|
||||
with open(f"{Path(__file__).parent}/transform/file-general-economy.yml", "r") as f:
|
||||
pipeline_yaml = yaml.safe_load(f)
|
||||
case "notion_import":
|
||||
if indexing_technique == "high_quality":
|
||||
# get graph from transform.notion-general-high-quality.yml
|
||||
with open(f"{Path(__file__).parent}/transform/notion-general-high-quality.yml", "r") as f:
|
||||
pipeline_yaml = yaml.safe_load(f)
|
||||
if indexing_technique == "economy":
|
||||
# get graph from transform.notion-general-economy.yml
|
||||
with open(f"{Path(__file__).parent}/transform/notion-general-economy.yml", "r") as f:
|
||||
pipeline_yaml = yaml.safe_load(f)
|
||||
case "website_crawl":
|
||||
if indexing_technique == "high_quality":
|
||||
# get graph from transform.website-crawl-general-high-quality.yml
|
||||
with open(f"{Path(__file__).parent}/transform/website-crawl-general-high-quality.yml", "r") as f:
|
||||
pipeline_yaml = yaml.safe_load(f)
|
||||
if indexing_technique == "economy":
|
||||
# get graph from transform.website-crawl-general-economy.yml
|
||||
with open(f"{Path(__file__).parent}/transform/website-crawl-general-economy.yml", "r") as f:
|
||||
pipeline_yaml = yaml.safe_load(f)
|
||||
case _:
|
||||
raise ValueError("Unsupported datasource type")
|
||||
elif doc_form == "hierarchical_model":
|
||||
match datasource_type:
|
||||
case "upload_file":
|
||||
# get graph from transform.file-parent-child.yml
|
||||
with open(f"{Path(__file__).parent}/transform/file-parent-child.yml", "r") as f:
|
||||
pipeline_yaml = yaml.safe_load(f)
|
||||
case "notion_import":
|
||||
# get graph from transform.notion-parent-child.yml
|
||||
with open(f"{Path(__file__).parent}/transform/notion-parent-child.yml", "r") as f:
|
||||
pipeline_yaml = yaml.safe_load(f)
|
||||
case "website_crawl":
|
||||
# get graph from transform.website-crawl-parent-child.yml
|
||||
with open(f"{Path(__file__).parent}/transform/website-crawl-parent-child.yml", "r") as f:
|
||||
pipeline_yaml = yaml.safe_load(f)
|
||||
case _:
|
||||
raise ValueError("Unsupported datasource type")
|
||||
else:
|
||||
raise ValueError("Unsupported doc form")
|
||||
return pipeline_yaml
|
||||
|
||||
def _deal_file_extensions(self, node: dict):
|
||||
file_extensions = node.get("data", {}).get("fileExtensions", [])
|
||||
if not file_extensions:
|
||||
return node
|
||||
file_extensions = [file_extension.lower() for file_extension in file_extensions]
|
||||
node["data"]["fileExtensions"] = DOCUMENT_EXTENSIONS
|
||||
return node
|
||||
|
||||
def _deal_knowledge_index(self, dataset: Dataset, doc_form: str, indexing_technique: str, retrieval_model: dict, node: dict):
|
||||
knowledge_configuration = node.get("data", {})
|
||||
knowledge_configuration = KnowledgeConfiguration(**knowledge_configuration)
|
||||
|
||||
if indexing_technique == "high_quality":
|
||||
knowledge_configuration.embedding_model = dataset.embedding_model
|
||||
knowledge_configuration.embedding_model_provider = dataset.embedding_model_provider
|
||||
retrieval_setting = RetrievalSetting(**retrieval_model)
|
||||
if indexing_technique == "economy":
|
||||
retrieval_setting.search_method = "keyword_search"
|
||||
knowledge_configuration.retrieval_model = retrieval_setting
|
||||
|
||||
return knowledge_configuration.model_dump()
|
||||
|
||||
def _create_pipeline(
|
||||
self,
|
||||
data: dict,
|
||||
) -> Pipeline:
|
||||
"""Create a new app or update an existing one."""
|
||||
pipeline_data = data.get("rag_pipeline", {})
|
||||
# Initialize pipeline based on mode
|
||||
workflow_data = data.get("workflow")
|
||||
if not workflow_data or not isinstance(workflow_data, dict):
|
||||
raise ValueError("Missing workflow data for rag pipeline")
|
||||
|
||||
environment_variables_list = workflow_data.get("environment_variables", [])
|
||||
environment_variables = [
|
||||
variable_factory.build_environment_variable_from_mapping(obj) for obj in environment_variables_list
|
||||
]
|
||||
conversation_variables_list = workflow_data.get("conversation_variables", [])
|
||||
conversation_variables = [
|
||||
variable_factory.build_conversation_variable_from_mapping(obj) for obj in conversation_variables_list
|
||||
]
|
||||
rag_pipeline_variables_list = workflow_data.get("rag_pipeline_variables", [])
|
||||
|
||||
graph = workflow_data.get("graph", {})
|
||||
|
||||
# Create new app
|
||||
pipeline = Pipeline()
|
||||
pipeline.id = str(uuid4())
|
||||
pipeline.tenant_id = current_user.current_tenant_id
|
||||
pipeline.name = pipeline_data.get("name", "")
|
||||
pipeline.description = pipeline_data.get("description", "")
|
||||
pipeline.created_by = current_user.id
|
||||
pipeline.updated_by = current_user.id
|
||||
pipeline.is_published = True
|
||||
pipeline.is_public = True
|
||||
|
||||
db.session.add(pipeline)
|
||||
db.session.flush()
|
||||
# create draft workflow
|
||||
draft_workflow = Workflow(
|
||||
tenant_id=pipeline.tenant_id,
|
||||
app_id=pipeline.id,
|
||||
features="{}",
|
||||
type=WorkflowType.RAG_PIPELINE.value,
|
||||
version="draft",
|
||||
graph=json.dumps(graph),
|
||||
created_by=current_user.id,
|
||||
environment_variables=environment_variables,
|
||||
conversation_variables=conversation_variables,
|
||||
rag_pipeline_variables=rag_pipeline_variables_list,
|
||||
)
|
||||
published_workflow = Workflow(
|
||||
tenant_id=pipeline.tenant_id,
|
||||
app_id=pipeline.id,
|
||||
features="{}",
|
||||
type=WorkflowType.RAG_PIPELINE.value,
|
||||
version=str(datetime.now(UTC).replace(tzinfo=None)),
|
||||
graph=json.dumps(graph),
|
||||
created_by=current_user.id,
|
||||
environment_variables=environment_variables,
|
||||
conversation_variables=conversation_variables,
|
||||
rag_pipeline_variables=rag_pipeline_variables_list,
|
||||
)
|
||||
db.session.add(draft_workflow)
|
||||
db.session.add(published_workflow)
|
||||
db.session.flush()
|
||||
pipeline.workflow_id = published_workflow.id
|
||||
db.session.add(pipeline)
|
||||
return pipeline
|
708
api/services/rag_pipeline/transform/file-general-economy.yml
Normal file
708
api/services/rag_pipeline/transform/file-general-economy.yml
Normal file
@ -0,0 +1,708 @@
|
||||
dependencies:
|
||||
- current_identifier: null
|
||||
type: package
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/general_chunk:0.0.2@0856fa64f7b0dc937c982f12d45b3a1ad91ba8aacc0d28a1b436e6c94a77e298
|
||||
- current_identifier: null
|
||||
type: package
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/dify_extractor:0.0.4@0cb3f06230a377c4c037fa7b5e21f4f4e362e5f24a59ed7bf4950ff75e6f1e61
|
||||
kind: rag_pipeline
|
||||
rag_pipeline:
|
||||
description: ''
|
||||
icon: 📙
|
||||
icon_background: ''
|
||||
icon_type: emoji
|
||||
name: file-general-economy
|
||||
version: 0.1.0
|
||||
workflow:
|
||||
conversation_variables: []
|
||||
environment_variables: []
|
||||
features: {}
|
||||
graph:
|
||||
edges:
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: if-else
|
||||
id: 1752479895761-source-1752481129417-target
|
||||
source: '1752479895761'
|
||||
sourceHandle: source
|
||||
target: '1752481129417'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: if-else
|
||||
targetType: tool
|
||||
id: 1752481129417-24e47cad-f1e2-4f74-9884-3f49d5bb37b7-1752480460682-target
|
||||
source: '1752481129417'
|
||||
sourceHandle: 24e47cad-f1e2-4f74-9884-3f49d5bb37b7
|
||||
target: '1752480460682'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: if-else
|
||||
targetType: document-extractor
|
||||
id: 1752481129417-false-1752481112180-target
|
||||
source: '1752481129417'
|
||||
sourceHandle: 'false'
|
||||
target: '1752481112180'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: variable-aggregator
|
||||
id: 1752480460682-source-1752482022496-target
|
||||
source: '1752480460682'
|
||||
sourceHandle: source
|
||||
target: '1752482022496'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: document-extractor
|
||||
targetType: variable-aggregator
|
||||
id: 1752481112180-source-1752482022496-target
|
||||
source: '1752481112180'
|
||||
sourceHandle: source
|
||||
target: '1752482022496'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: variable-aggregator
|
||||
targetType: tool
|
||||
id: 1752482022496-source-1752482151668-target
|
||||
source: '1752482022496'
|
||||
sourceHandle: source
|
||||
target: '1752482151668'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: knowledge-index
|
||||
id: 1752482151668-source-1752477924228-target
|
||||
source: '1752482151668'
|
||||
sourceHandle: source
|
||||
target: '1752477924228'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
nodes:
|
||||
- data:
|
||||
chunk_structure: text_model
|
||||
embedding_model: text-embedding-ada-002
|
||||
embedding_model_provider: langgenius/openai/openai
|
||||
index_chunk_variable_selector:
|
||||
- '1752482151668'
|
||||
- result
|
||||
indexing_technique: economy
|
||||
keyword_number: 10
|
||||
retrieval_model:
|
||||
score_threshold: 0.5
|
||||
score_threshold_enabled: false
|
||||
search_method: keyword_search
|
||||
top_k: 3
|
||||
vector_setting:
|
||||
embedding_model_name: text-embedding-ada-002
|
||||
embedding_provider_name: langgenius/openai/openai
|
||||
selected: true
|
||||
title: 知识库
|
||||
type: knowledge-index
|
||||
height: 114
|
||||
id: '1752477924228'
|
||||
position:
|
||||
x: 1076.4656678451215
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1076.4656678451215
|
||||
y: 281.3910724383104
|
||||
selected: true
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: File
|
||||
datasource_name: upload-file
|
||||
datasource_parameters: {}
|
||||
fileExtensions:
|
||||
- txt
|
||||
- markdown
|
||||
- mdx
|
||||
- pdf
|
||||
- html
|
||||
- xlsx
|
||||
- xls
|
||||
- vtt
|
||||
- properties
|
||||
- doc
|
||||
- docx
|
||||
- csv
|
||||
- eml
|
||||
- msg
|
||||
- pptx
|
||||
- xml
|
||||
- epub
|
||||
- ppt
|
||||
- md
|
||||
plugin_id: langgenius/file
|
||||
provider_name: file
|
||||
provider_type: local_file
|
||||
selected: false
|
||||
title: File
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752479895761'
|
||||
position:
|
||||
x: -839.8603427660498
|
||||
y: 251.3910724383104
|
||||
positionAbsolute:
|
||||
x: -839.8603427660498
|
||||
y: 251.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
documents:
|
||||
description: the documents extracted from the file
|
||||
items:
|
||||
type: object
|
||||
type: array
|
||||
images:
|
||||
description: The images extracted from the file
|
||||
items:
|
||||
type: object
|
||||
type: array
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,
|
||||
jpeg)
|
||||
ja_JP: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,
|
||||
jpeg)
|
||||
pt_BR: o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png,
|
||||
jpg, jpeg)
|
||||
zh_Hans: 用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)
|
||||
label:
|
||||
en_US: file
|
||||
ja_JP: file
|
||||
pt_BR: file
|
||||
zh_Hans: file
|
||||
llm_description: the file to be parsed (support pdf, ppt, pptx, doc, docx,
|
||||
png, jpg, jpeg)
|
||||
max: null
|
||||
min: null
|
||||
name: file
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: file
|
||||
params:
|
||||
file: ''
|
||||
provider_id: langgenius/dify_extractor/dify_extractor
|
||||
provider_name: langgenius/dify_extractor/dify_extractor
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: Dify文本提取器
|
||||
tool_configurations: {}
|
||||
tool_description: Dify Extractor
|
||||
tool_label: Dify文本提取器
|
||||
tool_name: dify_extractor
|
||||
tool_parameters:
|
||||
file:
|
||||
type: variable
|
||||
value:
|
||||
- '1752479895761'
|
||||
- file
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752480460682'
|
||||
position:
|
||||
x: -108.28652292656551
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: -108.28652292656551
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_array_file: false
|
||||
selected: false
|
||||
title: 文档提取器
|
||||
type: document-extractor
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
height: 90
|
||||
id: '1752481112180'
|
||||
position:
|
||||
x: -108.28652292656551
|
||||
y: 390.6576481692478
|
||||
positionAbsolute:
|
||||
x: -108.28652292656551
|
||||
y: 390.6576481692478
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
cases:
|
||||
- case_id: 24e47cad-f1e2-4f74-9884-3f49d5bb37b7
|
||||
conditions:
|
||||
- comparison_operator: is
|
||||
id: 9da88d93-3ff6-463f-abfd-6bcafbf2554d
|
||||
value: .xlsx
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: d0e88f5e-dfe3-4bae-af0c-dbec267500de
|
||||
value: .xls
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: a957e91e-1ed7-4c6b-9c80-2f0948858f1d
|
||||
value: .md
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 870c3c39-8d3f-474a-ab8b-9c0ccf53db73
|
||||
value: .markdown
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: f9541513-1e71-4dc1-9db5-35dc84a39e3c
|
||||
value: .mdx
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 4c7f455b-ac20-40ca-9495-6cc44ffcb35d
|
||||
value: .html
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 2e12d9c7-8057-4a09-8851-f9fd1d0718d1
|
||||
value: .htm
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 73a995a9-d8b9-4aef-89f7-306e2ddcbce2
|
||||
value: .docx
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 8a2e8772-0426-458b-a1f9-9eaaec0f27c8
|
||||
value: .csv
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602
|
||||
value: .txt
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
id: 24e47cad-f1e2-4f74-9884-3f49d5bb37b7
|
||||
logical_operator: or
|
||||
selected: false
|
||||
title: 条件分支
|
||||
type: if-else
|
||||
height: 358
|
||||
id: '1752481129417'
|
||||
position:
|
||||
x: -489.57009543377865
|
||||
y: 251.3910724383104
|
||||
positionAbsolute:
|
||||
x: -489.57009543377865
|
||||
y: 251.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
advanced_settings:
|
||||
group_enabled: false
|
||||
groups:
|
||||
- groupId: f4cf07b4-914d-4544-8ef8-0c5d9e4f21a7
|
||||
group_name: Group1
|
||||
output_type: string
|
||||
variables:
|
||||
- - '1752481112180'
|
||||
- text
|
||||
- - '1752480460682'
|
||||
- text
|
||||
output_type: string
|
||||
selected: false
|
||||
title: 变量聚合器
|
||||
type: variable-aggregator
|
||||
variables:
|
||||
- - '1752481112180'
|
||||
- text
|
||||
- - '1752480460682'
|
||||
- text
|
||||
height: 129
|
||||
id: '1752482022496'
|
||||
position:
|
||||
x: 319.441649575055
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 319.441649575055
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
result:
|
||||
description: The result of the general chunk tool.
|
||||
properties:
|
||||
general_chunks:
|
||||
items:
|
||||
description: The chunk of the text.
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The text you want to chunk.
|
||||
ja_JP: The text you want to chunk.
|
||||
pt_BR: The text you want to chunk.
|
||||
zh_Hans: 你想要分块的文本。
|
||||
label:
|
||||
en_US: Input Variable
|
||||
ja_JP: Input Variable
|
||||
pt_BR: Input Variable
|
||||
zh_Hans: 输入变量
|
||||
llm_description: The text you want to chunk.
|
||||
max: null
|
||||
min: null
|
||||
name: input_variable
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The delimiter of the chunks.
|
||||
ja_JP: The delimiter of the chunks.
|
||||
pt_BR: The delimiter of the chunks.
|
||||
zh_Hans: 块的分隔符。
|
||||
label:
|
||||
en_US: Delimiter
|
||||
ja_JP: Delimiter
|
||||
pt_BR: Delimiter
|
||||
zh_Hans: 分隔符
|
||||
llm_description: The delimiter of the chunks, the format of the delimiter
|
||||
must be a string.
|
||||
max: null
|
||||
min: null
|
||||
name: delimiter
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The maximum chunk length.
|
||||
ja_JP: The maximum chunk length.
|
||||
pt_BR: The maximum chunk length.
|
||||
zh_Hans: 最大块的长度。
|
||||
label:
|
||||
en_US: Maximum Chunk Length
|
||||
ja_JP: Maximum Chunk Length
|
||||
pt_BR: Maximum Chunk Length
|
||||
zh_Hans: 最大块的长度
|
||||
llm_description: The maximum chunk length, the format of the chunk size
|
||||
must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: max_chunk_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The chunk overlap length.
|
||||
ja_JP: The chunk overlap length.
|
||||
pt_BR: The chunk overlap length.
|
||||
zh_Hans: 块的重叠长度。
|
||||
label:
|
||||
en_US: Chunk Overlap Length
|
||||
ja_JP: Chunk Overlap Length
|
||||
pt_BR: Chunk Overlap Length
|
||||
zh_Hans: 块的重叠长度
|
||||
llm_description: The chunk overlap length, the format of the chunk overlap
|
||||
length must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: chunk_overlap_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Replace consecutive spaces, newlines and tabs
|
||||
ja_JP: Replace consecutive spaces, newlines and tabs
|
||||
pt_BR: Replace consecutive spaces, newlines and tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
label:
|
||||
en_US: Replace Consecutive Spaces, Newlines and Tabs
|
||||
ja_JP: Replace Consecutive Spaces, Newlines and Tabs
|
||||
pt_BR: Replace Consecutive Spaces, Newlines and Tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
llm_description: Replace consecutive spaces, newlines and tabs, the format
|
||||
of the replace must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: replace_consecutive_spaces_newlines_tabs
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Delete all URLs and email addresses
|
||||
ja_JP: Delete all URLs and email addresses
|
||||
pt_BR: Delete all URLs and email addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
label:
|
||||
en_US: Delete All URLs and Email Addresses
|
||||
ja_JP: Delete All URLs and Email Addresses
|
||||
pt_BR: Delete All URLs and Email Addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
llm_description: Delete all URLs and email addresses, the format of the
|
||||
delete must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: delete_all_urls_and_email_addresses
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
params:
|
||||
chunk_overlap_length: ''
|
||||
delete_all_urls_and_email_addresses: ''
|
||||
delimiter: ''
|
||||
input_variable: ''
|
||||
max_chunk_length: ''
|
||||
replace_consecutive_spaces_newlines_tabs: ''
|
||||
provider_id: langgenius/general_chunk/general_chunk
|
||||
provider_name: langgenius/general_chunk/general_chunk
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: 通用文本分块
|
||||
tool_configurations: {}
|
||||
tool_description: 一个用于通用文本分块模式的工具,检索和召回的块是相同的。
|
||||
tool_label: 通用文本分块
|
||||
tool_name: general_chunk
|
||||
tool_parameters:
|
||||
chunk_overlap_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- chunk_overlap
|
||||
delete_all_urls_and_email_addresses:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delete_urls_email#}}'
|
||||
delimiter:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delimiter#}}'
|
||||
input_variable:
|
||||
type: mixed
|
||||
value: '{{#1752482022496.output#}}'
|
||||
max_chunk_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- max_chunk_length
|
||||
replace_consecutive_spaces_newlines_tabs:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.replace_consecutive_spaces#}}'
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752482151668'
|
||||
position:
|
||||
x: 693.5300771507484
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 693.5300771507484
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
viewport:
|
||||
x: 701.4999626224237
|
||||
y: 128.33739021504016
|
||||
zoom: 0.48941689643726966
|
||||
rag_pipeline_variables:
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: delimiter
|
||||
max_length: 100
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Maximum chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Chunk overlap
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: chunk_overlap
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Replace consecutive spaces, newlines and tabs
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: replace_consecutive_spaces
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Delete all URLs and email addresses
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: delete_urls_email
|
@ -0,0 +1,708 @@
|
||||
dependencies:
|
||||
- current_identifier: null
|
||||
type: package
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/general_chunk:0.0.2@0856fa64f7b0dc937c982f12d45b3a1ad91ba8aacc0d28a1b436e6c94a77e298
|
||||
- current_identifier: null
|
||||
type: package
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/dify_extractor:0.0.4@0cb3f06230a377c4c037fa7b5e21f4f4e362e5f24a59ed7bf4950ff75e6f1e61
|
||||
kind: rag_pipeline
|
||||
rag_pipeline:
|
||||
description: ''
|
||||
icon: 📙
|
||||
icon_background: '#FFF4ED'
|
||||
icon_type: emoji
|
||||
name: file-general-high-quality
|
||||
version: 0.1.0
|
||||
workflow:
|
||||
conversation_variables: []
|
||||
environment_variables: []
|
||||
features: {}
|
||||
graph:
|
||||
edges:
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: if-else
|
||||
id: 1752479895761-source-1752481129417-target
|
||||
source: '1752479895761'
|
||||
sourceHandle: source
|
||||
target: '1752481129417'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: if-else
|
||||
targetType: tool
|
||||
id: 1752481129417-24e47cad-f1e2-4f74-9884-3f49d5bb37b7-1752480460682-target
|
||||
source: '1752481129417'
|
||||
sourceHandle: 24e47cad-f1e2-4f74-9884-3f49d5bb37b7
|
||||
target: '1752480460682'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: if-else
|
||||
targetType: document-extractor
|
||||
id: 1752481129417-false-1752481112180-target
|
||||
source: '1752481129417'
|
||||
sourceHandle: 'false'
|
||||
target: '1752481112180'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: variable-aggregator
|
||||
id: 1752480460682-source-1752482022496-target
|
||||
source: '1752480460682'
|
||||
sourceHandle: source
|
||||
target: '1752482022496'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: document-extractor
|
||||
targetType: variable-aggregator
|
||||
id: 1752481112180-source-1752482022496-target
|
||||
source: '1752481112180'
|
||||
sourceHandle: source
|
||||
target: '1752482022496'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: variable-aggregator
|
||||
targetType: tool
|
||||
id: 1752482022496-source-1752482151668-target
|
||||
source: '1752482022496'
|
||||
sourceHandle: source
|
||||
target: '1752482151668'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: knowledge-index
|
||||
id: 1752482151668-source-1752477924228-target
|
||||
source: '1752482151668'
|
||||
sourceHandle: source
|
||||
target: '1752477924228'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
nodes:
|
||||
- data:
|
||||
chunk_structure: text_model
|
||||
embedding_model: text-embedding-ada-002
|
||||
embedding_model_provider: langgenius/openai/openai
|
||||
index_chunk_variable_selector:
|
||||
- '1752482151668'
|
||||
- result
|
||||
indexing_technique: high_quality
|
||||
keyword_number: 10
|
||||
retrieval_model:
|
||||
score_threshold: 0.5
|
||||
score_threshold_enabled: false
|
||||
search_method: semantic_search
|
||||
top_k: 3
|
||||
vector_setting:
|
||||
embedding_model_name: text-embedding-ada-002
|
||||
embedding_provider_name: langgenius/openai/openai
|
||||
selected: false
|
||||
title: 知识库
|
||||
type: knowledge-index
|
||||
height: 114
|
||||
id: '1752477924228'
|
||||
position:
|
||||
x: 1076.4656678451215
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1076.4656678451215
|
||||
y: 281.3910724383104
|
||||
selected: true
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: File
|
||||
datasource_name: upload-file
|
||||
datasource_parameters: {}
|
||||
fileExtensions:
|
||||
- txt
|
||||
- markdown
|
||||
- mdx
|
||||
- pdf
|
||||
- html
|
||||
- xlsx
|
||||
- xls
|
||||
- vtt
|
||||
- properties
|
||||
- doc
|
||||
- docx
|
||||
- csv
|
||||
- eml
|
||||
- msg
|
||||
- pptx
|
||||
- xml
|
||||
- epub
|
||||
- ppt
|
||||
- md
|
||||
plugin_id: langgenius/file
|
||||
provider_name: file
|
||||
provider_type: local_file
|
||||
selected: false
|
||||
title: File
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752479895761'
|
||||
position:
|
||||
x: -839.8603427660498
|
||||
y: 251.3910724383104
|
||||
positionAbsolute:
|
||||
x: -839.8603427660498
|
||||
y: 251.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
documents:
|
||||
description: the documents extracted from the file
|
||||
items:
|
||||
type: object
|
||||
type: array
|
||||
images:
|
||||
description: The images extracted from the file
|
||||
items:
|
||||
type: object
|
||||
type: array
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,
|
||||
jpeg)
|
||||
ja_JP: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,
|
||||
jpeg)
|
||||
pt_BR: o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png,
|
||||
jpg, jpeg)
|
||||
zh_Hans: 用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)
|
||||
label:
|
||||
en_US: file
|
||||
ja_JP: file
|
||||
pt_BR: file
|
||||
zh_Hans: file
|
||||
llm_description: the file to be parsed (support pdf, ppt, pptx, doc, docx,
|
||||
png, jpg, jpeg)
|
||||
max: null
|
||||
min: null
|
||||
name: file
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: file
|
||||
params:
|
||||
file: ''
|
||||
provider_id: langgenius/dify_extractor/dify_extractor
|
||||
provider_name: langgenius/dify_extractor/dify_extractor
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: Dify文本提取器
|
||||
tool_configurations: {}
|
||||
tool_description: Dify Extractor
|
||||
tool_label: Dify文本提取器
|
||||
tool_name: dify_extractor
|
||||
tool_parameters:
|
||||
file:
|
||||
type: variable
|
||||
value:
|
||||
- '1752479895761'
|
||||
- file
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752480460682'
|
||||
position:
|
||||
x: -108.28652292656551
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: -108.28652292656551
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_array_file: false
|
||||
selected: false
|
||||
title: 文档提取器
|
||||
type: document-extractor
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
height: 90
|
||||
id: '1752481112180'
|
||||
position:
|
||||
x: -108.28652292656551
|
||||
y: 390.6576481692478
|
||||
positionAbsolute:
|
||||
x: -108.28652292656551
|
||||
y: 390.6576481692478
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
cases:
|
||||
- case_id: 24e47cad-f1e2-4f74-9884-3f49d5bb37b7
|
||||
conditions:
|
||||
- comparison_operator: is
|
||||
id: 9da88d93-3ff6-463f-abfd-6bcafbf2554d
|
||||
value: .xlsx
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: d0e88f5e-dfe3-4bae-af0c-dbec267500de
|
||||
value: .xls
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: a957e91e-1ed7-4c6b-9c80-2f0948858f1d
|
||||
value: .md
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 870c3c39-8d3f-474a-ab8b-9c0ccf53db73
|
||||
value: .markdown
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: f9541513-1e71-4dc1-9db5-35dc84a39e3c
|
||||
value: .mdx
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 4c7f455b-ac20-40ca-9495-6cc44ffcb35d
|
||||
value: .html
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 2e12d9c7-8057-4a09-8851-f9fd1d0718d1
|
||||
value: .htm
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 73a995a9-d8b9-4aef-89f7-306e2ddcbce2
|
||||
value: .docx
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 8a2e8772-0426-458b-a1f9-9eaaec0f27c8
|
||||
value: .csv
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602
|
||||
value: .txt
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
id: 24e47cad-f1e2-4f74-9884-3f49d5bb37b7
|
||||
logical_operator: or
|
||||
selected: false
|
||||
title: 条件分支
|
||||
type: if-else
|
||||
height: 358
|
||||
id: '1752481129417'
|
||||
position:
|
||||
x: -489.57009543377865
|
||||
y: 251.3910724383104
|
||||
positionAbsolute:
|
||||
x: -489.57009543377865
|
||||
y: 251.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
advanced_settings:
|
||||
group_enabled: false
|
||||
groups:
|
||||
- groupId: f4cf07b4-914d-4544-8ef8-0c5d9e4f21a7
|
||||
group_name: Group1
|
||||
output_type: string
|
||||
variables:
|
||||
- - '1752481112180'
|
||||
- text
|
||||
- - '1752480460682'
|
||||
- text
|
||||
output_type: string
|
||||
selected: false
|
||||
title: 变量聚合器
|
||||
type: variable-aggregator
|
||||
variables:
|
||||
- - '1752481112180'
|
||||
- text
|
||||
- - '1752480460682'
|
||||
- text
|
||||
height: 129
|
||||
id: '1752482022496'
|
||||
position:
|
||||
x: 319.441649575055
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 319.441649575055
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
result:
|
||||
description: The result of the general chunk tool.
|
||||
properties:
|
||||
general_chunks:
|
||||
items:
|
||||
description: The chunk of the text.
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The text you want to chunk.
|
||||
ja_JP: The text you want to chunk.
|
||||
pt_BR: The text you want to chunk.
|
||||
zh_Hans: 你想要分块的文本。
|
||||
label:
|
||||
en_US: Input Variable
|
||||
ja_JP: Input Variable
|
||||
pt_BR: Input Variable
|
||||
zh_Hans: 输入变量
|
||||
llm_description: The text you want to chunk.
|
||||
max: null
|
||||
min: null
|
||||
name: input_variable
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The delimiter of the chunks.
|
||||
ja_JP: The delimiter of the chunks.
|
||||
pt_BR: The delimiter of the chunks.
|
||||
zh_Hans: 块的分隔符。
|
||||
label:
|
||||
en_US: Delimiter
|
||||
ja_JP: Delimiter
|
||||
pt_BR: Delimiter
|
||||
zh_Hans: 分隔符
|
||||
llm_description: The delimiter of the chunks, the format of the delimiter
|
||||
must be a string.
|
||||
max: null
|
||||
min: null
|
||||
name: delimiter
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The maximum chunk length.
|
||||
ja_JP: The maximum chunk length.
|
||||
pt_BR: The maximum chunk length.
|
||||
zh_Hans: 最大块的长度。
|
||||
label:
|
||||
en_US: Maximum Chunk Length
|
||||
ja_JP: Maximum Chunk Length
|
||||
pt_BR: Maximum Chunk Length
|
||||
zh_Hans: 最大块的长度
|
||||
llm_description: The maximum chunk length, the format of the chunk size
|
||||
must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: max_chunk_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The chunk overlap length.
|
||||
ja_JP: The chunk overlap length.
|
||||
pt_BR: The chunk overlap length.
|
||||
zh_Hans: 块的重叠长度。
|
||||
label:
|
||||
en_US: Chunk Overlap Length
|
||||
ja_JP: Chunk Overlap Length
|
||||
pt_BR: Chunk Overlap Length
|
||||
zh_Hans: 块的重叠长度
|
||||
llm_description: The chunk overlap length, the format of the chunk overlap
|
||||
length must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: chunk_overlap_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Replace consecutive spaces, newlines and tabs
|
||||
ja_JP: Replace consecutive spaces, newlines and tabs
|
||||
pt_BR: Replace consecutive spaces, newlines and tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
label:
|
||||
en_US: Replace Consecutive Spaces, Newlines and Tabs
|
||||
ja_JP: Replace Consecutive Spaces, Newlines and Tabs
|
||||
pt_BR: Replace Consecutive Spaces, Newlines and Tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
llm_description: Replace consecutive spaces, newlines and tabs, the format
|
||||
of the replace must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: replace_consecutive_spaces_newlines_tabs
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Delete all URLs and email addresses
|
||||
ja_JP: Delete all URLs and email addresses
|
||||
pt_BR: Delete all URLs and email addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
label:
|
||||
en_US: Delete All URLs and Email Addresses
|
||||
ja_JP: Delete All URLs and Email Addresses
|
||||
pt_BR: Delete All URLs and Email Addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
llm_description: Delete all URLs and email addresses, the format of the
|
||||
delete must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: delete_all_urls_and_email_addresses
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
params:
|
||||
chunk_overlap_length: ''
|
||||
delete_all_urls_and_email_addresses: ''
|
||||
delimiter: ''
|
||||
input_variable: ''
|
||||
max_chunk_length: ''
|
||||
replace_consecutive_spaces_newlines_tabs: ''
|
||||
provider_id: langgenius/general_chunk/general_chunk
|
||||
provider_name: langgenius/general_chunk/general_chunk
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: 通用文本分块
|
||||
tool_configurations: {}
|
||||
tool_description: 一个用于通用文本分块模式的工具,检索和召回的块是相同的。
|
||||
tool_label: 通用文本分块
|
||||
tool_name: general_chunk
|
||||
tool_parameters:
|
||||
chunk_overlap_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- chunk_overlap
|
||||
delete_all_urls_and_email_addresses:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delete_urls_email#}}'
|
||||
delimiter:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delimiter#}}'
|
||||
input_variable:
|
||||
type: mixed
|
||||
value: '{{#1752482022496.output#}}'
|
||||
max_chunk_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- max_chunk_length
|
||||
replace_consecutive_spaces_newlines_tabs:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.replace_consecutive_spaces#}}'
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752482151668'
|
||||
position:
|
||||
x: 693.5300771507484
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 693.5300771507484
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
viewport:
|
||||
x: 701.4999626224237
|
||||
y: 128.33739021504016
|
||||
zoom: 0.48941689643726966
|
||||
rag_pipeline_variables:
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: delimiter
|
||||
max_length: 100
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Maximum chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Chunk overlap
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: chunk_overlap
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Replace consecutive spaces, newlines and tabs
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: replace_consecutive_spaces
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Delete all URLs and email addresses
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: delete_urls_email
|
816
api/services/rag_pipeline/transform/file-parentchild.yml
Normal file
816
api/services/rag_pipeline/transform/file-parentchild.yml
Normal file
@ -0,0 +1,816 @@
|
||||
dependencies:
|
||||
- current_identifier: null
|
||||
type: package
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/parent_child_chunk:0.0.1@f8f9ba1f3bcda159ebc0168baa755c2181b923da8157ebb439b8046019f5b510
|
||||
- current_identifier: null
|
||||
type: package
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/dify_extractor:0.0.4@0cb3f06230a377c4c037fa7b5e21f4f4e362e5f24a59ed7bf4950ff75e6f1e61
|
||||
kind: rag_pipeline
|
||||
rag_pipeline:
|
||||
description: ''
|
||||
icon: 📙
|
||||
icon_background: '#FFF4ED'
|
||||
icon_type: emoji
|
||||
name: file-parentchild
|
||||
version: 0.1.0
|
||||
workflow:
|
||||
conversation_variables: []
|
||||
environment_variables: []
|
||||
features: {}
|
||||
graph:
|
||||
edges:
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: if-else
|
||||
id: 1752479895761-source-1752481129417-target
|
||||
source: '1752479895761'
|
||||
sourceHandle: source
|
||||
target: '1752481129417'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: if-else
|
||||
targetType: tool
|
||||
id: 1752481129417-24e47cad-f1e2-4f74-9884-3f49d5bb37b7-1752480460682-target
|
||||
source: '1752481129417'
|
||||
sourceHandle: 24e47cad-f1e2-4f74-9884-3f49d5bb37b7
|
||||
target: '1752480460682'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: if-else
|
||||
targetType: document-extractor
|
||||
id: 1752481129417-false-1752481112180-target
|
||||
source: '1752481129417'
|
||||
sourceHandle: 'false'
|
||||
target: '1752481112180'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: variable-aggregator
|
||||
id: 1752480460682-source-1752482022496-target
|
||||
source: '1752480460682'
|
||||
sourceHandle: source
|
||||
target: '1752482022496'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: document-extractor
|
||||
targetType: variable-aggregator
|
||||
id: 1752481112180-source-1752482022496-target
|
||||
source: '1752481112180'
|
||||
sourceHandle: source
|
||||
target: '1752482022496'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: variable-aggregator
|
||||
targetType: tool
|
||||
id: 1752482022496-source-1752575473519-target
|
||||
source: '1752482022496'
|
||||
sourceHandle: source
|
||||
target: '1752575473519'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: knowledge-index
|
||||
id: 1752575473519-source-1752477924228-target
|
||||
source: '1752575473519'
|
||||
sourceHandle: source
|
||||
target: '1752477924228'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
nodes:
|
||||
- data:
|
||||
chunk_structure: hierarchical_model
|
||||
embedding_model: text-embedding-ada-002
|
||||
embedding_model_provider: langgenius/openai/openai
|
||||
index_chunk_variable_selector:
|
||||
- '1752575473519'
|
||||
- result
|
||||
indexing_technique: high_quality
|
||||
keyword_number: 10
|
||||
retrieval_model:
|
||||
score_threshold: 0.5
|
||||
score_threshold_enabled: false
|
||||
search_method: semantic_search
|
||||
top_k: 3
|
||||
vector_setting:
|
||||
embedding_model_name: text-embedding-ada-002
|
||||
embedding_provider_name: langgenius/openai/openai
|
||||
selected: false
|
||||
title: 知识库
|
||||
type: knowledge-index
|
||||
height: 114
|
||||
id: '1752477924228'
|
||||
position:
|
||||
x: 994.3774545394483
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 994.3774545394483
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: File
|
||||
datasource_name: upload-file
|
||||
datasource_parameters: {}
|
||||
fileExtensions:
|
||||
- txt
|
||||
- markdown
|
||||
- mdx
|
||||
- pdf
|
||||
- html
|
||||
- xlsx
|
||||
- xls
|
||||
- vtt
|
||||
- properties
|
||||
- doc
|
||||
- docx
|
||||
- csv
|
||||
- eml
|
||||
- msg
|
||||
- pptx
|
||||
- xml
|
||||
- epub
|
||||
- ppt
|
||||
- md
|
||||
plugin_id: langgenius/file
|
||||
provider_name: file
|
||||
provider_type: local_file
|
||||
selected: false
|
||||
title: File
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752479895761'
|
||||
position:
|
||||
x: -839.8603427660498
|
||||
y: 251.3910724383104
|
||||
positionAbsolute:
|
||||
x: -839.8603427660498
|
||||
y: 251.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
documents:
|
||||
description: the documents extracted from the file
|
||||
items:
|
||||
type: object
|
||||
type: array
|
||||
images:
|
||||
description: The images extracted from the file
|
||||
items:
|
||||
type: object
|
||||
type: array
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,
|
||||
jpeg)
|
||||
ja_JP: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,
|
||||
jpeg)
|
||||
pt_BR: o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png,
|
||||
jpg, jpeg)
|
||||
zh_Hans: 用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)
|
||||
label:
|
||||
en_US: file
|
||||
ja_JP: file
|
||||
pt_BR: file
|
||||
zh_Hans: file
|
||||
llm_description: the file to be parsed (support pdf, ppt, pptx, doc, docx,
|
||||
png, jpg, jpeg)
|
||||
max: null
|
||||
min: null
|
||||
name: file
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: file
|
||||
params:
|
||||
file: ''
|
||||
provider_id: langgenius/dify_extractor/dify_extractor
|
||||
provider_name: langgenius/dify_extractor/dify_extractor
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: Dify文本提取器
|
||||
tool_configurations: {}
|
||||
tool_description: Dify Extractor
|
||||
tool_label: Dify文本提取器
|
||||
tool_name: dify_extractor
|
||||
tool_parameters:
|
||||
file:
|
||||
type: variable
|
||||
value:
|
||||
- '1752479895761'
|
||||
- file
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752480460682'
|
||||
position:
|
||||
x: -108.28652292656551
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: -108.28652292656551
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_array_file: false
|
||||
selected: false
|
||||
title: 文档提取器
|
||||
type: document-extractor
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
height: 90
|
||||
id: '1752481112180'
|
||||
position:
|
||||
x: -108.28652292656551
|
||||
y: 390.6576481692478
|
||||
positionAbsolute:
|
||||
x: -108.28652292656551
|
||||
y: 390.6576481692478
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
cases:
|
||||
- case_id: 24e47cad-f1e2-4f74-9884-3f49d5bb37b7
|
||||
conditions:
|
||||
- comparison_operator: is
|
||||
id: 9da88d93-3ff6-463f-abfd-6bcafbf2554d
|
||||
value: .xlsx
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: d0e88f5e-dfe3-4bae-af0c-dbec267500de
|
||||
value: .xls
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: a957e91e-1ed7-4c6b-9c80-2f0948858f1d
|
||||
value: .md
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 870c3c39-8d3f-474a-ab8b-9c0ccf53db73
|
||||
value: .markdown
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: f9541513-1e71-4dc1-9db5-35dc84a39e3c
|
||||
value: .mdx
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 4c7f455b-ac20-40ca-9495-6cc44ffcb35d
|
||||
value: .html
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 2e12d9c7-8057-4a09-8851-f9fd1d0718d1
|
||||
value: .htm
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 73a995a9-d8b9-4aef-89f7-306e2ddcbce2
|
||||
value: .docx
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 8a2e8772-0426-458b-a1f9-9eaaec0f27c8
|
||||
value: .csv
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602
|
||||
value: .txt
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
id: 24e47cad-f1e2-4f74-9884-3f49d5bb37b7
|
||||
logical_operator: or
|
||||
selected: false
|
||||
title: 条件分支
|
||||
type: if-else
|
||||
height: 358
|
||||
id: '1752481129417'
|
||||
position:
|
||||
x: -512.2335487893622
|
||||
y: 251.3910724383104
|
||||
positionAbsolute:
|
||||
x: -512.2335487893622
|
||||
y: 251.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
advanced_settings:
|
||||
group_enabled: false
|
||||
groups:
|
||||
- groupId: f4cf07b4-914d-4544-8ef8-0c5d9e4f21a7
|
||||
group_name: Group1
|
||||
output_type: string
|
||||
variables:
|
||||
- - '1752481112180'
|
||||
- text
|
||||
- - '1752480460682'
|
||||
- text
|
||||
output_type: string
|
||||
selected: false
|
||||
title: 变量聚合器
|
||||
type: variable-aggregator
|
||||
variables:
|
||||
- - '1752481112180'
|
||||
- text
|
||||
- - '1752480460682'
|
||||
- text
|
||||
height: 129
|
||||
id: '1752482022496'
|
||||
position:
|
||||
x: 319.441649575055
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 319.441649575055
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
result:
|
||||
description: Parent child chunks result
|
||||
items:
|
||||
type: object
|
||||
type: array
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The text you want to chunk.
|
||||
ja_JP: The text you want to chunk.
|
||||
pt_BR: The text you want to chunk.
|
||||
zh_Hans: 你想要分块的文本。
|
||||
label:
|
||||
en_US: Input text
|
||||
ja_JP: Input text
|
||||
pt_BR: Input text
|
||||
zh_Hans: 输入文本
|
||||
llm_description: The text you want to chunk.
|
||||
max: null
|
||||
min: null
|
||||
name: input_text
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: 1024
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Maximum length for chunking
|
||||
ja_JP: Maximum length for chunking
|
||||
pt_BR: Comprimento máximo para divisão
|
||||
zh_Hans: 用于分块的最大长度
|
||||
label:
|
||||
en_US: Maximum Length
|
||||
ja_JP: Maximum Length
|
||||
pt_BR: Comprimento Máximo
|
||||
zh_Hans: 最大长度
|
||||
llm_description: Maximum length allowed per chunk
|
||||
max: null
|
||||
min: null
|
||||
name: max_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: '
|
||||
|
||||
|
||||
'
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Separator used for chunking
|
||||
ja_JP: Separator used for chunking
|
||||
pt_BR: Separador usado para divisão
|
||||
zh_Hans: 用于分块的分隔符
|
||||
label:
|
||||
en_US: Chunk Separator
|
||||
ja_JP: Chunk Separator
|
||||
pt_BR: Separador de Divisão
|
||||
zh_Hans: 分块分隔符
|
||||
llm_description: The separator used to split chunks
|
||||
max: null
|
||||
min: null
|
||||
name: separator
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: 512
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Maximum length for subchunking
|
||||
ja_JP: Maximum length for subchunking
|
||||
pt_BR: Comprimento máximo para subdivisão
|
||||
zh_Hans: 用于子分块的最大长度
|
||||
label:
|
||||
en_US: Subchunk Maximum Length
|
||||
ja_JP: Subchunk Maximum Length
|
||||
pt_BR: Comprimento Máximo de Subdivisão
|
||||
zh_Hans: 子分块最大长度
|
||||
llm_description: Maximum length allowed per subchunk
|
||||
max: null
|
||||
min: null
|
||||
name: subchunk_max_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: '. '
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Separator used for subchunking
|
||||
ja_JP: Separator used for subchunking
|
||||
pt_BR: Separador usado para subdivisão
|
||||
zh_Hans: 用于子分块的分隔符
|
||||
label:
|
||||
en_US: Subchunk Separator
|
||||
ja_JP: Subchunk Separator
|
||||
pt_BR: Separador de Subdivisão
|
||||
zh_Hans: 子分块分隔符
|
||||
llm_description: The separator used to split subchunks
|
||||
max: null
|
||||
min: null
|
||||
name: subchunk_separator
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: paragraph
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Split text into paragraphs based on separator and maximum chunk
|
||||
length, using split text as parent block or entire document as parent
|
||||
block and directly retrieve.
|
||||
ja_JP: Split text into paragraphs based on separator and maximum chunk
|
||||
length, using split text as parent block or entire document as parent
|
||||
block and directly retrieve.
|
||||
pt_BR: Dividir texto em parágrafos com base no separador e no comprimento
|
||||
máximo do bloco, usando o texto dividido como bloco pai ou documento
|
||||
completo como bloco pai e diretamente recuperá-lo.
|
||||
zh_Hans: 根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块或整个文档用作父块并直接检索。
|
||||
label:
|
||||
en_US: Parent Mode
|
||||
ja_JP: Parent Mode
|
||||
pt_BR: Modo Pai
|
||||
zh_Hans: 父块模式
|
||||
llm_description: Split text into paragraphs based on separator and maximum
|
||||
chunk length, using split text as parent block or entire document as parent
|
||||
block and directly retrieve.
|
||||
max: null
|
||||
min: null
|
||||
name: parent_mode
|
||||
options:
|
||||
- icon: ''
|
||||
label:
|
||||
en_US: Paragraph
|
||||
ja_JP: Paragraph
|
||||
pt_BR: Parágrafo
|
||||
zh_Hans: 段落
|
||||
value: paragraph
|
||||
- icon: ''
|
||||
label:
|
||||
en_US: Full Document
|
||||
ja_JP: Full Document
|
||||
pt_BR: Documento Completo
|
||||
zh_Hans: 全文
|
||||
value: full_doc
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: select
|
||||
- auto_generate: null
|
||||
default: 0
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Whether to remove extra spaces in the text
|
||||
ja_JP: Whether to remove extra spaces in the text
|
||||
pt_BR: Se deve remover espaços extras no texto
|
||||
zh_Hans: 是否移除文本中的多余空格
|
||||
label:
|
||||
en_US: Remove Extra Spaces
|
||||
ja_JP: Remove Extra Spaces
|
||||
pt_BR: Remover Espaços Extras
|
||||
zh_Hans: 移除多余空格
|
||||
llm_description: Whether to remove extra spaces in the text
|
||||
max: null
|
||||
min: null
|
||||
name: remove_extra_spaces
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
- auto_generate: null
|
||||
default: 0
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Whether to remove URLs and emails in the text
|
||||
ja_JP: Whether to remove URLs and emails in the text
|
||||
pt_BR: Se deve remover URLs e e-mails no texto
|
||||
zh_Hans: 是否移除文本中的URL和电子邮件地址
|
||||
label:
|
||||
en_US: Remove URLs and Emails
|
||||
ja_JP: Remove URLs and Emails
|
||||
pt_BR: Remover URLs e E-mails
|
||||
zh_Hans: 移除URL和电子邮件地址
|
||||
llm_description: Whether to remove URLs and emails in the text
|
||||
max: null
|
||||
min: null
|
||||
name: remove_urls_emails
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
params:
|
||||
input_text: ''
|
||||
max_length: ''
|
||||
parent_mode: ''
|
||||
remove_extra_spaces: ''
|
||||
remove_urls_emails: ''
|
||||
separator: ''
|
||||
subchunk_max_length: ''
|
||||
subchunk_separator: ''
|
||||
provider_id: langgenius/parent_child_chunk/parent_child_chunk
|
||||
provider_name: langgenius/parent_child_chunk/parent_child_chunk
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: 父子分块处理器
|
||||
tool_configurations: {}
|
||||
tool_description: 将文档处理为父子分块结构
|
||||
tool_label: 父子分块处理器
|
||||
tool_name: parent_child_chunk
|
||||
tool_parameters:
|
||||
input_text:
|
||||
type: mixed
|
||||
value: '{{#1752482022496.output#}}'
|
||||
max_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- max_chunk_length
|
||||
parent_mode:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- parent_mode
|
||||
remove_extra_spaces:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.replace_consecutive_spaces#}}'
|
||||
remove_urls_emails:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delete_urls_email#}}'
|
||||
separator:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delimiter#}}'
|
||||
subchunk_max_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- child_max_chunk_length
|
||||
subchunk_separator:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.child_delimiter#}}'
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752575473519'
|
||||
position:
|
||||
x: 637.9241611063885
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 637.9241611063885
|
||||
y: 281.3910724383104
|
||||
selected: true
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
viewport:
|
||||
x: 948.6766333808323
|
||||
y: -102.06757184183238
|
||||
zoom: 0.8375774577380971
|
||||
rag_pipeline_variables:
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n\n
|
||||
label: Delimiter
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n\n is recommended
|
||||
for splitting the original document into large parent chunks. You can also use
|
||||
special delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 1024
|
||||
label: Maximum chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n
|
||||
label: Child delimiter
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n\n is recommended
|
||||
for splitting the original document into large parent chunks. You can also use
|
||||
special delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: child_delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 512
|
||||
label: Child max chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: child_max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: paragraph
|
||||
label: Parent mode
|
||||
max_length: 48
|
||||
options:
|
||||
- full_doc
|
||||
- paragraph
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: select
|
||||
unit: null
|
||||
variable: parent_mode
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Replace consecutive spaces, newlines and tabs
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: replace_consecutive_spaces
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Delete all URLs and email addresses
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: delete_urls_email
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
394
api/services/rag_pipeline/transform/notion-general-economy.yml
Normal file
394
api/services/rag_pipeline/transform/notion-general-economy.yml
Normal file
@ -0,0 +1,394 @@
|
||||
dependencies:
|
||||
- current_identifier: null
|
||||
type: package
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/general_chunk:0.0.2@0856fa64f7b0dc937c982f12d45b3a1ad91ba8aacc0d28a1b436e6c94a77e298
|
||||
kind: rag_pipeline
|
||||
rag_pipeline:
|
||||
description: ''
|
||||
icon: 📙
|
||||
icon_background: ''
|
||||
icon_type: emoji
|
||||
name: notion-general-economy
|
||||
version: 0.1.0
|
||||
workflow:
|
||||
conversation_variables: []
|
||||
environment_variables: []
|
||||
features: {}
|
||||
graph:
|
||||
edges:
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: knowledge-index
|
||||
id: 1752482151668-source-1752477924228-target
|
||||
source: '1752482151668'
|
||||
sourceHandle: source
|
||||
target: '1752477924228'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: tool
|
||||
id: 1752489759475-source-1752482151668-target
|
||||
source: '1752489759475'
|
||||
sourceHandle: source
|
||||
target: '1752482151668'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
nodes:
|
||||
- data:
|
||||
chunk_structure: text_model
|
||||
embedding_model: text-embedding-ada-002
|
||||
embedding_model_provider: langgenius/openai/openai
|
||||
index_chunk_variable_selector:
|
||||
- '1752482151668'
|
||||
- result
|
||||
indexing_technique: economy
|
||||
keyword_number: 10
|
||||
retrieval_model:
|
||||
score_threshold: 0.5
|
||||
score_threshold_enabled: false
|
||||
search_method: keyword_search
|
||||
top_k: 3
|
||||
vector_setting:
|
||||
embedding_model_name: text-embedding-ada-002
|
||||
embedding_provider_name: langgenius/openai/openai
|
||||
selected: true
|
||||
title: 知识库
|
||||
type: knowledge-index
|
||||
height: 114
|
||||
id: '1752477924228'
|
||||
position:
|
||||
x: 1444.5503479271906
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1444.5503479271906
|
||||
y: 281.3910724383104
|
||||
selected: true
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
result:
|
||||
description: The result of the general chunk tool.
|
||||
properties:
|
||||
general_chunks:
|
||||
items:
|
||||
description: The chunk of the text.
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The text you want to chunk.
|
||||
ja_JP: The text you want to chunk.
|
||||
pt_BR: The text you want to chunk.
|
||||
zh_Hans: 你想要分块的文本。
|
||||
label:
|
||||
en_US: Input Variable
|
||||
ja_JP: Input Variable
|
||||
pt_BR: Input Variable
|
||||
zh_Hans: 输入变量
|
||||
llm_description: The text you want to chunk.
|
||||
max: null
|
||||
min: null
|
||||
name: input_variable
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The delimiter of the chunks.
|
||||
ja_JP: The delimiter of the chunks.
|
||||
pt_BR: The delimiter of the chunks.
|
||||
zh_Hans: 块的分隔符。
|
||||
label:
|
||||
en_US: Delimiter
|
||||
ja_JP: Delimiter
|
||||
pt_BR: Delimiter
|
||||
zh_Hans: 分隔符
|
||||
llm_description: The delimiter of the chunks, the format of the delimiter
|
||||
must be a string.
|
||||
max: null
|
||||
min: null
|
||||
name: delimiter
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The maximum chunk length.
|
||||
ja_JP: The maximum chunk length.
|
||||
pt_BR: The maximum chunk length.
|
||||
zh_Hans: 最大块的长度。
|
||||
label:
|
||||
en_US: Maximum Chunk Length
|
||||
ja_JP: Maximum Chunk Length
|
||||
pt_BR: Maximum Chunk Length
|
||||
zh_Hans: 最大块的长度
|
||||
llm_description: The maximum chunk length, the format of the chunk size
|
||||
must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: max_chunk_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The chunk overlap length.
|
||||
ja_JP: The chunk overlap length.
|
||||
pt_BR: The chunk overlap length.
|
||||
zh_Hans: 块的重叠长度。
|
||||
label:
|
||||
en_US: Chunk Overlap Length
|
||||
ja_JP: Chunk Overlap Length
|
||||
pt_BR: Chunk Overlap Length
|
||||
zh_Hans: 块的重叠长度
|
||||
llm_description: The chunk overlap length, the format of the chunk overlap
|
||||
length must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: chunk_overlap_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Replace consecutive spaces, newlines and tabs
|
||||
ja_JP: Replace consecutive spaces, newlines and tabs
|
||||
pt_BR: Replace consecutive spaces, newlines and tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
label:
|
||||
en_US: Replace Consecutive Spaces, Newlines and Tabs
|
||||
ja_JP: Replace Consecutive Spaces, Newlines and Tabs
|
||||
pt_BR: Replace Consecutive Spaces, Newlines and Tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
llm_description: Replace consecutive spaces, newlines and tabs, the format
|
||||
of the replace must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: replace_consecutive_spaces_newlines_tabs
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Delete all URLs and email addresses
|
||||
ja_JP: Delete all URLs and email addresses
|
||||
pt_BR: Delete all URLs and email addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
label:
|
||||
en_US: Delete All URLs and Email Addresses
|
||||
ja_JP: Delete All URLs and Email Addresses
|
||||
pt_BR: Delete All URLs and Email Addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
llm_description: Delete all URLs and email addresses, the format of the
|
||||
delete must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: delete_all_urls_and_email_addresses
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
params:
|
||||
chunk_overlap_length: ''
|
||||
delete_all_urls_and_email_addresses: ''
|
||||
delimiter: ''
|
||||
input_variable: ''
|
||||
max_chunk_length: ''
|
||||
replace_consecutive_spaces_newlines_tabs: ''
|
||||
provider_id: langgenius/general_chunk/general_chunk
|
||||
provider_name: langgenius/general_chunk/general_chunk
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: 通用文本分块
|
||||
tool_configurations: {}
|
||||
tool_description: 一个用于通用文本分块模式的工具,检索和召回的块是相同的。
|
||||
tool_label: 通用文本分块
|
||||
tool_name: general_chunk
|
||||
tool_parameters:
|
||||
chunk_overlap_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- chunk_overlap
|
||||
delete_all_urls_and_email_addresses:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delete_urls_email#}}'
|
||||
delimiter:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delimiter#}}'
|
||||
input_variable:
|
||||
type: mixed
|
||||
value: '{{#1752489759475.content#}}'
|
||||
max_chunk_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- max_chunk_length
|
||||
replace_consecutive_spaces_newlines_tabs:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.replace_consecutive_spaces#}}'
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752482151668'
|
||||
position:
|
||||
x: 1063.6922916384628
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1063.6922916384628
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: Notion数据源
|
||||
datasource_name: notion_datasource
|
||||
datasource_parameters: {}
|
||||
plugin_id: langgenius/notion_datasource
|
||||
provider_name: notion
|
||||
provider_type: online_document
|
||||
selected: false
|
||||
title: Notion数据源
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752489759475'
|
||||
position:
|
||||
x: 736.9082104000458
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 736.9082104000458
|
||||
y: 281.3910724383104
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
viewport:
|
||||
x: -838.569649323166
|
||||
y: -168.94656489167426
|
||||
zoom: 1.286925643857699
|
||||
rag_pipeline_variables:
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: delimiter
|
||||
max_length: 100
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Maximum chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Chunk overlap
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: chunk_overlap
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Replace consecutive spaces, newlines and tabs
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: replace_consecutive_spaces
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Delete all URLs and email addresses
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: delete_urls_email
|
@ -0,0 +1,394 @@
|
||||
dependencies:
|
||||
- current_identifier: null
|
||||
type: package
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/general_chunk:0.0.2@0856fa64f7b0dc937c982f12d45b3a1ad91ba8aacc0d28a1b436e6c94a77e298
|
||||
kind: rag_pipeline
|
||||
rag_pipeline:
|
||||
description: ''
|
||||
icon: 📙
|
||||
icon_background: '#FFF4ED'
|
||||
icon_type: emoji
|
||||
name: notion-general-high-quality
|
||||
version: 0.1.0
|
||||
workflow:
|
||||
conversation_variables: []
|
||||
environment_variables: []
|
||||
features: {}
|
||||
graph:
|
||||
edges:
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: knowledge-index
|
||||
id: 1752482151668-source-1752477924228-target
|
||||
source: '1752482151668'
|
||||
sourceHandle: source
|
||||
target: '1752477924228'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: tool
|
||||
id: 1752489759475-source-1752482151668-target
|
||||
source: '1752489759475'
|
||||
sourceHandle: source
|
||||
target: '1752482151668'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
nodes:
|
||||
- data:
|
||||
chunk_structure: text_model
|
||||
embedding_model: text-embedding-ada-002
|
||||
embedding_model_provider: langgenius/openai/openai
|
||||
index_chunk_variable_selector:
|
||||
- '1752482151668'
|
||||
- result
|
||||
indexing_technique: high_quality
|
||||
keyword_number: 10
|
||||
retrieval_model:
|
||||
score_threshold: 0.5
|
||||
score_threshold_enabled: false
|
||||
search_method: semantic_search
|
||||
top_k: 3
|
||||
vector_setting:
|
||||
embedding_model_name: text-embedding-ada-002
|
||||
embedding_provider_name: langgenius/openai/openai
|
||||
selected: true
|
||||
title: 知识库
|
||||
type: knowledge-index
|
||||
height: 114
|
||||
id: '1752477924228'
|
||||
position:
|
||||
x: 1444.5503479271906
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1444.5503479271906
|
||||
y: 281.3910724383104
|
||||
selected: true
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
result:
|
||||
description: The result of the general chunk tool.
|
||||
properties:
|
||||
general_chunks:
|
||||
items:
|
||||
description: The chunk of the text.
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The text you want to chunk.
|
||||
ja_JP: The text you want to chunk.
|
||||
pt_BR: The text you want to chunk.
|
||||
zh_Hans: 你想要分块的文本。
|
||||
label:
|
||||
en_US: Input Variable
|
||||
ja_JP: Input Variable
|
||||
pt_BR: Input Variable
|
||||
zh_Hans: 输入变量
|
||||
llm_description: The text you want to chunk.
|
||||
max: null
|
||||
min: null
|
||||
name: input_variable
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The delimiter of the chunks.
|
||||
ja_JP: The delimiter of the chunks.
|
||||
pt_BR: The delimiter of the chunks.
|
||||
zh_Hans: 块的分隔符。
|
||||
label:
|
||||
en_US: Delimiter
|
||||
ja_JP: Delimiter
|
||||
pt_BR: Delimiter
|
||||
zh_Hans: 分隔符
|
||||
llm_description: The delimiter of the chunks, the format of the delimiter
|
||||
must be a string.
|
||||
max: null
|
||||
min: null
|
||||
name: delimiter
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The maximum chunk length.
|
||||
ja_JP: The maximum chunk length.
|
||||
pt_BR: The maximum chunk length.
|
||||
zh_Hans: 最大块的长度。
|
||||
label:
|
||||
en_US: Maximum Chunk Length
|
||||
ja_JP: Maximum Chunk Length
|
||||
pt_BR: Maximum Chunk Length
|
||||
zh_Hans: 最大块的长度
|
||||
llm_description: The maximum chunk length, the format of the chunk size
|
||||
must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: max_chunk_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The chunk overlap length.
|
||||
ja_JP: The chunk overlap length.
|
||||
pt_BR: The chunk overlap length.
|
||||
zh_Hans: 块的重叠长度。
|
||||
label:
|
||||
en_US: Chunk Overlap Length
|
||||
ja_JP: Chunk Overlap Length
|
||||
pt_BR: Chunk Overlap Length
|
||||
zh_Hans: 块的重叠长度
|
||||
llm_description: The chunk overlap length, the format of the chunk overlap
|
||||
length must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: chunk_overlap_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Replace consecutive spaces, newlines and tabs
|
||||
ja_JP: Replace consecutive spaces, newlines and tabs
|
||||
pt_BR: Replace consecutive spaces, newlines and tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
label:
|
||||
en_US: Replace Consecutive Spaces, Newlines and Tabs
|
||||
ja_JP: Replace Consecutive Spaces, Newlines and Tabs
|
||||
pt_BR: Replace Consecutive Spaces, Newlines and Tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
llm_description: Replace consecutive spaces, newlines and tabs, the format
|
||||
of the replace must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: replace_consecutive_spaces_newlines_tabs
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Delete all URLs and email addresses
|
||||
ja_JP: Delete all URLs and email addresses
|
||||
pt_BR: Delete all URLs and email addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
label:
|
||||
en_US: Delete All URLs and Email Addresses
|
||||
ja_JP: Delete All URLs and Email Addresses
|
||||
pt_BR: Delete All URLs and Email Addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
llm_description: Delete all URLs and email addresses, the format of the
|
||||
delete must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: delete_all_urls_and_email_addresses
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
params:
|
||||
chunk_overlap_length: ''
|
||||
delete_all_urls_and_email_addresses: ''
|
||||
delimiter: ''
|
||||
input_variable: ''
|
||||
max_chunk_length: ''
|
||||
replace_consecutive_spaces_newlines_tabs: ''
|
||||
provider_id: langgenius/general_chunk/general_chunk
|
||||
provider_name: langgenius/general_chunk/general_chunk
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: 通用文本分块
|
||||
tool_configurations: {}
|
||||
tool_description: 一个用于通用文本分块模式的工具,检索和召回的块是相同的。
|
||||
tool_label: 通用文本分块
|
||||
tool_name: general_chunk
|
||||
tool_parameters:
|
||||
chunk_overlap_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- chunk_overlap
|
||||
delete_all_urls_and_email_addresses:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delete_urls_email#}}'
|
||||
delimiter:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delimiter#}}'
|
||||
input_variable:
|
||||
type: mixed
|
||||
value: '{{#1752489759475.content#}}'
|
||||
max_chunk_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- max_chunk_length
|
||||
replace_consecutive_spaces_newlines_tabs:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.replace_consecutive_spaces#}}'
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752482151668'
|
||||
position:
|
||||
x: 1063.6922916384628
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1063.6922916384628
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: Notion数据源
|
||||
datasource_name: notion_datasource
|
||||
datasource_parameters: {}
|
||||
plugin_id: langgenius/notion_datasource
|
||||
provider_name: notion
|
||||
provider_type: online_document
|
||||
selected: false
|
||||
title: Notion数据源
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752489759475'
|
||||
position:
|
||||
x: 736.9082104000458
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 736.9082104000458
|
||||
y: 281.3910724383104
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
viewport:
|
||||
x: -838.569649323166
|
||||
y: -168.94656489167426
|
||||
zoom: 1.286925643857699
|
||||
rag_pipeline_variables:
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: delimiter
|
||||
max_length: 100
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Maximum chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Chunk overlap
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: chunk_overlap
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Replace consecutive spaces, newlines and tabs
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: replace_consecutive_spaces
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Delete all URLs and email addresses
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: delete_urls_email
|
503
api/services/rag_pipeline/transform/notion-parentchild.yml
Normal file
503
api/services/rag_pipeline/transform/notion-parentchild.yml
Normal file
@ -0,0 +1,503 @@
|
||||
dependencies:
|
||||
- current_identifier: null
|
||||
type: package
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/parent_child_chunk:0.0.1@f8f9ba1f3bcda159ebc0168baa755c2181b923da8157ebb439b8046019f5b510
|
||||
kind: rag_pipeline
|
||||
rag_pipeline:
|
||||
description: ''
|
||||
icon: 📙
|
||||
icon_background: ''
|
||||
icon_type: emoji
|
||||
name: notion-parentchild
|
||||
version: 0.1.0
|
||||
workflow:
|
||||
conversation_variables: []
|
||||
environment_variables: []
|
||||
features: {}
|
||||
graph:
|
||||
edges:
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: tool
|
||||
id: 1752489759475-source-1752490343805-target
|
||||
source: '1752489759475'
|
||||
sourceHandle: source
|
||||
target: '1752490343805'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: knowledge-index
|
||||
id: 1752490343805-source-1752477924228-target
|
||||
source: '1752490343805'
|
||||
sourceHandle: source
|
||||
target: '1752477924228'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
nodes:
|
||||
- data:
|
||||
chunk_structure: hierarchical_model
|
||||
embedding_model: text-embedding-ada-002
|
||||
embedding_model_provider: langgenius/openai/openai
|
||||
index_chunk_variable_selector:
|
||||
- '1752490343805'
|
||||
- result
|
||||
indexing_technique: high_quality
|
||||
keyword_number: 10
|
||||
retrieval_model:
|
||||
score_threshold: 0.5
|
||||
score_threshold_enabled: false
|
||||
search_method: semantic_search
|
||||
top_k: 3
|
||||
vector_setting:
|
||||
embedding_model_name: text-embedding-ada-002
|
||||
embedding_provider_name: langgenius/openai/openai
|
||||
selected: false
|
||||
title: 知识库
|
||||
type: knowledge-index
|
||||
height: 114
|
||||
id: '1752477924228'
|
||||
position:
|
||||
x: 1486.2052698032674
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1486.2052698032674
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: Notion数据源
|
||||
datasource_name: notion_datasource
|
||||
datasource_parameters: {}
|
||||
plugin_id: langgenius/notion_datasource
|
||||
provider_name: notion
|
||||
provider_type: online_document
|
||||
selected: false
|
||||
title: Notion数据源
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752489759475'
|
||||
position:
|
||||
x: 736.9082104000458
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 736.9082104000458
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
result:
|
||||
description: Parent child chunks result
|
||||
items:
|
||||
type: object
|
||||
type: array
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The text you want to chunk.
|
||||
ja_JP: The text you want to chunk.
|
||||
pt_BR: The text you want to chunk.
|
||||
zh_Hans: 你想要分块的文本。
|
||||
label:
|
||||
en_US: Input text
|
||||
ja_JP: Input text
|
||||
pt_BR: Input text
|
||||
zh_Hans: 输入文本
|
||||
llm_description: The text you want to chunk.
|
||||
max: null
|
||||
min: null
|
||||
name: input_text
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: 1024
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Maximum length for chunking
|
||||
ja_JP: Maximum length for chunking
|
||||
pt_BR: Comprimento máximo para divisão
|
||||
zh_Hans: 用于分块的最大长度
|
||||
label:
|
||||
en_US: Maximum Length
|
||||
ja_JP: Maximum Length
|
||||
pt_BR: Comprimento Máximo
|
||||
zh_Hans: 最大长度
|
||||
llm_description: Maximum length allowed per chunk
|
||||
max: null
|
||||
min: null
|
||||
name: max_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: '
|
||||
|
||||
|
||||
'
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Separator used for chunking
|
||||
ja_JP: Separator used for chunking
|
||||
pt_BR: Separador usado para divisão
|
||||
zh_Hans: 用于分块的分隔符
|
||||
label:
|
||||
en_US: Chunk Separator
|
||||
ja_JP: Chunk Separator
|
||||
pt_BR: Separador de Divisão
|
||||
zh_Hans: 分块分隔符
|
||||
llm_description: The separator used to split chunks
|
||||
max: null
|
||||
min: null
|
||||
name: separator
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: 512
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Maximum length for subchunking
|
||||
ja_JP: Maximum length for subchunking
|
||||
pt_BR: Comprimento máximo para subdivisão
|
||||
zh_Hans: 用于子分块的最大长度
|
||||
label:
|
||||
en_US: Subchunk Maximum Length
|
||||
ja_JP: Subchunk Maximum Length
|
||||
pt_BR: Comprimento Máximo de Subdivisão
|
||||
zh_Hans: 子分块最大长度
|
||||
llm_description: Maximum length allowed per subchunk
|
||||
max: null
|
||||
min: null
|
||||
name: subchunk_max_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: '. '
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Separator used for subchunking
|
||||
ja_JP: Separator used for subchunking
|
||||
pt_BR: Separador usado para subdivisão
|
||||
zh_Hans: 用于子分块的分隔符
|
||||
label:
|
||||
en_US: Subchunk Separator
|
||||
ja_JP: Subchunk Separator
|
||||
pt_BR: Separador de Subdivisão
|
||||
zh_Hans: 子分块分隔符
|
||||
llm_description: The separator used to split subchunks
|
||||
max: null
|
||||
min: null
|
||||
name: subchunk_separator
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: paragraph
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Split text into paragraphs based on separator and maximum chunk
|
||||
length, using split text as parent block or entire document as parent
|
||||
block and directly retrieve.
|
||||
ja_JP: Split text into paragraphs based on separator and maximum chunk
|
||||
length, using split text as parent block or entire document as parent
|
||||
block and directly retrieve.
|
||||
pt_BR: Dividir texto em parágrafos com base no separador e no comprimento
|
||||
máximo do bloco, usando o texto dividido como bloco pai ou documento
|
||||
completo como bloco pai e diretamente recuperá-lo.
|
||||
zh_Hans: 根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块或整个文档用作父块并直接检索。
|
||||
label:
|
||||
en_US: Parent Mode
|
||||
ja_JP: Parent Mode
|
||||
pt_BR: Modo Pai
|
||||
zh_Hans: 父块模式
|
||||
llm_description: Split text into paragraphs based on separator and maximum
|
||||
chunk length, using split text as parent block or entire document as parent
|
||||
block and directly retrieve.
|
||||
max: null
|
||||
min: null
|
||||
name: parent_mode
|
||||
options:
|
||||
- icon: ''
|
||||
label:
|
||||
en_US: Paragraph
|
||||
ja_JP: Paragraph
|
||||
pt_BR: Parágrafo
|
||||
zh_Hans: 段落
|
||||
value: paragraph
|
||||
- icon: ''
|
||||
label:
|
||||
en_US: Full Document
|
||||
ja_JP: Full Document
|
||||
pt_BR: Documento Completo
|
||||
zh_Hans: 全文
|
||||
value: full_doc
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: select
|
||||
- auto_generate: null
|
||||
default: 0
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Whether to remove extra spaces in the text
|
||||
ja_JP: Whether to remove extra spaces in the text
|
||||
pt_BR: Se deve remover espaços extras no texto
|
||||
zh_Hans: 是否移除文本中的多余空格
|
||||
label:
|
||||
en_US: Remove Extra Spaces
|
||||
ja_JP: Remove Extra Spaces
|
||||
pt_BR: Remover Espaços Extras
|
||||
zh_Hans: 移除多余空格
|
||||
llm_description: Whether to remove extra spaces in the text
|
||||
max: null
|
||||
min: null
|
||||
name: remove_extra_spaces
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
- auto_generate: null
|
||||
default: 0
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Whether to remove URLs and emails in the text
|
||||
ja_JP: Whether to remove URLs and emails in the text
|
||||
pt_BR: Se deve remover URLs e e-mails no texto
|
||||
zh_Hans: 是否移除文本中的URL和电子邮件地址
|
||||
label:
|
||||
en_US: Remove URLs and Emails
|
||||
ja_JP: Remove URLs and Emails
|
||||
pt_BR: Remover URLs e E-mails
|
||||
zh_Hans: 移除URL和电子邮件地址
|
||||
llm_description: Whether to remove URLs and emails in the text
|
||||
max: null
|
||||
min: null
|
||||
name: remove_urls_emails
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
params:
|
||||
input_text: ''
|
||||
max_length: ''
|
||||
parent_mode: ''
|
||||
remove_extra_spaces: ''
|
||||
remove_urls_emails: ''
|
||||
separator: ''
|
||||
subchunk_max_length: ''
|
||||
subchunk_separator: ''
|
||||
provider_id: langgenius/parent_child_chunk/parent_child_chunk
|
||||
provider_name: langgenius/parent_child_chunk/parent_child_chunk
|
||||
provider_type: builtin
|
||||
selected: true
|
||||
title: 父子分块处理器
|
||||
tool_configurations: {}
|
||||
tool_description: 将文档处理为父子分块结构
|
||||
tool_label: 父子分块处理器
|
||||
tool_name: parent_child_chunk
|
||||
tool_parameters:
|
||||
input_text:
|
||||
type: mixed
|
||||
value: '{{#1752489759475.content#}}'
|
||||
max_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- max_chunk_length
|
||||
parent_mode:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- parent_mode
|
||||
remove_extra_spaces:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.replace_consecutive_spaces#}}'
|
||||
remove_urls_emails:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delete_urls_email#}}'
|
||||
separator:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delimiter#}}'
|
||||
subchunk_max_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- child_max_chunk_length
|
||||
subchunk_separator:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.child_delimiter#}}'
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752490343805'
|
||||
position:
|
||||
x: 1077.0240183162543
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1077.0240183162543
|
||||
y: 281.3910724383104
|
||||
selected: true
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
viewport:
|
||||
x: -487.2912544090391
|
||||
y: -54.7029301848807
|
||||
zoom: 0.9994011715768695
|
||||
rag_pipeline_variables:
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n\n
|
||||
label: Delimiter
|
||||
max_length: 100
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n\n is recommended
|
||||
for splitting the original document into large parent chunks. You can also use
|
||||
special delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 1024
|
||||
label: Maximum chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n
|
||||
label: Child delimiter
|
||||
max_length: 199
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n\n is recommended
|
||||
for splitting the original document into large parent chunks. You can also use
|
||||
special delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: child_delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 512
|
||||
label: Child max chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: child_max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: paragraph
|
||||
label: Parent mode
|
||||
max_length: 48
|
||||
options:
|
||||
- full_doc
|
||||
- paragraph
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: select
|
||||
unit: null
|
||||
variable: parent_mode
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Replace consecutive spaces, newlines and tabs
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: replace_consecutive_spaces
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Delete all URLs and email addresses
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: delete_urls_email
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1,666 @@
|
||||
dependencies:
|
||||
- current_identifier: null
|
||||
type: package
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/general_chunk:0.0.2@0856fa64f7b0dc937c982f12d45b3a1ad91ba8aacc0d28a1b436e6c94a77e298
|
||||
kind: rag_pipeline
|
||||
rag_pipeline:
|
||||
description: ''
|
||||
icon: 📙
|
||||
icon_background: ''
|
||||
icon_type: emoji
|
||||
name: website-crawl-general-economy
|
||||
version: 0.1.0
|
||||
workflow:
|
||||
conversation_variables: []
|
||||
environment_variables: []
|
||||
features: {}
|
||||
graph:
|
||||
edges:
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: variable-aggregator
|
||||
id: 1752491761974-source-1752565435219-target
|
||||
source: '1752491761974'
|
||||
sourceHandle: source
|
||||
target: '1752565435219'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: variable-aggregator
|
||||
id: 1752565402678-source-1752565435219-target
|
||||
source: '1752565402678'
|
||||
sourceHandle: source
|
||||
target: '1752565435219'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: variable-aggregator
|
||||
targetType: tool
|
||||
id: 1752565435219-source-1752569675978-target
|
||||
source: '1752565435219'
|
||||
sourceHandle: source
|
||||
target: '1752569675978'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: knowledge-index
|
||||
id: 1752569675978-source-1752477924228-target
|
||||
source: '1752569675978'
|
||||
sourceHandle: source
|
||||
target: '1752477924228'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
nodes:
|
||||
- data:
|
||||
chunk_structure: text_model
|
||||
embedding_model: text-embedding-ada-002
|
||||
embedding_model_provider: langgenius/openai/openai
|
||||
index_chunk_variable_selector:
|
||||
- '1752569675978'
|
||||
- result
|
||||
indexing_technique: economy
|
||||
keyword_number: 10
|
||||
retrieval_model:
|
||||
score_threshold: 0.5
|
||||
score_threshold_enabled: false
|
||||
search_method: keyword_search
|
||||
top_k: 3
|
||||
vector_setting:
|
||||
embedding_model_name: text-embedding-ada-002
|
||||
embedding_provider_name: langgenius/openai/openai
|
||||
selected: true
|
||||
title: 知识库
|
||||
type: knowledge-index
|
||||
height: 114
|
||||
id: '1752477924228'
|
||||
position:
|
||||
x: 2140.4053851189346
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 2140.4053851189346
|
||||
y: 281.3910724383104
|
||||
selected: true
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: Jina Reader
|
||||
datasource_name: jina_reader
|
||||
datasource_parameters:
|
||||
crawl_sub_pages:
|
||||
type: mixed
|
||||
value: '{{#rag.1752491761974.jina_crawl_sub_pages#}}'
|
||||
limit:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- '1752491761974'
|
||||
- jina_limit
|
||||
url:
|
||||
type: mixed
|
||||
value: '{{#rag.1752491761974.jina_url#}}'
|
||||
use_sitemap:
|
||||
type: mixed
|
||||
value: '{{#rag.1752491761974.jina_use_sitemap#}}'
|
||||
plugin_id: langgenius/jina_datasource
|
||||
provider_name: jina
|
||||
provider_type: website_crawl
|
||||
selected: false
|
||||
title: Jina Reader
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752491761974'
|
||||
position:
|
||||
x: 1067.7526055798794
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1067.7526055798794
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: Firecrawl
|
||||
datasource_name: crawl
|
||||
datasource_parameters:
|
||||
crawl_subpages:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_crawl_sub_pages#}}'
|
||||
exclude_paths:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_exclude_paths#}}'
|
||||
include_paths:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_include_only_paths#}}'
|
||||
limit:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- '1752565402678'
|
||||
- firecrawl_limit
|
||||
max_depth:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- '1752565402678'
|
||||
- firecrawl_max_depth
|
||||
only_main_content:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_extract_main_content#}}'
|
||||
url:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_url#}}'
|
||||
plugin_id: langgenius/firecrawl_datasource
|
||||
provider_name: firecrawl
|
||||
provider_type: website_crawl
|
||||
selected: false
|
||||
title: Firecrawl
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752565402678'
|
||||
position:
|
||||
x: 1067.7526055798794
|
||||
y: 417.32608398342404
|
||||
positionAbsolute:
|
||||
x: 1067.7526055798794
|
||||
y: 417.32608398342404
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
output_type: string
|
||||
selected: false
|
||||
title: 变量聚合器
|
||||
type: variable-aggregator
|
||||
variables:
|
||||
- - '1752491761974'
|
||||
- content
|
||||
- - '1752565402678'
|
||||
- content
|
||||
height: 129
|
||||
id: '1752565435219'
|
||||
position:
|
||||
x: 1505.4306671642219
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1505.4306671642219
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
result:
|
||||
description: The result of the general chunk tool.
|
||||
properties:
|
||||
general_chunks:
|
||||
items:
|
||||
description: The chunk of the text.
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The text you want to chunk.
|
||||
ja_JP: The text you want to chunk.
|
||||
pt_BR: The text you want to chunk.
|
||||
zh_Hans: 你想要分块的文本。
|
||||
label:
|
||||
en_US: Input Variable
|
||||
ja_JP: Input Variable
|
||||
pt_BR: Input Variable
|
||||
zh_Hans: 输入变量
|
||||
llm_description: The text you want to chunk.
|
||||
max: null
|
||||
min: null
|
||||
name: input_variable
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The delimiter of the chunks.
|
||||
ja_JP: The delimiter of the chunks.
|
||||
pt_BR: The delimiter of the chunks.
|
||||
zh_Hans: 块的分隔符。
|
||||
label:
|
||||
en_US: Delimiter
|
||||
ja_JP: Delimiter
|
||||
pt_BR: Delimiter
|
||||
zh_Hans: 分隔符
|
||||
llm_description: The delimiter of the chunks, the format of the delimiter
|
||||
must be a string.
|
||||
max: null
|
||||
min: null
|
||||
name: delimiter
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The maximum chunk length.
|
||||
ja_JP: The maximum chunk length.
|
||||
pt_BR: The maximum chunk length.
|
||||
zh_Hans: 最大块的长度。
|
||||
label:
|
||||
en_US: Maximum Chunk Length
|
||||
ja_JP: Maximum Chunk Length
|
||||
pt_BR: Maximum Chunk Length
|
||||
zh_Hans: 最大块的长度
|
||||
llm_description: The maximum chunk length, the format of the chunk size
|
||||
must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: max_chunk_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The chunk overlap length.
|
||||
ja_JP: The chunk overlap length.
|
||||
pt_BR: The chunk overlap length.
|
||||
zh_Hans: 块的重叠长度。
|
||||
label:
|
||||
en_US: Chunk Overlap Length
|
||||
ja_JP: Chunk Overlap Length
|
||||
pt_BR: Chunk Overlap Length
|
||||
zh_Hans: 块的重叠长度
|
||||
llm_description: The chunk overlap length, the format of the chunk overlap
|
||||
length must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: chunk_overlap_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Replace consecutive spaces, newlines and tabs
|
||||
ja_JP: Replace consecutive spaces, newlines and tabs
|
||||
pt_BR: Replace consecutive spaces, newlines and tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
label:
|
||||
en_US: Replace Consecutive Spaces, Newlines and Tabs
|
||||
ja_JP: Replace Consecutive Spaces, Newlines and Tabs
|
||||
pt_BR: Replace Consecutive Spaces, Newlines and Tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
llm_description: Replace consecutive spaces, newlines and tabs, the format
|
||||
of the replace must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: replace_consecutive_spaces_newlines_tabs
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Delete all URLs and email addresses
|
||||
ja_JP: Delete all URLs and email addresses
|
||||
pt_BR: Delete all URLs and email addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
label:
|
||||
en_US: Delete All URLs and Email Addresses
|
||||
ja_JP: Delete All URLs and Email Addresses
|
||||
pt_BR: Delete All URLs and Email Addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
llm_description: Delete all URLs and email addresses, the format of the
|
||||
delete must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: delete_all_urls_and_email_addresses
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
params:
|
||||
chunk_overlap_length: ''
|
||||
delete_all_urls_and_email_addresses: ''
|
||||
delimiter: ''
|
||||
input_variable: ''
|
||||
max_chunk_length: ''
|
||||
replace_consecutive_spaces_newlines_tabs: ''
|
||||
provider_id: langgenius/general_chunk/general_chunk
|
||||
provider_name: langgenius/general_chunk/general_chunk
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: 通用文本分块
|
||||
tool_configurations: {}
|
||||
tool_description: 一个用于通用文本分块模式的工具,检索和召回的块是相同的。
|
||||
tool_label: 通用文本分块
|
||||
tool_name: general_chunk
|
||||
tool_parameters:
|
||||
chunk_overlap_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- chunk_overlap
|
||||
delete_all_urls_and_email_addresses:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delete_urls_email#}}'
|
||||
delimiter:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delimiter#}}'
|
||||
input_variable:
|
||||
type: mixed
|
||||
value: '{{#1752565435219.output#}}'
|
||||
max_chunk_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- max_chunk_length
|
||||
replace_consecutive_spaces_newlines_tabs:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.replace_consecutive_spaces#}}'
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752569675978'
|
||||
position:
|
||||
x: 1807.4306671642219
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1807.4306671642219
|
||||
y: 281.3910724383104
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
viewport:
|
||||
x: -707.721097109337
|
||||
y: -93.07807382100896
|
||||
zoom: 0.9350632198875476
|
||||
rag_pipeline_variables:
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: null
|
||||
label: URL
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: https://docs.dify.ai/en/
|
||||
required: true
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: jina_url
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: 10
|
||||
label: Limit
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: jina_limit
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: null
|
||||
label: Crawl sub-pages
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: jina_crawl_sub_pages
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: null
|
||||
label: Use sitemap
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: Follow the sitemap to crawl the site. If not, Jina Reader will crawl
|
||||
iteratively based on page relevance, yielding fewer but higher-quality pages.
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: jina_use_sitemap
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: URL
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: https://docs.dify.ai/en/
|
||||
required: true
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: firecrawl_url
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: true
|
||||
label: Crawl sub-pages
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: firecrawl_crawl_sub_pages
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: 10
|
||||
label: Limit
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: firecrawl_limit
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Max depth
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: ''
|
||||
required: false
|
||||
tooltips: Maximum depth to crawl relative to the entered URL. Depth 0 just scrapes
|
||||
the page of the entered url, depth 1 scrapes the url and everything after enteredURL
|
||||
+ one /, and so on.
|
||||
type: number
|
||||
unit: null
|
||||
variable: firecrawl_max_depth
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Exclude paths
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: blog/*, /about/*
|
||||
required: false
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: firecrawl_exclude_paths
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Include only paths
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: articles/*
|
||||
required: false
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: firecrawl_include_only_paths
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: firecrawl_extract_main_content
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: firecrawl_extract_main_content
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n\n
|
||||
label: delimiter
|
||||
max_length: 100
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n\n is recommended
|
||||
for splitting the original document into large parent chunks. You can also use
|
||||
special delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 1024
|
||||
label: Maximum chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 50
|
||||
label: chunk_overlap
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: Setting the chunk overlap can maintain the semantic relevance between
|
||||
them, enhancing the retrieve effect. It is recommended to set 10%–25% of the
|
||||
maximum chunk size.
|
||||
type: number
|
||||
unit: characters
|
||||
variable: chunk_overlap
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: replace_consecutive_spaces
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: replace_consecutive_spaces
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Delete all URLs and email addresses
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: delete_urls_email
|
@ -0,0 +1,666 @@
|
||||
dependencies:
|
||||
- current_identifier: null
|
||||
type: package
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/general_chunk:0.0.2@0856fa64f7b0dc937c982f12d45b3a1ad91ba8aacc0d28a1b436e6c94a77e298
|
||||
kind: rag_pipeline
|
||||
rag_pipeline:
|
||||
description: ''
|
||||
icon: 📙
|
||||
icon_background: '#FFF4ED'
|
||||
icon_type: emoji
|
||||
name: website-crawl-general-high-quality
|
||||
version: 0.1.0
|
||||
workflow:
|
||||
conversation_variables: []
|
||||
environment_variables: []
|
||||
features: {}
|
||||
graph:
|
||||
edges:
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: variable-aggregator
|
||||
id: 1752491761974-source-1752565435219-target
|
||||
source: '1752491761974'
|
||||
sourceHandle: source
|
||||
target: '1752565435219'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: variable-aggregator
|
||||
id: 1752565402678-source-1752565435219-target
|
||||
source: '1752565402678'
|
||||
sourceHandle: source
|
||||
target: '1752565435219'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: variable-aggregator
|
||||
targetType: tool
|
||||
id: 1752565435219-source-1752569675978-target
|
||||
source: '1752565435219'
|
||||
sourceHandle: source
|
||||
target: '1752569675978'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: knowledge-index
|
||||
id: 1752569675978-source-1752477924228-target
|
||||
source: '1752569675978'
|
||||
sourceHandle: source
|
||||
target: '1752477924228'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
nodes:
|
||||
- data:
|
||||
chunk_structure: text_model
|
||||
embedding_model: text-embedding-ada-002
|
||||
embedding_model_provider: langgenius/openai/openai
|
||||
index_chunk_variable_selector:
|
||||
- '1752569675978'
|
||||
- result
|
||||
indexing_technique: high_quality
|
||||
keyword_number: 10
|
||||
retrieval_model:
|
||||
score_threshold: 0.5
|
||||
score_threshold_enabled: false
|
||||
search_method: semantic_search
|
||||
top_k: 3
|
||||
vector_setting:
|
||||
embedding_model_name: text-embedding-ada-002
|
||||
embedding_provider_name: langgenius/openai/openai
|
||||
selected: false
|
||||
title: 知识库
|
||||
type: knowledge-index
|
||||
height: 114
|
||||
id: '1752477924228'
|
||||
position:
|
||||
x: 2140.4053851189346
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 2140.4053851189346
|
||||
y: 281.3910724383104
|
||||
selected: true
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: Jina Reader
|
||||
datasource_name: jina_reader
|
||||
datasource_parameters:
|
||||
crawl_sub_pages:
|
||||
type: mixed
|
||||
value: '{{#rag.1752491761974.jina_crawl_sub_pages#}}'
|
||||
limit:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- '1752491761974'
|
||||
- jina_limit
|
||||
url:
|
||||
type: mixed
|
||||
value: '{{#rag.1752491761974.jina_url#}}'
|
||||
use_sitemap:
|
||||
type: mixed
|
||||
value: '{{#rag.1752491761974.jina_use_sitemap#}}'
|
||||
plugin_id: langgenius/jina_datasource
|
||||
provider_name: jina
|
||||
provider_type: website_crawl
|
||||
selected: false
|
||||
title: Jina Reader
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752491761974'
|
||||
position:
|
||||
x: 1067.7526055798794
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1067.7526055798794
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: Firecrawl
|
||||
datasource_name: crawl
|
||||
datasource_parameters:
|
||||
crawl_subpages:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_crawl_sub_pages#}}'
|
||||
exclude_paths:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_exclude_paths#}}'
|
||||
include_paths:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_include_only_paths#}}'
|
||||
limit:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- '1752565402678'
|
||||
- firecrawl_limit
|
||||
max_depth:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- '1752565402678'
|
||||
- firecrawl_max_depth
|
||||
only_main_content:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_extract_main_content#}}'
|
||||
url:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_url#}}'
|
||||
plugin_id: langgenius/firecrawl_datasource
|
||||
provider_name: firecrawl
|
||||
provider_type: website_crawl
|
||||
selected: false
|
||||
title: Firecrawl
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752565402678'
|
||||
position:
|
||||
x: 1067.7526055798794
|
||||
y: 417.32608398342404
|
||||
positionAbsolute:
|
||||
x: 1067.7526055798794
|
||||
y: 417.32608398342404
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
output_type: string
|
||||
selected: false
|
||||
title: 变量聚合器
|
||||
type: variable-aggregator
|
||||
variables:
|
||||
- - '1752491761974'
|
||||
- content
|
||||
- - '1752565402678'
|
||||
- content
|
||||
height: 129
|
||||
id: '1752565435219'
|
||||
position:
|
||||
x: 1505.4306671642219
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1505.4306671642219
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
result:
|
||||
description: The result of the general chunk tool.
|
||||
properties:
|
||||
general_chunks:
|
||||
items:
|
||||
description: The chunk of the text.
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The text you want to chunk.
|
||||
ja_JP: The text you want to chunk.
|
||||
pt_BR: The text you want to chunk.
|
||||
zh_Hans: 你想要分块的文本。
|
||||
label:
|
||||
en_US: Input Variable
|
||||
ja_JP: Input Variable
|
||||
pt_BR: Input Variable
|
||||
zh_Hans: 输入变量
|
||||
llm_description: The text you want to chunk.
|
||||
max: null
|
||||
min: null
|
||||
name: input_variable
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The delimiter of the chunks.
|
||||
ja_JP: The delimiter of the chunks.
|
||||
pt_BR: The delimiter of the chunks.
|
||||
zh_Hans: 块的分隔符。
|
||||
label:
|
||||
en_US: Delimiter
|
||||
ja_JP: Delimiter
|
||||
pt_BR: Delimiter
|
||||
zh_Hans: 分隔符
|
||||
llm_description: The delimiter of the chunks, the format of the delimiter
|
||||
must be a string.
|
||||
max: null
|
||||
min: null
|
||||
name: delimiter
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The maximum chunk length.
|
||||
ja_JP: The maximum chunk length.
|
||||
pt_BR: The maximum chunk length.
|
||||
zh_Hans: 最大块的长度。
|
||||
label:
|
||||
en_US: Maximum Chunk Length
|
||||
ja_JP: Maximum Chunk Length
|
||||
pt_BR: Maximum Chunk Length
|
||||
zh_Hans: 最大块的长度
|
||||
llm_description: The maximum chunk length, the format of the chunk size
|
||||
must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: max_chunk_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The chunk overlap length.
|
||||
ja_JP: The chunk overlap length.
|
||||
pt_BR: The chunk overlap length.
|
||||
zh_Hans: 块的重叠长度。
|
||||
label:
|
||||
en_US: Chunk Overlap Length
|
||||
ja_JP: Chunk Overlap Length
|
||||
pt_BR: Chunk Overlap Length
|
||||
zh_Hans: 块的重叠长度
|
||||
llm_description: The chunk overlap length, the format of the chunk overlap
|
||||
length must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: chunk_overlap_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Replace consecutive spaces, newlines and tabs
|
||||
ja_JP: Replace consecutive spaces, newlines and tabs
|
||||
pt_BR: Replace consecutive spaces, newlines and tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
label:
|
||||
en_US: Replace Consecutive Spaces, Newlines and Tabs
|
||||
ja_JP: Replace Consecutive Spaces, Newlines and Tabs
|
||||
pt_BR: Replace Consecutive Spaces, Newlines and Tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
llm_description: Replace consecutive spaces, newlines and tabs, the format
|
||||
of the replace must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: replace_consecutive_spaces_newlines_tabs
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Delete all URLs and email addresses
|
||||
ja_JP: Delete all URLs and email addresses
|
||||
pt_BR: Delete all URLs and email addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
label:
|
||||
en_US: Delete All URLs and Email Addresses
|
||||
ja_JP: Delete All URLs and Email Addresses
|
||||
pt_BR: Delete All URLs and Email Addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
llm_description: Delete all URLs and email addresses, the format of the
|
||||
delete must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: delete_all_urls_and_email_addresses
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
params:
|
||||
chunk_overlap_length: ''
|
||||
delete_all_urls_and_email_addresses: ''
|
||||
delimiter: ''
|
||||
input_variable: ''
|
||||
max_chunk_length: ''
|
||||
replace_consecutive_spaces_newlines_tabs: ''
|
||||
provider_id: langgenius/general_chunk/general_chunk
|
||||
provider_name: langgenius/general_chunk/general_chunk
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: 通用文本分块
|
||||
tool_configurations: {}
|
||||
tool_description: 一个用于通用文本分块模式的工具,检索和召回的块是相同的。
|
||||
tool_label: 通用文本分块
|
||||
tool_name: general_chunk
|
||||
tool_parameters:
|
||||
chunk_overlap_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- chunk_overlap
|
||||
delete_all_urls_and_email_addresses:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delete_urls_email#}}'
|
||||
delimiter:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delimiter#}}'
|
||||
input_variable:
|
||||
type: mixed
|
||||
value: '{{#1752565435219.output#}}'
|
||||
max_chunk_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- max_chunk_length
|
||||
replace_consecutive_spaces_newlines_tabs:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.replace_consecutive_spaces#}}'
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752569675978'
|
||||
position:
|
||||
x: 1807.4306671642219
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1807.4306671642219
|
||||
y: 281.3910724383104
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
viewport:
|
||||
x: -707.721097109337
|
||||
y: -93.07807382100896
|
||||
zoom: 0.9350632198875476
|
||||
rag_pipeline_variables:
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: null
|
||||
label: URL
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: https://docs.dify.ai/en/
|
||||
required: true
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: jina_url
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: 10
|
||||
label: Limit
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: jina_limit
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: null
|
||||
label: Crawl sub-pages
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: jina_crawl_sub_pages
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: null
|
||||
label: Use sitemap
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: Follow the sitemap to crawl the site. If not, Jina Reader will crawl
|
||||
iteratively based on page relevance, yielding fewer but higher-quality pages.
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: jina_use_sitemap
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: URL
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: https://docs.dify.ai/en/
|
||||
required: true
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: firecrawl_url
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: true
|
||||
label: Crawl sub-pages
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: firecrawl_crawl_sub_pages
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: 10
|
||||
label: Limit
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: firecrawl_limit
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Max depth
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: ''
|
||||
required: false
|
||||
tooltips: Maximum depth to crawl relative to the entered URL. Depth 0 just scrapes
|
||||
the page of the entered url, depth 1 scrapes the url and everything after enteredURL
|
||||
+ one /, and so on.
|
||||
type: number
|
||||
unit: null
|
||||
variable: firecrawl_max_depth
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Exclude paths
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: blog/*, /about/*
|
||||
required: false
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: firecrawl_exclude_paths
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Include only paths
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: articles/*
|
||||
required: false
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: firecrawl_include_only_paths
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: firecrawl_extract_main_content
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: firecrawl_extract_main_content
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n\n
|
||||
label: delimiter
|
||||
max_length: 100
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n\n is recommended
|
||||
for splitting the original document into large parent chunks. You can also use
|
||||
special delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 1024
|
||||
label: Maximum chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 50
|
||||
label: chunk_overlap
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: Setting the chunk overlap can maintain the semantic relevance between
|
||||
them, enhancing the retrieve effect. It is recommended to set 10%–25% of the
|
||||
maximum chunk size.
|
||||
type: number
|
||||
unit: characters
|
||||
variable: chunk_overlap
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: replace_consecutive_spaces
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: replace_consecutive_spaces
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Delete all URLs and email addresses
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: delete_urls_email
|
@ -0,0 +1,772 @@
|
||||
dependencies:
|
||||
- current_identifier: null
|
||||
type: package
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/parent_child_chunk:0.0.1@f8f9ba1f3bcda159ebc0168baa755c2181b923da8157ebb439b8046019f5b510
|
||||
kind: rag_pipeline
|
||||
rag_pipeline:
|
||||
description: ''
|
||||
icon: 📙
|
||||
icon_background: ''
|
||||
icon_type: emoji
|
||||
name: website-crawl-parentchild
|
||||
version: 0.1.0
|
||||
workflow:
|
||||
conversation_variables: []
|
||||
environment_variables: []
|
||||
features: {}
|
||||
graph:
|
||||
edges:
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: knowledge-index
|
||||
id: 1752490343805-source-1752477924228-target
|
||||
source: '1752490343805'
|
||||
sourceHandle: source
|
||||
target: '1752477924228'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: variable-aggregator
|
||||
id: 1752491761974-source-1752565435219-target
|
||||
source: '1752491761974'
|
||||
sourceHandle: source
|
||||
target: '1752565435219'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: variable-aggregator
|
||||
targetType: tool
|
||||
id: 1752565435219-source-1752490343805-target
|
||||
source: '1752565435219'
|
||||
sourceHandle: source
|
||||
target: '1752490343805'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: variable-aggregator
|
||||
id: 1752565402678-source-1752565435219-target
|
||||
source: '1752565402678'
|
||||
sourceHandle: source
|
||||
target: '1752565435219'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
nodes:
|
||||
- data:
|
||||
chunk_structure: hierarchical_model
|
||||
embedding_model: text-embedding-ada-002
|
||||
embedding_model_provider: langgenius/openai/openai
|
||||
index_chunk_variable_selector:
|
||||
- '1752490343805'
|
||||
- result
|
||||
indexing_technique: high_quality
|
||||
keyword_number: 10
|
||||
retrieval_model:
|
||||
score_threshold: 0.5
|
||||
score_threshold_enabled: false
|
||||
search_method: semantic_search
|
||||
top_k: 3
|
||||
vector_setting:
|
||||
embedding_model_name: text-embedding-ada-002
|
||||
embedding_provider_name: langgenius/openai/openai
|
||||
selected: false
|
||||
title: 知识库
|
||||
type: knowledge-index
|
||||
height: 114
|
||||
id: '1752477924228'
|
||||
position:
|
||||
x: 2215.5544306817387
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 2215.5544306817387
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
result:
|
||||
description: Parent child chunks result
|
||||
items:
|
||||
type: object
|
||||
type: array
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The text you want to chunk.
|
||||
ja_JP: The text you want to chunk.
|
||||
pt_BR: The text you want to chunk.
|
||||
zh_Hans: 你想要分块的文本。
|
||||
label:
|
||||
en_US: Input text
|
||||
ja_JP: Input text
|
||||
pt_BR: Input text
|
||||
zh_Hans: 输入文本
|
||||
llm_description: The text you want to chunk.
|
||||
max: null
|
||||
min: null
|
||||
name: input_text
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: 1024
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Maximum length for chunking
|
||||
ja_JP: Maximum length for chunking
|
||||
pt_BR: Comprimento máximo para divisão
|
||||
zh_Hans: 用于分块的最大长度
|
||||
label:
|
||||
en_US: Maximum Length
|
||||
ja_JP: Maximum Length
|
||||
pt_BR: Comprimento Máximo
|
||||
zh_Hans: 最大长度
|
||||
llm_description: Maximum length allowed per chunk
|
||||
max: null
|
||||
min: null
|
||||
name: max_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: '
|
||||
|
||||
|
||||
'
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Separator used for chunking
|
||||
ja_JP: Separator used for chunking
|
||||
pt_BR: Separador usado para divisão
|
||||
zh_Hans: 用于分块的分隔符
|
||||
label:
|
||||
en_US: Chunk Separator
|
||||
ja_JP: Chunk Separator
|
||||
pt_BR: Separador de Divisão
|
||||
zh_Hans: 分块分隔符
|
||||
llm_description: The separator used to split chunks
|
||||
max: null
|
||||
min: null
|
||||
name: separator
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: 512
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Maximum length for subchunking
|
||||
ja_JP: Maximum length for subchunking
|
||||
pt_BR: Comprimento máximo para subdivisão
|
||||
zh_Hans: 用于子分块的最大长度
|
||||
label:
|
||||
en_US: Subchunk Maximum Length
|
||||
ja_JP: Subchunk Maximum Length
|
||||
pt_BR: Comprimento Máximo de Subdivisão
|
||||
zh_Hans: 子分块最大长度
|
||||
llm_description: Maximum length allowed per subchunk
|
||||
max: null
|
||||
min: null
|
||||
name: subchunk_max_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: '. '
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Separator used for subchunking
|
||||
ja_JP: Separator used for subchunking
|
||||
pt_BR: Separador usado para subdivisão
|
||||
zh_Hans: 用于子分块的分隔符
|
||||
label:
|
||||
en_US: Subchunk Separator
|
||||
ja_JP: Subchunk Separator
|
||||
pt_BR: Separador de Subdivisão
|
||||
zh_Hans: 子分块分隔符
|
||||
llm_description: The separator used to split subchunks
|
||||
max: null
|
||||
min: null
|
||||
name: subchunk_separator
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: paragraph
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Split text into paragraphs based on separator and maximum chunk
|
||||
length, using split text as parent block or entire document as parent
|
||||
block and directly retrieve.
|
||||
ja_JP: Split text into paragraphs based on separator and maximum chunk
|
||||
length, using split text as parent block or entire document as parent
|
||||
block and directly retrieve.
|
||||
pt_BR: Dividir texto em parágrafos com base no separador e no comprimento
|
||||
máximo do bloco, usando o texto dividido como bloco pai ou documento
|
||||
completo como bloco pai e diretamente recuperá-lo.
|
||||
zh_Hans: 根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块或整个文档用作父块并直接检索。
|
||||
label:
|
||||
en_US: Parent Mode
|
||||
ja_JP: Parent Mode
|
||||
pt_BR: Modo Pai
|
||||
zh_Hans: 父块模式
|
||||
llm_description: Split text into paragraphs based on separator and maximum
|
||||
chunk length, using split text as parent block or entire document as parent
|
||||
block and directly retrieve.
|
||||
max: null
|
||||
min: null
|
||||
name: parent_mode
|
||||
options:
|
||||
- icon: ''
|
||||
label:
|
||||
en_US: Paragraph
|
||||
ja_JP: Paragraph
|
||||
pt_BR: Parágrafo
|
||||
zh_Hans: 段落
|
||||
value: paragraph
|
||||
- icon: ''
|
||||
label:
|
||||
en_US: Full Document
|
||||
ja_JP: Full Document
|
||||
pt_BR: Documento Completo
|
||||
zh_Hans: 全文
|
||||
value: full_doc
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: select
|
||||
- auto_generate: null
|
||||
default: 0
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Whether to remove extra spaces in the text
|
||||
ja_JP: Whether to remove extra spaces in the text
|
||||
pt_BR: Se deve remover espaços extras no texto
|
||||
zh_Hans: 是否移除文本中的多余空格
|
||||
label:
|
||||
en_US: Remove Extra Spaces
|
||||
ja_JP: Remove Extra Spaces
|
||||
pt_BR: Remover Espaços Extras
|
||||
zh_Hans: 移除多余空格
|
||||
llm_description: Whether to remove extra spaces in the text
|
||||
max: null
|
||||
min: null
|
||||
name: remove_extra_spaces
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
- auto_generate: null
|
||||
default: 0
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Whether to remove URLs and emails in the text
|
||||
ja_JP: Whether to remove URLs and emails in the text
|
||||
pt_BR: Se deve remover URLs e e-mails no texto
|
||||
zh_Hans: 是否移除文本中的URL和电子邮件地址
|
||||
label:
|
||||
en_US: Remove URLs and Emails
|
||||
ja_JP: Remove URLs and Emails
|
||||
pt_BR: Remover URLs e E-mails
|
||||
zh_Hans: 移除URL和电子邮件地址
|
||||
llm_description: Whether to remove URLs and emails in the text
|
||||
max: null
|
||||
min: null
|
||||
name: remove_urls_emails
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
params:
|
||||
input_text: ''
|
||||
max_length: ''
|
||||
parent_mode: ''
|
||||
remove_extra_spaces: ''
|
||||
remove_urls_emails: ''
|
||||
separator: ''
|
||||
subchunk_max_length: ''
|
||||
subchunk_separator: ''
|
||||
provider_id: langgenius/parent_child_chunk/parent_child_chunk
|
||||
provider_name: langgenius/parent_child_chunk/parent_child_chunk
|
||||
provider_type: builtin
|
||||
selected: true
|
||||
title: 父子分块处理器
|
||||
tool_configurations: {}
|
||||
tool_description: 将文档处理为父子分块结构
|
||||
tool_label: 父子分块处理器
|
||||
tool_name: parent_child_chunk
|
||||
tool_parameters:
|
||||
input_text:
|
||||
type: mixed
|
||||
value: '{{#1752565435219.output#}}'
|
||||
max_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- max_chunk_length
|
||||
parent_mode:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- parent_mode
|
||||
remove_extra_spaces:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.replace_consecutive_spaces#}}'
|
||||
remove_urls_emails:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delete_urls_email#}}'
|
||||
separator:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delimiter#}}'
|
||||
subchunk_max_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- child_max_chunk_length
|
||||
subchunk_separator:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.child_delimiter#}}'
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752490343805'
|
||||
position:
|
||||
x: 1853.5260563244174
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1853.5260563244174
|
||||
y: 281.3910724383104
|
||||
selected: true
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: Jina Reader
|
||||
datasource_name: jina_reader
|
||||
datasource_parameters:
|
||||
crawl_sub_pages:
|
||||
type: mixed
|
||||
value: '{{#rag.1752491761974.jina_crawl_sub_pages#}}'
|
||||
limit:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- '1752491761974'
|
||||
- jina_limit
|
||||
url:
|
||||
type: mixed
|
||||
value: '{{#rag.1752491761974.jina_url#}}'
|
||||
use_sitemap:
|
||||
type: mixed
|
||||
value: '{{#rag.1752491761974.jina_use_sitemap#}}'
|
||||
plugin_id: langgenius/jina_datasource
|
||||
provider_name: jina
|
||||
provider_type: website_crawl
|
||||
selected: false
|
||||
title: Jina Reader
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752491761974'
|
||||
position:
|
||||
x: 1067.7526055798794
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1067.7526055798794
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: Firecrawl
|
||||
datasource_name: crawl
|
||||
datasource_parameters:
|
||||
crawl_subpages:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_crawl_sub_pages#}}'
|
||||
exclude_paths:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_exclude_paths#}}'
|
||||
include_paths:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_include_only_paths#}}'
|
||||
limit:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- '1752565402678'
|
||||
- firecrawl_limit
|
||||
max_depth:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- '1752565402678'
|
||||
- firecrawl_max_depth
|
||||
only_main_content:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_extract_main_content#}}'
|
||||
url:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_url#}}'
|
||||
plugin_id: langgenius/firecrawl_datasource
|
||||
provider_name: firecrawl
|
||||
provider_type: website_crawl
|
||||
selected: false
|
||||
title: Firecrawl
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752565402678'
|
||||
position:
|
||||
x: 1067.7526055798794
|
||||
y: 417.32608398342404
|
||||
positionAbsolute:
|
||||
x: 1067.7526055798794
|
||||
y: 417.32608398342404
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
output_type: string
|
||||
selected: false
|
||||
title: 变量聚合器
|
||||
type: variable-aggregator
|
||||
variables:
|
||||
- - '1752491761974'
|
||||
- content
|
||||
- - '1752565402678'
|
||||
- content
|
||||
height: 129
|
||||
id: '1752565435219'
|
||||
position:
|
||||
x: 1505.4306671642219
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1505.4306671642219
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
viewport:
|
||||
x: -826.1791044466438
|
||||
y: -71.91725474841303
|
||||
zoom: 0.9980166672552107
|
||||
rag_pipeline_variables:
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: null
|
||||
label: URL
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: https://docs.dify.ai/en/
|
||||
required: true
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: jina_url
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: 10
|
||||
label: Limit
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: jina_limit
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: null
|
||||
label: Crawl sub-pages
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: jina_crawl_sub_pages
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: null
|
||||
label: Use sitemap
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: Follow the sitemap to crawl the site. If not, Jina Reader will crawl
|
||||
iteratively based on page relevance, yielding fewer but higher-quality pages.
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: jina_use_sitemap
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: URL
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: https://docs.dify.ai/en/
|
||||
required: true
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: firecrawl_url
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: true
|
||||
label: Crawl sub-pages
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: firecrawl_crawl_sub_pages
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: 10
|
||||
label: Limit
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: firecrawl_limit
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Max depth
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: ''
|
||||
required: false
|
||||
tooltips: Maximum depth to crawl relative to the entered URL. Depth 0 just scrapes
|
||||
the page of the entered url, depth 1 scrapes the url and everything after enteredURL
|
||||
+ one /, and so on.
|
||||
type: number
|
||||
unit: null
|
||||
variable: firecrawl_max_depth
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Exclude paths
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: blog/*, /about/*
|
||||
required: false
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: firecrawl_exclude_paths
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Include only paths
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: articles/*
|
||||
required: false
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: firecrawl_include_only_paths
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: firecrawl_extract_main_content
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: firecrawl_extract_main_content
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n\n
|
||||
label: delimiter
|
||||
max_length: 100
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n\n is recommended
|
||||
for splitting the original document into large parent chunks. You can also use
|
||||
special delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 1024
|
||||
label: Maximum chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n
|
||||
label: Child delimiter
|
||||
max_length: 199
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n\n is recommended
|
||||
for splitting the original document into large parent chunks. You can also use
|
||||
special delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: child_delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 512
|
||||
label: Child max chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: child_max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: paragraph
|
||||
label: Parent mode
|
||||
max_length: 48
|
||||
options:
|
||||
- full_doc
|
||||
- paragraph
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: select
|
||||
unit: null
|
||||
variable: parent_mode
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Replace consecutive spaces, newlines and tabs
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: replace_consecutive_spaces
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Delete all URLs and email addresses
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: delete_urls_email
|
Loading…
x
Reference in New Issue
Block a user