#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import logging

from api.utils import get_base_config, decrypt_database_config
from api.utils.file_utils import get_project_base_directory

# Server
RAG_CONF_PATH = os.path.join(get_project_base_directory(), "conf")

# Get storage type and document engine from system environment variables
STORAGE_IMPL_TYPE = os.getenv('STORAGE_IMPL', 'MINIO')
DOC_ENGINE = os.getenv('DOC_ENGINE', 'elasticsearch')

ES = {}
INFINITY = {}
AZURE = {}
S3 = {}
MINIO = {}
OSS = {}
OS = {}

# Initialize only the configuration selected via the environment variables
# above, so that a missing config section for an unused backend does not
# cause errors at start-up.
if DOC_ENGINE == 'elasticsearch':
    ES = get_base_config("es", {})
elif DOC_ENGINE == 'opensearch':
    OS = get_base_config("os", {})
elif DOC_ENGINE == 'infinity':
    INFINITY = get_base_config("infinity", {"uri": "infinity:23817"})

if STORAGE_IMPL_TYPE in ['AZURE_SPN', 'AZURE_SAS']:
    AZURE = get_base_config("azure", {})
elif STORAGE_IMPL_TYPE == 'AWS_S3':
    S3 = get_base_config("s3", {})
elif STORAGE_IMPL_TYPE == 'MINIO':
    MINIO = decrypt_database_config(name="minio")
elif STORAGE_IMPL_TYPE == 'OSS':
    OSS = get_base_config("oss", {})
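
# For example, a deployment that uses OpenSearch with S3-compatible storage
# would export values matching the branches above (illustrative only):
#   DOC_ENGINE=opensearch
#   STORAGE_IMPL=AWS_S3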

try:
    REDIS = decrypt_database_config(name="redis")
except Exception:
    REDIS = {}

DOC_MAXIMUM_SIZE = int(os.environ.get("MAX_CONTENT_LENGTH", 128 * 1024 * 1024))
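# 128 * 1024 * 1024 bytes = 128 MiB by default; print_rag_settings() below
# logs the effective value as MAX_CONTENT_LENGTH.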

# Batch sizes introduced in PR #8266, configurable via environment variables
# (.env, Helm values, or the deployment environment):
# - DOC_BULK_SIZE: how many document chunks are processed in a single batch
#   during document parsing (default: 4).
# - EMBEDDING_BATCH_SIZE: how many text chunks are processed in a single
#   batch during embedding vectorization (default: 16).
# Tune them for higher throughput or lower memory usage.
DOC_BULK_SIZE = int(os.environ.get("DOC_BULK_SIZE", 4))
EMBEDDING_BATCH_SIZE = int(os.environ.get("EMBEDDING_BATCH_SIZE", 16))
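
# A minimal sketch of how a consumer might apply EMBEDDING_BATCH_SIZE when
# vectorizing chunks (`embed` and `chunks` are hypothetical placeholders,
# not part of this module):
#
#     for i in range(0, len(chunks), EMBEDDING_BATCH_SIZE):
#         vectors = embed(chunks[i:i + EMBEDDING_BATCH_SIZE])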

SVR_QUEUE_NAME = "rag_flow_svr_queue"
SVR_CONSUMER_GROUP_NAME = "rag_flow_svr_task_broker"
PAGERANK_FLD = "pagerank_fea"
TAG_FLD = "tag_feas"

PARALLEL_DEVICES = 0
try:
    import torch.cuda

    PARALLEL_DEVICES = torch.cuda.device_count()
    logging.info(f"found {PARALLEL_DEVICES} gpus")
except Exception:
    logging.info("can't import package 'torch'")


def print_rag_settings():
    """Log the effective RAG service settings."""
    logging.info(f"MAX_CONTENT_LENGTH: {DOC_MAXIMUM_SIZE}")
    logging.info(f"MAX_FILE_COUNT_PER_USER: {int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))}")


def get_svr_queue_name(priority: int) -> str:
    """Return the task queue name for the given priority: priority 0 maps to
    the base queue name, any other priority gets a suffixed name."""
    if priority == 0:
        return SVR_QUEUE_NAME
    return f"{SVR_QUEUE_NAME}_{priority}"


def get_svr_queue_names():
    """Return all task queue names, highest priority first."""
    return [get_svr_queue_name(priority) for priority in [1, 0]]
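
# Example: get_svr_queue_name(0) == "rag_flow_svr_queue" and
# get_svr_queue_name(1) == "rag_flow_svr_queue_1", so get_svr_queue_names()
# returns ["rag_flow_svr_queue_1", "rag_flow_svr_queue"].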