ragflow/rag/utils/opendal_conn.py

121 lines
4.2 KiB
Python
Raw Normal View History

import opendal
import logging
import pymysql
import yaml
from rag.utils import singleton
# Default location of the service configuration file; it is a relative path,
# so it is resolved against the process's current working directory.
SERVICE_CONF_PATH = "conf/service_conf.yaml"
# DDL for the key/value table used when the OpenDAL scheme is "mysql".
# The `{}` placeholder is filled with the table name via str.format().
CREATE_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS `{}` (
`key` VARCHAR(255) PRIMARY KEY,
`value` LONGBLOB,
`created_at` TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
`updated_at` TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
);
"""
# Statement that sets the server-wide max_allowed_packet value.
# The `{}` placeholder is filled with the packet size via str.format().
SET_MAX_ALLOWED_PACKET_SQL = """
SET GLOBAL max_allowed_packet={}
"""
def get_opendal_config_from_yaml(yaml_path=SERVICE_CONF_PATH):
    """Load the keyword arguments for ``opendal.Operator`` from a YAML file.

    The file is expected to contain an ``opendal`` section with a ``scheme``
    and an optional ``config`` mapping. When the scheme is ``"mysql"`` the
    connection details are taken from the top-level ``mysql`` section and a
    ``connection_string`` URL is derived from them; for every other scheme the
    ``config`` mapping is passed through unchanged next to the scheme.

    Args:
        yaml_path: Path of the YAML configuration file to read.

    Returns:
        dict: Keyword arguments, always including the key ``"scheme"``
        (which is ``None`` when the file does not define one).

    Raises:
        Exception: Any error raised while reading or parsing the file is
            logged and re-raised unchanged.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    # Substrings that mark a configuration key as a credential; such values
    # are masked before the configuration is written to the log.
    sensitive_markers = ("password", "secret", "token", "key", "connection_string")

    def _redact(mapping):
        """Return a copy of *mapping* that is safe to log."""
        return {
            k: "******" if any(m in str(k).lower() for m in sensitive_markers) else v
            for k, v in mapping.items()
        }

    try:
        with open(yaml_path, 'r', encoding='utf-8') as f:
            # An empty file parses to None; treat it as an empty mapping.
            config = yaml.safe_load(f) or {}

        # `or {}` also covers sections that are present but null in the YAML.
        opendal_config = config.get('opendal') or {}
        scheme = opendal_config.get("scheme")
        config_data = opendal_config.get("config") or {}

        if scheme == 'mysql':
            mysql_config = config.get('mysql') or {}
            kwargs = {
                "scheme": "mysql",
                "host": mysql_config.get("host", "127.0.0.1"),
                "port": str(mysql_config.get("port", 3306)),
                "user": mysql_config.get("user", "root"),
                "password": mysql_config.get("password", ""),
                "database": mysql_config.get("name", "test_open_dal"),
                "table": config_data.get("table", "opendal_storage"),
            }
            # Percent-encode the credentials so reserved URL characters
            # (e.g. '@', '/', ':') in them cannot corrupt the URL.
            user = quote(str(kwargs['user']), safe='')
            password = quote(str(kwargs['password']), safe='')
            kwargs["connection_string"] = (
                f"mysql://{user}:{password}"
                f"@{kwargs['host']}:{kwargs['port']}/{kwargs['database']}"
            )
        else:
            kwargs = {"scheme": scheme, **config_data}

        # Never log credentials in clear text.
        logging.info("Loaded OpenDAL configuration from yaml: %s", _redact(kwargs))
        return kwargs
    except Exception as e:
        logging.error("Failed to load OpenDAL configuration from yaml: %s", str(e))
        raise
@singleton
class OpenDALStorage:
    """Object storage facade backed by an ``opendal.Operator``.

    Every object is addressed by the path ``"{bucket}/{fnm}"``. The operator
    configuration is loaded with ``get_opendal_config_from_yaml()``; when the
    configured scheme is ``"mysql"`` the database is prepared (packet size and
    storage table) before the operator is created.
    """

    def __init__(self):
        """Load the configuration, prepare MySQL if needed, build the operator."""
        self._kwargs = get_opendal_config_from_yaml()
        self._scheme = self._kwargs.get('scheme', 'mysql')
        if self._scheme == 'mysql':
            self.init_db_config()
            self.init_opendal_mysql_table()
        # The scheme is already part of the kwargs, so it must not be passed
        # a second time as a separate argument.
        self._operator = opendal.Operator(**self._kwargs)
        logging.info("OpenDALStorage initialized successfully")

    @staticmethod
    def _path(bucket, fnm):
        """Return the operator path used for object *fnm* in *bucket*."""
        return f"{bucket}/{fnm}"

    def _connect(self):
        """Open a new pymysql connection from the loaded MySQL settings."""
        return pymysql.connect(
            host=self._kwargs['host'],
            port=int(self._kwargs['port']),
            user=self._kwargs['user'],
            password=self._kwargs['password'],
            database=self._kwargs['database'],
        )

    def health(self):
        """Write a small probe object and return the result of the write call."""
        bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1"
        return self._operator.write(self._path(bucket, fnm), binary)

    def put(self, bucket, fnm, binary):
        """Store *binary* under ``bucket/fnm``."""
        self._operator.write(self._path(bucket, fnm), binary)

    def get(self, bucket, fnm):
        """Return the content stored under ``bucket/fnm``."""
        return self._operator.read(self._path(bucket, fnm))

    def rm(self, bucket, fnm):
        """Delete the object stored under ``bucket/fnm``."""
        # The former argument-less `self._operator.__init__()` call after the
        # delete has been dropped: it did not contribute to the deletion.
        self._operator.delete(self._path(bucket, fnm))

    def scan(self, bucket, fnm):
        """Return the operator's scan result for the path ``bucket/fnm``."""
        return self._operator.scan(self._path(bucket, fnm))

    def obj_exist(self, bucket, fnm):
        """Return the operator's ``exists`` result for ``bucket/fnm``."""
        return self._operator.exists(self._path(bucket, fnm))

    def init_db_config(self):
        """Set the server's ``max_allowed_packet`` for the MySQL backend.

        The value is read from the ``max_allowed_packet`` entry of the loaded
        kwargs and falls back to 4194304 bytes (4 MB).

        Raises:
            Exception: Any connection or execution error is logged and
                re-raised.
        """
        # NOTE(review): SET GLOBAL usually needs elevated MySQL privileges -
        # confirm that the configured user has them.
        try:
            max_packet = self._kwargs.get('max_allowed_packet', 4194304)  # Default to 4MB if not specified
            conn = self._connect()
            try:
                with conn.cursor() as cursor:
                    cursor.execute(SET_MAX_ALLOWED_PACKET_SQL.format(max_packet))
                conn.commit()
            finally:
                # Release the connection even when execute/commit fails.
                conn.close()
            logging.info("Database configuration initialized with max_allowed_packet=%s", max_packet)
        except Exception as e:
            logging.error("Failed to initialize database configuration: %s", str(e))
            raise

    def init_opendal_mysql_table(self):
        """Create the storage table named by the ``table`` kwarg if missing."""
        table = self._kwargs['table']
        conn = self._connect()
        try:
            with conn.cursor() as cursor:
                cursor.execute(CREATE_TABLE_SQL.format(table))
            conn.commit()
        finally:
            # Release the connection even when execute/commit fails.
            conn.close()
        logging.info("Table `%s` initialized.", table)