mirror of https://github.com/HKUDS/LightRAG.git (synced 2026-01-06 11:51:00 +00:00)
Fix: Resolve timezone handling problem in PostgreSQL storage
- Changed timestamp columns to naive UTC
- Added datetime formatting utilities
- Updated SQL templates for timestamp extraction
- Simplified timestamp migration logic
This commit is contained in:
parent 375bfd57a4
commit 7e988158a9
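The convention this commit adopts throughout: timestamps are normalized to UTC, stored as naive (timezone-free) values in plain TIMESTAMP columns, and only re-tagged as UTC when serialized back out. A minimal sketch of that round trip (illustrative only, not code from this diff):

    import datetime
    from datetime import timezone

    # Write path: current UTC time with tzinfo dropped, matching the new
    # naive TIMESTAMP columns (naive, but by convention always UTC).
    stored = datetime.datetime.now(timezone.utc).replace(tzinfo=None)

    # Read path: re-attach UTC so the ISO string is unambiguous for clients.
    restored = stored.replace(tzinfo=timezone.utc)
    print(restored.isoformat())  # e.g. '2026-01-06T11:51:00.123456+00:00'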
@@ -209,20 +209,20 @@ class PostgreSQLDB:
             # Check column type
             data_type = column_info.get("data_type")
-            if data_type == "timestamp with time zone":
-                logger.info(
+            if data_type == "timestamp without time zone":
+                logger.debug(
                     f"Column {table_name}.{column_name} is already timezone-aware, no migration needed"
                 )
                 continue

             # Execute migration, explicitly specifying UTC timezone for interpreting original data
             logger.info(
-                f"Migrating {table_name}.{column_name} to timezone-aware type"
+                f"Migrating {table_name}.{column_name} from {data_type} to TIMESTAMP(0) type"
             )
             migration_sql = f"""
                 ALTER TABLE {table_name}
-                ALTER COLUMN {column_name} TYPE TIMESTAMP(0) WITH TIME ZONE
-                USING {column_name} AT TIME ZONE 'UTC'
+                ALTER COLUMN {column_name} TYPE TIMESTAMP(0),
+                ALTER COLUMN {column_name} SET DEFAULT CURRENT_TIMESTAMP
             """

             await self.execute(migration_sql)
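For reference, the data_type string compared in this hunk is what PostgreSQL reports in information_schema.columns. A standalone way to inspect it with asyncpg (a hedged sketch; the DSN and table name are placeholders, and the project normally goes through its own PostgreSQLDB wrapper):

    import asyncio
    import asyncpg

    async def show_column_types(dsn: str) -> None:
        conn = await asyncpg.connect(dsn)
        try:
            rows = await conn.fetch(
                """
                SELECT column_name, data_type
                FROM information_schema.columns
                WHERE table_name = $1
                  AND column_name IN ('create_time', 'update_time')
                """,
                "lightrag_doc_chunks",
            )
            for row in rows:
                # Prints 'timestamp without time zone' after this migration
                print(row["column_name"], "->", row["data_type"])
        finally:
            await conn.close()

    asyncio.run(show_column_types("postgres://user:pass@localhost/lightrag"))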
@@ -569,7 +569,7 @@ class PostgreSQLDB:
                     f"Successfully migrated {migration['table']}.{migration['column']}"
                 )
             else:
-                logger.info(
+                logger.debug(
                     f"Column {migration['table']}.{migration['column']} already has correct type, no migration needed"
                 )

@@ -1054,7 +1054,8 @@ class PGKVStorage(BaseKVStorage):
             return

         if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS):
-            current_time = datetime.datetime.now(timezone.utc)
+            # Get current UTC time and convert to naive datetime for database storage
+            current_time = datetime.datetime.now(timezone.utc).replace(tzinfo=None)
             for k, v in data.items():
                 upsert_sql = SQL_TEMPLATES["upsert_text_chunk"]
                 _data = {
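Why the conversion to naive datetimes has to happen before the upsert loop: naive and aware datetime objects do not interoperate in Python, so one convention has to win everywhere. A quick illustration of the failure mode this avoids (not from the diff):

    import datetime
    from datetime import timezone

    aware = datetime.datetime.now(timezone.utc)
    naive = aware.replace(tzinfo=None)

    # Mixing the two styles is an error in Python, which is why the storage
    # layer standardizes on a single style (naive UTC) everywhere:
    try:
        _ = aware < naive
    except TypeError as exc:
        print(exc)  # can't compare offset-naive and offset-aware datetimes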
@@ -1292,8 +1293,8 @@ class PGVectorStorage(BaseVectorStorage):
         if not data:
             return

-        # Get current time with UTC timezone
-        current_time = datetime.datetime.now(timezone.utc)
+        # Get current UTC time and convert to naive datetime for database storage
+        current_time = datetime.datetime.now(timezone.utc).replace(tzinfo=None)
         list_data = [
             {
                 "__id__": k,
@@ -1489,6 +1490,15 @@ class PGVectorStorage(BaseVectorStorage):
 class PGDocStatusStorage(DocStatusStorage):
     db: PostgreSQLDB = field(default=None)

+    def _format_datetime_with_timezone(self, dt):
+        """Convert datetime to ISO format string with timezone info"""
+        if dt is None:
+            return None
+        # If no timezone info, assume it's UTC time
+        if dt.tzinfo is None:
+            dt = dt.replace(tzinfo=timezone.utc)
+        return dt.isoformat()
+
     async def initialize(self):
         if self.db is None:
             self.db = await ClientManager.get_client()
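Expected behavior of the helper added above, as a standalone sketch with the class context stripped:

    import datetime
    from datetime import timezone

    def _format_datetime_with_timezone(dt):
        """Convert datetime to ISO format string with timezone info"""
        if dt is None:
            return None
        # If no timezone info, assume it's UTC time
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        return dt.isoformat()

    # A naive value read back from a TIMESTAMP column gets +00:00 attached:
    print(_format_datetime_with_timezone(datetime.datetime(2025, 7, 1, 12, 0)))
    # -> 2025-07-01T12:00:00+00:00
    print(_format_datetime_with_timezone(None))  # -> None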
@@ -1548,14 +1558,18 @@ class PGDocStatusStorage(DocStatusStorage):
             except json.JSONDecodeError:
                 chunks_list = []

+            # Convert datetime objects to ISO format strings with timezone info
+            created_at = self._format_datetime_with_timezone(result[0]["created_at"])
+            updated_at = self._format_datetime_with_timezone(result[0]["updated_at"])
+
             return dict(
                 content=result[0]["content"],
                 content_length=result[0]["content_length"],
                 content_summary=result[0]["content_summary"],
                 status=result[0]["status"],
                 chunks_count=result[0]["chunks_count"],
-                created_at=result[0]["created_at"],
-                updated_at=result[0]["updated_at"],
+                created_at=created_at,
+                updated_at=updated_at,
                 file_path=result[0]["file_path"],
                 chunks_list=chunks_list,
             )
@@ -1583,6 +1597,10 @@ class PGDocStatusStorage(DocStatusStorage):
             except json.JSONDecodeError:
                 chunks_list = []

+            # Convert datetime objects to ISO format strings with timezone info
+            created_at = self._format_datetime_with_timezone(row["created_at"])
+            updated_at = self._format_datetime_with_timezone(row["updated_at"])
+
             processed_results.append(
                 {
                     "content": row["content"],
@@ -1590,8 +1608,8 @@ class PGDocStatusStorage(DocStatusStorage):
                     "content_summary": row["content_summary"],
                     "status": row["status"],
                     "chunks_count": row["chunks_count"],
-                    "created_at": row["created_at"],
-                    "updated_at": row["updated_at"],
+                    "created_at": created_at,
+                    "updated_at": updated_at,
                     "file_path": row["file_path"],
                     "chunks_list": chunks_list,
                 }
@@ -1629,13 +1647,17 @@ class PGDocStatusStorage(DocStatusStorage):
             except json.JSONDecodeError:
                 chunks_list = []

+            # Convert datetime objects to ISO format strings with timezone info
+            created_at = self._format_datetime_with_timezone(element["created_at"])
+            updated_at = self._format_datetime_with_timezone(element["updated_at"])
+
             docs_by_status[element["id"]] = DocProcessingStatus(
                 content=element["content"],
                 content_summary=element["content_summary"],
                 content_length=element["content_length"],
                 status=element["status"],
-                created_at=element["created_at"],
-                updated_at=element["updated_at"],
+                created_at=created_at,
+                updated_at=updated_at,
                 chunks_count=element["chunks_count"],
                 file_path=element["file_path"],
                 chunks_list=chunks_list,
@@ -1687,19 +1709,26 @@ class PGDocStatusStorage(DocStatusStorage):
             return

         def parse_datetime(dt_str):
+            """Parse datetime and ensure it's stored as UTC time in database"""
             if dt_str is None:
                 return None
             if isinstance(dt_str, (datetime.date, datetime.datetime)):
-                # If it's a datetime object without timezone info, remove timezone info
+                # If it's a datetime object
                 if isinstance(dt_str, datetime.datetime):
-                    # Remove timezone info, return naive datetime object
-                    return dt_str.replace(tzinfo=None)
+                    # If no timezone info, assume it's UTC
+                    if dt_str.tzinfo is None:
+                        dt_str = dt_str.replace(tzinfo=timezone.utc)
+                    # Convert to UTC and remove timezone info for storage
+                    return dt_str.astimezone(timezone.utc).replace(tzinfo=None)
                 return dt_str
             try:
                 # Process ISO format string with timezone
                 dt = datetime.datetime.fromisoformat(dt_str)
-                # Remove timezone info, return naive datetime object
-                return dt.replace(tzinfo=None)
+                # If no timezone info, assume it's UTC
+                if dt.tzinfo is None:
+                    dt = dt.replace(tzinfo=timezone.utc)
+                # Convert to UTC and remove timezone info for storage
+                return dt.astimezone(timezone.utc).replace(tzinfo=None)
             except (ValueError, TypeError):
                 logger.warning(f"Unable to parse datetime string: {dt_str}")
                 return None
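How the reworked parse_datetime should behave for the input shapes it handles, mirrored as a standalone sketch (error handling elided for brevity):

    import datetime
    from datetime import timezone, timedelta

    # Mirrors the logic in the hunk above: every input becomes naive UTC.
    def parse_datetime(dt_str):
        if dt_str is None:
            return None
        if isinstance(dt_str, (datetime.date, datetime.datetime)):
            if isinstance(dt_str, datetime.datetime):
                if dt_str.tzinfo is None:
                    dt_str = dt_str.replace(tzinfo=timezone.utc)
                return dt_str.astimezone(timezone.utc).replace(tzinfo=None)
            return dt_str
        dt = datetime.datetime.fromisoformat(dt_str)
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        return dt.astimezone(timezone.utc).replace(tzinfo=None)

    print(parse_datetime("2025-07-01T14:00:00+02:00"))  # 2025-07-01 12:00:00
    print(parse_datetime("2025-07-01T12:00:00"))        # 2025-07-01 12:00:00 (assumed UTC)
    aware = datetime.datetime(2025, 7, 1, 14, 0, tzinfo=timezone(timedelta(hours=2)))
    print(parse_datetime(aware))                        # 2025-07-01 12:00:00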
@@ -3095,8 +3124,8 @@ TABLES = {
                    doc_name VARCHAR(1024),
                    content TEXT,
                    meta JSONB,
-                    create_time TIMESTAMP(0),
-                    update_time TIMESTAMP(0),
+                    create_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
+                    update_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
                    CONSTRAINT LIGHTRAG_DOC_FULL_PK PRIMARY KEY (workspace, id)
                )"""
     },
@@ -3110,8 +3139,8 @@ TABLES = {
                    content TEXT,
                    file_path TEXT NULL,
                    llm_cache_list JSONB NULL DEFAULT '[]'::jsonb,
-                    create_time TIMESTAMP(0) WITH TIME ZONE,
-                    update_time TIMESTAMP(0) WITH TIME ZONE,
+                    create_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
+                    update_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
                    CONSTRAINT LIGHTRAG_DOC_CHUNKS_PK PRIMARY KEY (workspace, id)
                )"""
     },
@@ -3125,8 +3154,8 @@ TABLES = {
                    content TEXT,
                    content_vector VECTOR,
                    file_path TEXT NULL,
-                    create_time TIMESTAMP(0) WITH TIME ZONE,
-                    update_time TIMESTAMP(0) WITH TIME ZONE,
+                    create_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
+                    update_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
                    CONSTRAINT LIGHTRAG_VDB_CHUNKS_PK PRIMARY KEY (workspace, id)
                )"""
     },
@@ -3137,8 +3166,8 @@ TABLES = {
                    entity_name VARCHAR(512),
                    content TEXT,
                    content_vector VECTOR,
-                    create_time TIMESTAMP(0) WITH TIME ZONE,
-                    update_time TIMESTAMP(0) WITH TIME ZONE,
+                    create_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
+                    update_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
                    chunk_ids VARCHAR(255)[] NULL,
                    file_path TEXT NULL,
                    CONSTRAINT LIGHTRAG_VDB_ENTITY_PK PRIMARY KEY (workspace, id)
@@ -3152,8 +3181,8 @@ TABLES = {
                    target_id VARCHAR(512),
                    content TEXT,
                    content_vector VECTOR,
-                    create_time TIMESTAMP(0) WITH TIME ZONE,
-                    update_time TIMESTAMP(0) WITH TIME ZONE,
+                    create_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
+                    update_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
                    chunk_ids VARCHAR(255)[] NULL,
                    file_path TEXT NULL,
                    CONSTRAINT LIGHTRAG_VDB_RELATION_PK PRIMARY KEY (workspace, id)
@@ -3168,7 +3197,7 @@ TABLES = {
                    return_value TEXT,
                    chunk_id VARCHAR(255) NULL,
                    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-                    update_time TIMESTAMP,
+                    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    CONSTRAINT LIGHTRAG_LLM_CACHE_PK PRIMARY KEY (workspace, mode, id)
                )"""
     },
@@ -3183,8 +3212,8 @@ TABLES = {
                    status varchar(64) NULL,
                    file_path TEXT NULL,
                    chunks_list JSONB NULL DEFAULT '[]'::jsonb,
-                    created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NULL,
-                    updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NULL,
+                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    CONSTRAINT LIGHTRAG_DOC_STATUS_PK PRIMARY KEY (workspace, id)
                )"""
     },
@@ -3199,11 +3228,13 @@ SQL_TEMPLATES = {
     "get_by_id_text_chunks": """SELECT id, tokens, COALESCE(content, '') as content,
                                 chunk_order_index, full_doc_id, file_path,
                                 COALESCE(llm_cache_list, '[]'::jsonb) as llm_cache_list,
-                                create_time, update_time
+                                EXTRACT(EPOCH FROM create_time)::BIGINT as create_time,
+                                EXTRACT(EPOCH FROM update_time)::BIGINT as update_time
                                 FROM LIGHTRAG_DOC_CHUNKS WHERE workspace=$1 AND id=$2
                              """,
     "get_by_id_llm_response_cache": """SELECT id, original_prompt, return_value, mode, chunk_id, cache_type,
-                                create_time, update_time
+                                EXTRACT(EPOCH FROM create_time)::BIGINT as create_time,
+                                EXTRACT(EPOCH FROM update_time)::BIGINT as update_time
                                 FROM LIGHTRAG_LLM_CACHE WHERE workspace=$1 AND id=$2
                              """,
     "get_by_mode_id_llm_response_cache": """SELECT id, original_prompt, return_value, mode, chunk_id
@@ -3215,11 +3246,13 @@ SQL_TEMPLATES = {
     "get_by_ids_text_chunks": """SELECT id, tokens, COALESCE(content, '') as content,
                                  chunk_order_index, full_doc_id, file_path,
                                  COALESCE(llm_cache_list, '[]'::jsonb) as llm_cache_list,
-                                 create_time, update_time
+                                 EXTRACT(EPOCH FROM create_time)::BIGINT as create_time,
+                                 EXTRACT(EPOCH FROM update_time)::BIGINT as update_time
                                  FROM LIGHTRAG_DOC_CHUNKS WHERE workspace=$1 AND id IN ({ids})
                               """,
     "get_by_ids_llm_response_cache": """SELECT id, original_prompt, return_value, mode, chunk_id, cache_type,
-                                 create_time, update_time
+                                 EXTRACT(EPOCH FROM create_time)::BIGINT as create_time,
+                                 EXTRACT(EPOCH FROM update_time)::BIGINT as update_time
                                  FROM LIGHTRAG_LLM_CACHE WHERE workspace=$1 AND id IN ({ids})
                               """,
     "filter_keys": "SELECT id FROM {table_name} WHERE workspace=$1 AND id IN ({ids})",
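Because the stored naive values are UTC by convention, EXTRACT(EPOCH FROM ...) on these TIMESTAMP columns yields a usable Unix timestamp (for timestamp-without-time-zone input, PostgreSQL computes the nominal seconds since 1970-01-01). A client can rebuild an aware datetime from the returned BIGINT (sketch with a made-up value):

    import datetime
    from datetime import timezone

    # A BIGINT epoch returned by the templates above, rebuilt client-side:
    epoch = 1751371200  # hypothetical create_time value from a query result
    dt = datetime.datetime.fromtimestamp(epoch, tz=timezone.utc)
    print(dt.isoformat())  # 2025-07-01T12:00:00+00:00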