mirror of
https://github.com/HKUDS/LightRAG.git
synced 2025-12-28 07:12:14 +00:00
Refactor: move build_file_path function from operate.py to utils.py
This commit is contained in:
parent
c8c3545454
commit
7b915b34f6
@ -28,6 +28,7 @@ from .utils import (
|
||||
remove_think_tags,
|
||||
linear_gradient_weighted_polling,
|
||||
process_chunks_unified,
|
||||
build_file_path,
|
||||
)
|
||||
from .base import (
|
||||
BaseGraphStorage,
|
||||
@ -43,7 +44,6 @@ from .constants import (
|
||||
DEFAULT_MAX_RELATION_TOKENS,
|
||||
DEFAULT_MAX_TOTAL_TOKENS,
|
||||
DEFAULT_RELATED_CHUNK_NUMBER,
|
||||
DEFAULT_MAX_FILE_PATH_LENGTH,
|
||||
)
|
||||
from .kg.shared_storage import get_storage_keyed_lock
|
||||
import time
|
||||
@ -3133,47 +3133,6 @@ async def kg_query_with_keywords(
|
||||
return response
|
||||
|
||||
|
||||
def build_file_path(already_file_paths, data_list, target):
    """Combine existing and newly seen file paths into one delimited string.

    Falsy entries are dropped. A path taken from ``data_list`` is appended
    only if it has not been seen before and the combined string would stay
    shorter than ``DEFAULT_MAX_FILE_PATH_LENGTH``; paths that do not fit are
    collected and reported in a single warning. A path that does not fit
    does not stop the scan, so a later, shorter path may still be appended.

    Args:
        already_file_paths: Existing file paths. Their order is kept, and
            duplicates among them are not removed from the result.
        data_list: Items providing ``.get("file_path")``.
        target: Name interpolated into the warning message.

    Returns:
        str: The retained paths joined by ``GRAPH_FIELD_SEP``.
    """
    # Paths that must not be appended a second time.
    seen = {path for path in already_file_paths if path}
    # Output segments, seeded with the non-empty existing paths in order.
    kept = [path for path in already_file_paths if path]
    current_len = len(GRAPH_FIELD_SEP.join(kept))
    skipped = []

    for item in data_list:
        candidate = item.get("file_path")
        if not candidate or candidate in seen:
            continue
        seen.add(candidate)

        # The length check always budgets for a separator, even when the
        # result is still empty.
        segment = GRAPH_FIELD_SEP + candidate
        if current_len + len(segment) >= DEFAULT_MAX_FILE_PATH_LENGTH:
            skipped.append(segment)
            continue

        # The separator is only emitted once something precedes the path.
        current_len += len(segment) if kept else len(candidate)
        kept.append(candidate)

    file_paths_ignore = "".join(skipped)
    if file_paths_ignore:
        logger.warning(
            f"Length of file_path exceeds {target}, ignoring new file: {file_paths_ignore}"
        )
    return GRAPH_FIELD_SEP.join(kept)
|
||||
|
||||
|
||||
# TODO: Deprecated, use user_prompt in QueryParam instead
|
||||
async def query_with_keywords(
|
||||
query: str,
|
||||
|
||||
@ -19,6 +19,8 @@ from lightrag.constants import (
|
||||
DEFAULT_LOG_MAX_BYTES,
|
||||
DEFAULT_LOG_BACKUP_COUNT,
|
||||
DEFAULT_LOG_FILENAME,
|
||||
GRAPH_FIELD_SEP,
|
||||
DEFAULT_MAX_FILE_PATH_LENGTH,
|
||||
)
|
||||
|
||||
|
||||
@ -1901,3 +1903,54 @@ async def process_chunks_unified(
|
||||
)
|
||||
|
||||
return unique_chunks
|
||||
|
||||
|
||||
def build_file_path(already_file_paths, data_list, target):
    """Build a length-limited, de-duplicated file path string.

    Non-empty entries of ``already_file_paths`` form the start of the result,
    in their original order. Each ``file_path`` found in ``data_list`` is
    then appended unless it is empty, was already seen, or would make the
    combined string reach ``DEFAULT_MAX_FILE_PATH_LENGTH``. Paths rejected
    for length are reported together in one warning, and scanning continues
    after a rejection.

    Args:
        already_file_paths: List of existing file paths; duplicates inside
            this list are not removed from the result.
        data_list: List of data items read via ``.get("file_path")``.
        target: Target name interpolated into the warning message.

    Returns:
        str: Combined file paths separated by ``GRAPH_FIELD_SEP``.
    """
    # Lookup set used to reject paths that are already present.
    seen = {path for path in already_file_paths if path}
    # Ordered segments of the final string.
    kept = [path for path in already_file_paths if path]
    current_len = len(GRAPH_FIELD_SEP.join(kept))
    skipped = []

    for item in data_list:
        candidate = item.get("file_path")
        if not candidate or candidate in seen:
            continue
        seen.add(candidate)

        # The separator is counted in the length check even for the very
        # first path added to an empty result.
        segment = GRAPH_FIELD_SEP + candidate
        if current_len + len(segment) >= DEFAULT_MAX_FILE_PATH_LENGTH:
            skipped.append(segment)
            continue

        # No leading separator when nothing has been kept yet.
        current_len += len(segment) if kept else len(candidate)
        kept.append(candidate)

    file_paths_ignore = "".join(skipped)
    if file_paths_ignore:
        logger.warning(
            f"Length of file_path exceeds {target}, ignoring new file: {file_paths_ignore}"
        )
    return GRAPH_FIELD_SEP.join(kept)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user