Fix typo in code (#8327)

### What problem does this PR solve?

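Fix spelling mistakes in identifiers, comments, and log messages across 15 files. No functional change is intended.

- Log messages and comments: `dosen't` → `doesn't`, `nessesarily` → `necessarily`.
- Local names: `angents` → `agents`, `pswd` → `passwd`, `max_overlaped` → `max_overlapped`, `threashold` → `threshold`.
- Renamed public names (all call sites in this repository are updated; external callers must follow): `initRootLogger` → `init_root_logger`, `find_overlapped_with_threashold` → `find_overlapped_with_threshold`, `ChunkMethodnEnum` → `ChunkMethodEnum`, `vision_figure_parser_figure_data_wraper` → `vision_figure_parser_figure_data_wrapper`.
- Also removes a duplicated `mp3` alternative from the audio file-extension regex in `filename_type`.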

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
Jin Hai 2025-06-18 09:41:09 +08:00 committed by GitHub
parent 09b7ac26ad
commit 4a2ff633e0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 45 additions and 45 deletions

View File

@ -84,14 +84,14 @@ def init_superuser():
{"role": "user", "content": "Hello!"}], gen_conf={}) {"role": "user", "content": "Hello!"}], gen_conf={})
if msg.find("ERROR: ") == 0: if msg.find("ERROR: ") == 0:
logging.error( logging.error(
"'{}' dosen't work. {}".format( "'{}' doesn't work. {}".format(
tenant["llm_id"], tenant["llm_id"],
msg)) msg))
embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"]) embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"])
v, c = embd_mdl.encode(["Hello!"]) v, c = embd_mdl.encode(["Hello!"])
if c == 0: if c == 0:
logging.error( logging.error(
"'{}' dosen't work!".format( "'{}' doesn't work!".format(
tenant["embd_id"])) tenant["embd_id"]))

View File

@ -73,11 +73,11 @@ class UserCanvasService(CommonService):
User.nickname, User.nickname,
User.avatar.alias('tenant_avatar'), User.avatar.alias('tenant_avatar'),
] ]
angents = cls.model.select(*fields) \ agents = cls.model.select(*fields) \
.join(User, on=(cls.model.user_id == User.id)) \ .join(User, on=(cls.model.user_id == User.id)) \
.where(cls.model.id == pid) .where(cls.model.id == pid)
# obj = cls.model.query(id=pid)[0] # obj = cls.model.query(id=pid)[0]
return True, angents.dicts()[0] return True, agents.dicts()[0]
except Exception as e: except Exception as e:
print(e) print(e)
return False, None return False, None
@ -100,25 +100,25 @@ class UserCanvasService(CommonService):
cls.model.update_time cls.model.update_time
] ]
if keywords: if keywords:
angents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where( agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
((cls.model.user_id.in_(joined_tenant_ids) & (cls.model.permission == ((cls.model.user_id.in_(joined_tenant_ids) & (cls.model.permission ==
TenantPermission.TEAM.value)) | ( TenantPermission.TEAM.value)) | (
cls.model.user_id == user_id)), cls.model.user_id == user_id)),
(fn.LOWER(cls.model.title).contains(keywords.lower())) (fn.LOWER(cls.model.title).contains(keywords.lower()))
) )
else: else:
angents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where( agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
((cls.model.user_id.in_(joined_tenant_ids) & (cls.model.permission == ((cls.model.user_id.in_(joined_tenant_ids) & (cls.model.permission ==
TenantPermission.TEAM.value)) | ( TenantPermission.TEAM.value)) | (
cls.model.user_id == user_id)) cls.model.user_id == user_id))
) )
if desc: if desc:
angents = angents.order_by(cls.model.getter_by(orderby).desc()) agents = agents.order_by(cls.model.getter_by(orderby).desc())
else: else:
angents = angents.order_by(cls.model.getter_by(orderby).asc()) agents = agents.order_by(cls.model.getter_by(orderby).asc())
count = angents.count() count = agents.count()
angents = angents.paginate(page_number, items_per_page) agents = agents.paginate(page_number, items_per_page)
return list(angents.dicts()), count return list(agents.dicts()), count
def completion(tenant_id, agent_id, question, session_id=None, stream=True, **kwargs): def completion(tenant_id, agent_id, question, session_id=None, stream=True, **kwargs):

View File

@ -18,9 +18,9 @@
# from beartype.claw import beartype_all # <-- you didn't sign up for this # from beartype.claw import beartype_all # <-- you didn't sign up for this
# beartype_all(conf=BeartypeConf(violation_type=UserWarning)) # <-- emit warnings from all code # beartype_all(conf=BeartypeConf(violation_type=UserWarning)) # <-- emit warnings from all code
from api.utils.log_utils import initRootLogger from api.utils.log_utils import init_root_logger
from plugin import GlobalPluginManager from plugin import GlobalPluginManager
initRootLogger("ragflow_server") init_root_logger("ragflow_server")
import logging import logging
import os import os

View File

@ -158,7 +158,7 @@ def filename_type(filename):
if re.match(r".*\.(eml|doc|docx|ppt|pptx|yml|xml|htm|json|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename): if re.match(r".*\.(eml|doc|docx|ppt|pptx|yml|xml|htm|json|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
return FileType.DOC.value return FileType.DOC.value
if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus|mp3)$", filename): if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$", filename):
return FileType.AURAL.value return FileType.AURAL.value
if re.match(r".*\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng|icon|ico|mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa|mp4)$", filename): if re.match(r".*\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng|icon|ico|mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa|mp4)$", filename):

View File

@ -30,7 +30,7 @@ def get_project_base_directory():
) )
return PROJECT_BASE return PROJECT_BASE
def initRootLogger(logfile_basename: str, log_format: str = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"): def init_root_logger(logfile_basename: str, log_format: str = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"):
global initialized_root_logger global initialized_root_logger
if initialized_root_logger: if initialized_root_logger:
return return

View File

@ -35,6 +35,6 @@ def crypt(line):
if __name__ == "__main__": if __name__ == "__main__":
pswd = crypt(sys.argv[1]) passwd = crypt(sys.argv[1])
print(pswd) print(passwd)
print(decrypt(pswd)) print(decrypt(passwd))

View File

@ -312,7 +312,7 @@ class PermissionEnum(StrEnum):
team = auto() team = auto()
class ChunkMethodnEnum(StrEnum): class ChunkMethodEnum(StrEnum):
naive = auto() naive = auto()
book = auto() book = auto()
email = auto() email = auto()
@ -382,7 +382,7 @@ class CreateDatasetReq(Base):
description: str | None = Field(default=None, max_length=65535) description: str | None = Field(default=None, max_length=65535)
embedding_model: Annotated[str, StringConstraints(strip_whitespace=True, max_length=255), Field(default="", serialization_alias="embd_id")] embedding_model: Annotated[str, StringConstraints(strip_whitespace=True, max_length=255), Field(default="", serialization_alias="embd_id")]
permission: PermissionEnum = Field(default=PermissionEnum.me, min_length=1, max_length=16) permission: PermissionEnum = Field(default=PermissionEnum.me, min_length=1, max_length=16)
chunk_method: ChunkMethodnEnum = Field(default=ChunkMethodnEnum.naive, min_length=1, max_length=32, serialization_alias="parser_id") chunk_method: ChunkMethodEnum = Field(default=ChunkMethodEnum.naive, min_length=1, max_length=32, serialization_alias="parser_id")
parser_config: ParserConfig | None = Field(default=None) parser_config: ParserConfig | None = Field(default=None)
@field_validator("avatar") @field_validator("avatar")

View File

@ -69,7 +69,7 @@ class RAGFlowDocxParser:
max_type = max(max_type.items(), key=lambda x: x[1])[0] max_type = max(max_type.items(), key=lambda x: x[1])[0]
colnm = len(df.iloc[0, :]) colnm = len(df.iloc[0, :])
hdrows = [0] # header is not nessesarily appear in the first line hdrows = [0] # header is not necessarily appear in the first line
if max_type == "Nu": if max_type == "Nu":
for r in range(1, len(df)): for r in range(1, len(df)):
tys = Counter([blockType(str(df.iloc[r, j])) tys = Counter([blockType(str(df.iloc[r, j]))

View File

@ -21,7 +21,7 @@ from rag.app.picture import vision_llm_chunk as picture_vision_llm_chunk
from rag.prompts import vision_llm_figure_describe_prompt from rag.prompts import vision_llm_figure_describe_prompt
def vision_figure_parser_figure_data_wraper(figures_data_without_positions): def vision_figure_parser_figure_data_wrapper(figures_data_without_positions):
return [ return [
( (
(figure_data[1], [figure_data[0]]), (figure_data[1], [figure_data[0]]),

View File

@ -180,13 +180,13 @@ class RAGFlowPdfParser:
return fea return fea
@staticmethod @staticmethod
def sort_X_by_page(arr, threashold): def sort_X_by_page(arr, threshold):
# sort using y1 first and then x1 # sort using y1 first and then x1
arr = sorted(arr, key=lambda r: (r["page_number"], r["x0"], r["top"])) arr = sorted(arr, key=lambda r: (r["page_number"], r["x0"], r["top"]))
for i in range(len(arr) - 1): for i in range(len(arr) - 1):
for j in range(i, -1, -1): for j in range(i, -1, -1):
# restore the order using th # restore the order using th
if abs(arr[j + 1]["x0"] - arr[j]["x0"]) < threashold \ if abs(arr[j + 1]["x0"] - arr[j]["x0"]) < threshold \
and arr[j + 1]["top"] < arr[j]["top"] \ and arr[j + 1]["top"] < arr[j]["top"] \
and arr[j + 1]["page_number"] == arr[j]["page_number"]: and arr[j + 1]["page_number"] == arr[j]["page_number"]:
tmp = arr[j] tmp = arr[j]
@ -264,13 +264,13 @@ class RAGFlowPdfParser:
for b in self.boxes: for b in self.boxes:
if b.get("layout_type", "") != "table": if b.get("layout_type", "") != "table":
continue continue
ii = Recognizer.find_overlapped_with_threashold(b, rows, thr=0.3) ii = Recognizer.find_overlapped_with_threshold(b, rows, thr=0.3)
if ii is not None: if ii is not None:
b["R"] = ii b["R"] = ii
b["R_top"] = rows[ii]["top"] b["R_top"] = rows[ii]["top"]
b["R_bott"] = rows[ii]["bottom"] b["R_bott"] = rows[ii]["bottom"]
ii = Recognizer.find_overlapped_with_threashold( ii = Recognizer.find_overlapped_with_threshold(
b, headers, thr=0.3) b, headers, thr=0.3)
if ii is not None: if ii is not None:
b["H_top"] = headers[ii]["top"] b["H_top"] = headers[ii]["top"]
@ -285,7 +285,7 @@ class RAGFlowPdfParser:
b["C_left"] = clmns[ii]["x0"] b["C_left"] = clmns[ii]["x0"]
b["C_right"] = clmns[ii]["x1"] b["C_right"] = clmns[ii]["x1"]
ii = Recognizer.find_overlapped_with_threashold(b, spans, thr=0.3) ii = Recognizer.find_overlapped_with_threshold(b, spans, thr=0.3)
if ii is not None: if ii is not None:
b["H_top"] = spans[ii]["top"] b["H_top"] = spans[ii]["top"]
b["H_bott"] = spans[ii]["bottom"] b["H_bott"] = spans[ii]["bottom"]

View File

@ -106,7 +106,7 @@ class LayoutRecognizer(Recognizer):
bxs.pop(i) bxs.pop(i)
continue continue
ii = self.find_overlapped_with_threashold(bxs[i], lts_, ii = self.find_overlapped_with_threshold(bxs[i], lts_,
thr=0.4) thr=0.4)
if ii is None: # belong to nothing if ii is None: # belong to nothing
bxs[i]["layout_type"] = "" bxs[i]["layout_type"] = ""

View File

@ -52,20 +52,20 @@ class Recognizer:
self.label_list = label_list self.label_list = label_list
@staticmethod @staticmethod
def sort_Y_firstly(arr, threashold): def sort_Y_firstly(arr, threshold):
def cmp(c1, c2): def cmp(c1, c2):
diff = c1["top"] - c2["top"] diff = c1["top"] - c2["top"]
if abs(diff) < threashold: if abs(diff) < threshold:
diff = c1["x0"] - c2["x0"] diff = c1["x0"] - c2["x0"]
return diff return diff
arr = sorted(arr, key=cmp_to_key(cmp)) arr = sorted(arr, key=cmp_to_key(cmp))
return arr return arr
@staticmethod @staticmethod
def sort_X_firstly(arr, threashold): def sort_X_firstly(arr, threshold):
def cmp(c1, c2): def cmp(c1, c2):
diff = c1["x0"] - c2["x0"] diff = c1["x0"] - c2["x0"]
if abs(diff) < threashold: if abs(diff) < threshold:
diff = c1["top"] - c2["top"] diff = c1["top"] - c2["top"]
return diff return diff
arr = sorted(arr, key=cmp_to_key(cmp)) arr = sorted(arr, key=cmp_to_key(cmp))
@ -239,15 +239,15 @@ class Recognizer:
e -= 1 e -= 1
break break
max_overlaped_i, max_overlaped = None, 0 max_overlapped_i, max_overlapped = None, 0
for i in range(s, e): for i in range(s, e):
ov = Recognizer.overlapped_area(bxs[i], box) ov = Recognizer.overlapped_area(bxs[i], box)
if ov <= max_overlaped: if ov <= max_overlapped:
continue continue
max_overlaped_i = i max_overlapped_i = i
max_overlaped = ov max_overlapped = ov
return max_overlaped_i return max_overlapped_i
@staticmethod @staticmethod
def find_horizontally_tightest_fit(box, boxes): def find_horizontally_tightest_fit(box, boxes):
@ -264,7 +264,7 @@ class Recognizer:
return min_i return min_i
@staticmethod @staticmethod
def find_overlapped_with_threashold(box, boxes, thr=0.3): def find_overlapped_with_threshold(box, boxes, thr=0.3):
if not boxes: if not boxes:
return return
max_overlapped_i, max_overlapped, _max_overlapped = None, thr, 0 max_overlapped_i, max_overlapped, _max_overlapped = None, thr, 0

View File

@ -84,13 +84,13 @@ def get_table_html(img, tb_cpns, ocr):
clmns = LayoutRecognizer.layouts_cleanup(boxes, clmns, 5, 0.5) clmns = LayoutRecognizer.layouts_cleanup(boxes, clmns, 5, 0.5)
for b in boxes: for b in boxes:
ii = LayoutRecognizer.find_overlapped_with_threashold(b, rows, thr=0.3) ii = LayoutRecognizer.find_overlapped_with_threshold(b, rows, thr=0.3)
if ii is not None: if ii is not None:
b["R"] = ii b["R"] = ii
b["R_top"] = rows[ii]["top"] b["R_top"] = rows[ii]["top"]
b["R_bott"] = rows[ii]["bottom"] b["R_bott"] = rows[ii]["bottom"]
ii = LayoutRecognizer.find_overlapped_with_threashold(b, headers, thr=0.3) ii = LayoutRecognizer.find_overlapped_with_threshold(b, headers, thr=0.3)
if ii is not None: if ii is not None:
b["H_top"] = headers[ii]["top"] b["H_top"] = headers[ii]["top"]
b["H_bott"] = headers[ii]["bottom"] b["H_bott"] = headers[ii]["bottom"]
@ -104,7 +104,7 @@ def get_table_html(img, tb_cpns, ocr):
b["C_left"] = clmns[ii]["x0"] b["C_left"] = clmns[ii]["x0"]
b["C_right"] = clmns[ii]["x1"] b["C_right"] = clmns[ii]["x1"]
ii = LayoutRecognizer.find_overlapped_with_threashold(b, spans, thr=0.3) ii = LayoutRecognizer.find_overlapped_with_threshold(b, spans, thr=0.3)
if ii is not None: if ii is not None:
b["H_top"] = spans[ii]["top"] b["H_top"] = spans[ii]["top"]
b["H_bott"] = spans[ii]["bottom"] b["H_bott"] = spans[ii]["bottom"]

View File

@ -29,7 +29,7 @@ from tika import parser
from api.db import LLMType from api.db import LLMType
from api.db.services.llm_service import LLMBundle from api.db.services.llm_service import LLMBundle
from deepdoc.parser import DocxParser, ExcelParser, HtmlParser, JsonParser, MarkdownParser, PdfParser, TxtParser from deepdoc.parser import DocxParser, ExcelParser, HtmlParser, JsonParser, MarkdownParser, PdfParser, TxtParser
from deepdoc.parser.figure_parser import VisionFigureParser, vision_figure_parser_figure_data_wraper from deepdoc.parser.figure_parser import VisionFigureParser, vision_figure_parser_figure_data_wrapper
from deepdoc.parser.pdf_parser import PlainParser, VisionParser from deepdoc.parser.pdf_parser import PlainParser, VisionParser
from rag.nlp import concat_img, find_codec, naive_merge, naive_merge_with_images, naive_merge_docx, rag_tokenizer, tokenize_chunks, tokenize_chunks_with_images, tokenize_table from rag.nlp import concat_img, find_codec, naive_merge, naive_merge_with_images, naive_merge_docx, rag_tokenizer, tokenize_chunks, tokenize_chunks_with_images, tokenize_table
@ -379,7 +379,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
sections, tables = Docx()(filename, binary) sections, tables = Docx()(filename, binary)
if vision_model: if vision_model:
figures_data = vision_figure_parser_figure_data_wraper(sections) figures_data = vision_figure_parser_figure_data_wrapper(sections)
try: try:
docx_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures_data, **kwargs) docx_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures_data, **kwargs)
boosted_figures = docx_vision_parser(callback=callback) boosted_figures = docx_vision_parser(callback=callback)

View File

@ -21,7 +21,7 @@ import sys
import threading import threading
import time import time
from api.utils.log_utils import initRootLogger, get_project_base_directory from api.utils.log_utils import init_root_logger, get_project_base_directory
from graphrag.general.index import run_graphrag from graphrag.general.index import run_graphrag
from graphrag.utils import get_llm_cache, set_llm_cache, get_tags_from_cache, set_tags_to_cache from graphrag.utils import get_llm_cache, set_llm_cache, get_tags_from_cache, set_tags_to_cache
from rag.prompts import keyword_extraction, question_proposal, content_tagging from rag.prompts import keyword_extraction, question_proposal, content_tagging
@ -773,5 +773,5 @@ async def main():
if __name__ == "__main__": if __name__ == "__main__":
faulthandler.enable() faulthandler.enable()
initRootLogger(CONSUMER_NAME) init_root_logger(CONSUMER_NAME)
trio.run(main) trio.run(main)