mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-07-05 16:10:42 +00:00
Fix typo in code (#8327)
### What problem does this PR solve?

Fix typo in code

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
parent
09b7ac26ad
commit
4a2ff633e0
@ -84,14 +84,14 @@ def init_superuser():
|
||||
{"role": "user", "content": "Hello!"}], gen_conf={})
|
||||
if msg.find("ERROR: ") == 0:
|
||||
logging.error(
|
||||
"'{}' dosen't work. {}".format(
|
||||
"'{}' doesn't work. {}".format(
|
||||
tenant["llm_id"],
|
||||
msg))
|
||||
embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"])
|
||||
v, c = embd_mdl.encode(["Hello!"])
|
||||
if c == 0:
|
||||
logging.error(
|
||||
"'{}' dosen't work!".format(
|
||||
"'{}' doesn't work!".format(
|
||||
tenant["embd_id"]))
|
||||
|
||||
|
||||
|
@ -73,11 +73,11 @@ class UserCanvasService(CommonService):
|
||||
User.nickname,
|
||||
User.avatar.alias('tenant_avatar'),
|
||||
]
|
||||
angents = cls.model.select(*fields) \
|
||||
agents = cls.model.select(*fields) \
|
||||
.join(User, on=(cls.model.user_id == User.id)) \
|
||||
.where(cls.model.id == pid)
|
||||
# obj = cls.model.query(id=pid)[0]
|
||||
return True, angents.dicts()[0]
|
||||
return True, agents.dicts()[0]
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return False, None
|
||||
@ -100,25 +100,25 @@ class UserCanvasService(CommonService):
|
||||
cls.model.update_time
|
||||
]
|
||||
if keywords:
|
||||
angents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
|
||||
agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
|
||||
((cls.model.user_id.in_(joined_tenant_ids) & (cls.model.permission ==
|
||||
TenantPermission.TEAM.value)) | (
|
||||
cls.model.user_id == user_id)),
|
||||
(fn.LOWER(cls.model.title).contains(keywords.lower()))
|
||||
)
|
||||
else:
|
||||
angents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
|
||||
agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
|
||||
((cls.model.user_id.in_(joined_tenant_ids) & (cls.model.permission ==
|
||||
TenantPermission.TEAM.value)) | (
|
||||
cls.model.user_id == user_id))
|
||||
)
|
||||
if desc:
|
||||
angents = angents.order_by(cls.model.getter_by(orderby).desc())
|
||||
agents = agents.order_by(cls.model.getter_by(orderby).desc())
|
||||
else:
|
||||
angents = angents.order_by(cls.model.getter_by(orderby).asc())
|
||||
count = angents.count()
|
||||
angents = angents.paginate(page_number, items_per_page)
|
||||
return list(angents.dicts()), count
|
||||
agents = agents.order_by(cls.model.getter_by(orderby).asc())
|
||||
count = agents.count()
|
||||
agents = agents.paginate(page_number, items_per_page)
|
||||
return list(agents.dicts()), count
|
||||
|
||||
|
||||
def completion(tenant_id, agent_id, question, session_id=None, stream=True, **kwargs):
|
||||
|
@ -18,9 +18,9 @@
|
||||
# from beartype.claw import beartype_all # <-- you didn't sign up for this
|
||||
# beartype_all(conf=BeartypeConf(violation_type=UserWarning)) # <-- emit warnings from all code
|
||||
|
||||
from api.utils.log_utils import initRootLogger
|
||||
from api.utils.log_utils import init_root_logger
|
||||
from plugin import GlobalPluginManager
|
||||
initRootLogger("ragflow_server")
|
||||
init_root_logger("ragflow_server")
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
@ -158,7 +158,7 @@ def filename_type(filename):
|
||||
if re.match(r".*\.(eml|doc|docx|ppt|pptx|yml|xml|htm|json|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
|
||||
return FileType.DOC.value
|
||||
|
||||
if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus|mp3)$", filename):
|
||||
if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$", filename):
|
||||
return FileType.AURAL.value
|
||||
|
||||
if re.match(r".*\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng|icon|ico|mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa|mp4)$", filename):
|
||||
|
@ -30,7 +30,7 @@ def get_project_base_directory():
|
||||
)
|
||||
return PROJECT_BASE
|
||||
|
||||
def initRootLogger(logfile_basename: str, log_format: str = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"):
|
||||
def init_root_logger(logfile_basename: str, log_format: str = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"):
|
||||
global initialized_root_logger
|
||||
if initialized_root_logger:
|
||||
return
|
||||
|
@ -35,6 +35,6 @@ def crypt(line):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pswd = crypt(sys.argv[1])
|
||||
print(pswd)
|
||||
print(decrypt(pswd))
|
||||
passwd = crypt(sys.argv[1])
|
||||
print(passwd)
|
||||
print(decrypt(passwd))
|
||||
|
@ -312,7 +312,7 @@ class PermissionEnum(StrEnum):
|
||||
team = auto()
|
||||
|
||||
|
||||
class ChunkMethodnEnum(StrEnum):
|
||||
class ChunkMethodEnum(StrEnum):
|
||||
naive = auto()
|
||||
book = auto()
|
||||
email = auto()
|
||||
@ -382,7 +382,7 @@ class CreateDatasetReq(Base):
|
||||
description: str | None = Field(default=None, max_length=65535)
|
||||
embedding_model: Annotated[str, StringConstraints(strip_whitespace=True, max_length=255), Field(default="", serialization_alias="embd_id")]
|
||||
permission: PermissionEnum = Field(default=PermissionEnum.me, min_length=1, max_length=16)
|
||||
chunk_method: ChunkMethodnEnum = Field(default=ChunkMethodnEnum.naive, min_length=1, max_length=32, serialization_alias="parser_id")
|
||||
chunk_method: ChunkMethodEnum = Field(default=ChunkMethodEnum.naive, min_length=1, max_length=32, serialization_alias="parser_id")
|
||||
parser_config: ParserConfig | None = Field(default=None)
|
||||
|
||||
@field_validator("avatar")
|
||||
|
@ -69,7 +69,7 @@ class RAGFlowDocxParser:
|
||||
max_type = max(max_type.items(), key=lambda x: x[1])[0]
|
||||
|
||||
colnm = len(df.iloc[0, :])
|
||||
hdrows = [0] # header is not nessesarily appear in the first line
|
||||
hdrows = [0] # header is not necessarily appear in the first line
|
||||
if max_type == "Nu":
|
||||
for r in range(1, len(df)):
|
||||
tys = Counter([blockType(str(df.iloc[r, j]))
|
||||
|
@ -21,7 +21,7 @@ from rag.app.picture import vision_llm_chunk as picture_vision_llm_chunk
|
||||
from rag.prompts import vision_llm_figure_describe_prompt
|
||||
|
||||
|
||||
def vision_figure_parser_figure_data_wraper(figures_data_without_positions):
|
||||
def vision_figure_parser_figure_data_wrapper(figures_data_without_positions):
|
||||
return [
|
||||
(
|
||||
(figure_data[1], [figure_data[0]]),
|
||||
|
@ -180,13 +180,13 @@ class RAGFlowPdfParser:
|
||||
return fea
|
||||
|
||||
@staticmethod
|
||||
def sort_X_by_page(arr, threashold):
|
||||
def sort_X_by_page(arr, threshold):
|
||||
# sort using y1 first and then x1
|
||||
arr = sorted(arr, key=lambda r: (r["page_number"], r["x0"], r["top"]))
|
||||
for i in range(len(arr) - 1):
|
||||
for j in range(i, -1, -1):
|
||||
# restore the order using th
|
||||
if abs(arr[j + 1]["x0"] - arr[j]["x0"]) < threashold \
|
||||
if abs(arr[j + 1]["x0"] - arr[j]["x0"]) < threshold \
|
||||
and arr[j + 1]["top"] < arr[j]["top"] \
|
||||
and arr[j + 1]["page_number"] == arr[j]["page_number"]:
|
||||
tmp = arr[j]
|
||||
@ -264,13 +264,13 @@ class RAGFlowPdfParser:
|
||||
for b in self.boxes:
|
||||
if b.get("layout_type", "") != "table":
|
||||
continue
|
||||
ii = Recognizer.find_overlapped_with_threashold(b, rows, thr=0.3)
|
||||
ii = Recognizer.find_overlapped_with_threshold(b, rows, thr=0.3)
|
||||
if ii is not None:
|
||||
b["R"] = ii
|
||||
b["R_top"] = rows[ii]["top"]
|
||||
b["R_bott"] = rows[ii]["bottom"]
|
||||
|
||||
ii = Recognizer.find_overlapped_with_threashold(
|
||||
ii = Recognizer.find_overlapped_with_threshold(
|
||||
b, headers, thr=0.3)
|
||||
if ii is not None:
|
||||
b["H_top"] = headers[ii]["top"]
|
||||
@ -285,7 +285,7 @@ class RAGFlowPdfParser:
|
||||
b["C_left"] = clmns[ii]["x0"]
|
||||
b["C_right"] = clmns[ii]["x1"]
|
||||
|
||||
ii = Recognizer.find_overlapped_with_threashold(b, spans, thr=0.3)
|
||||
ii = Recognizer.find_overlapped_with_threshold(b, spans, thr=0.3)
|
||||
if ii is not None:
|
||||
b["H_top"] = spans[ii]["top"]
|
||||
b["H_bott"] = spans[ii]["bottom"]
|
||||
|
@ -106,7 +106,7 @@ class LayoutRecognizer(Recognizer):
|
||||
bxs.pop(i)
|
||||
continue
|
||||
|
||||
ii = self.find_overlapped_with_threashold(bxs[i], lts_,
|
||||
ii = self.find_overlapped_with_threshold(bxs[i], lts_,
|
||||
thr=0.4)
|
||||
if ii is None: # belong to nothing
|
||||
bxs[i]["layout_type"] = ""
|
||||
|
@ -52,20 +52,20 @@ class Recognizer:
|
||||
self.label_list = label_list
|
||||
|
||||
@staticmethod
|
||||
def sort_Y_firstly(arr, threashold):
|
||||
def sort_Y_firstly(arr, threshold):
|
||||
def cmp(c1, c2):
|
||||
diff = c1["top"] - c2["top"]
|
||||
if abs(diff) < threashold:
|
||||
if abs(diff) < threshold:
|
||||
diff = c1["x0"] - c2["x0"]
|
||||
return diff
|
||||
arr = sorted(arr, key=cmp_to_key(cmp))
|
||||
return arr
|
||||
|
||||
@staticmethod
|
||||
def sort_X_firstly(arr, threashold):
|
||||
def sort_X_firstly(arr, threshold):
|
||||
def cmp(c1, c2):
|
||||
diff = c1["x0"] - c2["x0"]
|
||||
if abs(diff) < threashold:
|
||||
if abs(diff) < threshold:
|
||||
diff = c1["top"] - c2["top"]
|
||||
return diff
|
||||
arr = sorted(arr, key=cmp_to_key(cmp))
|
||||
@ -239,15 +239,15 @@ class Recognizer:
|
||||
e -= 1
|
||||
break
|
||||
|
||||
max_overlaped_i, max_overlaped = None, 0
|
||||
max_overlapped_i, max_overlapped = None, 0
|
||||
for i in range(s, e):
|
||||
ov = Recognizer.overlapped_area(bxs[i], box)
|
||||
if ov <= max_overlaped:
|
||||
if ov <= max_overlapped:
|
||||
continue
|
||||
max_overlaped_i = i
|
||||
max_overlaped = ov
|
||||
max_overlapped_i = i
|
||||
max_overlapped = ov
|
||||
|
||||
return max_overlaped_i
|
||||
return max_overlapped_i
|
||||
|
||||
@staticmethod
|
||||
def find_horizontally_tightest_fit(box, boxes):
|
||||
@ -264,7 +264,7 @@ class Recognizer:
|
||||
return min_i
|
||||
|
||||
@staticmethod
|
||||
def find_overlapped_with_threashold(box, boxes, thr=0.3):
|
||||
def find_overlapped_with_threshold(box, boxes, thr=0.3):
|
||||
if not boxes:
|
||||
return
|
||||
max_overlapped_i, max_overlapped, _max_overlapped = None, thr, 0
|
||||
|
@ -84,13 +84,13 @@ def get_table_html(img, tb_cpns, ocr):
|
||||
clmns = LayoutRecognizer.layouts_cleanup(boxes, clmns, 5, 0.5)
|
||||
|
||||
for b in boxes:
|
||||
ii = LayoutRecognizer.find_overlapped_with_threashold(b, rows, thr=0.3)
|
||||
ii = LayoutRecognizer.find_overlapped_with_threshold(b, rows, thr=0.3)
|
||||
if ii is not None:
|
||||
b["R"] = ii
|
||||
b["R_top"] = rows[ii]["top"]
|
||||
b["R_bott"] = rows[ii]["bottom"]
|
||||
|
||||
ii = LayoutRecognizer.find_overlapped_with_threashold(b, headers, thr=0.3)
|
||||
ii = LayoutRecognizer.find_overlapped_with_threshold(b, headers, thr=0.3)
|
||||
if ii is not None:
|
||||
b["H_top"] = headers[ii]["top"]
|
||||
b["H_bott"] = headers[ii]["bottom"]
|
||||
@ -104,7 +104,7 @@ def get_table_html(img, tb_cpns, ocr):
|
||||
b["C_left"] = clmns[ii]["x0"]
|
||||
b["C_right"] = clmns[ii]["x1"]
|
||||
|
||||
ii = LayoutRecognizer.find_overlapped_with_threashold(b, spans, thr=0.3)
|
||||
ii = LayoutRecognizer.find_overlapped_with_threshold(b, spans, thr=0.3)
|
||||
if ii is not None:
|
||||
b["H_top"] = spans[ii]["top"]
|
||||
b["H_bott"] = spans[ii]["bottom"]
|
||||
|
@ -29,7 +29,7 @@ from tika import parser
|
||||
from api.db import LLMType
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
from deepdoc.parser import DocxParser, ExcelParser, HtmlParser, JsonParser, MarkdownParser, PdfParser, TxtParser
|
||||
from deepdoc.parser.figure_parser import VisionFigureParser, vision_figure_parser_figure_data_wraper
|
||||
from deepdoc.parser.figure_parser import VisionFigureParser, vision_figure_parser_figure_data_wrapper
|
||||
from deepdoc.parser.pdf_parser import PlainParser, VisionParser
|
||||
from rag.nlp import concat_img, find_codec, naive_merge, naive_merge_with_images, naive_merge_docx, rag_tokenizer, tokenize_chunks, tokenize_chunks_with_images, tokenize_table
|
||||
|
||||
@ -379,7 +379,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
sections, tables = Docx()(filename, binary)
|
||||
|
||||
if vision_model:
|
||||
figures_data = vision_figure_parser_figure_data_wraper(sections)
|
||||
figures_data = vision_figure_parser_figure_data_wrapper(sections)
|
||||
try:
|
||||
docx_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures_data, **kwargs)
|
||||
boosted_figures = docx_vision_parser(callback=callback)
|
||||
|
@ -21,7 +21,7 @@ import sys
|
||||
import threading
|
||||
import time
|
||||
|
||||
from api.utils.log_utils import initRootLogger, get_project_base_directory
|
||||
from api.utils.log_utils import init_root_logger, get_project_base_directory
|
||||
from graphrag.general.index import run_graphrag
|
||||
from graphrag.utils import get_llm_cache, set_llm_cache, get_tags_from_cache, set_tags_to_cache
|
||||
from rag.prompts import keyword_extraction, question_proposal, content_tagging
|
||||
@ -773,5 +773,5 @@ async def main():
|
||||
|
||||
if __name__ == "__main__":
|
||||
faulthandler.enable()
|
||||
initRootLogger(CONSUMER_NAME)
|
||||
init_root_logger(CONSUMER_NAME)
|
||||
trio.run(main)
|
||||
|
Loading…
x
Reference in New Issue
Block a user