mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-11-16 10:04:32 +00:00
refine citation (#161)
This commit is contained in:
parent
37cc673098
commit
f3477202fe
@ -194,7 +194,8 @@ def chat(dialog, messages, **kwargs):
|
|||||||
# try to use sql if field mapping is good to go
|
# try to use sql if field mapping is good to go
|
||||||
if field_map:
|
if field_map:
|
||||||
chat_logger.info("Use SQL to retrieval:{}".format(questions[-1]))
|
chat_logger.info("Use SQL to retrieval:{}".format(questions[-1]))
|
||||||
return use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl)
|
ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl)
|
||||||
|
if ans: return ans
|
||||||
|
|
||||||
prompt_config = dialog.prompt_config
|
prompt_config = dialog.prompt_config
|
||||||
for p in prompt_config["parameters"]:
|
for p in prompt_config["parameters"]:
|
||||||
@ -305,7 +306,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl):
|
|||||||
|
|
||||||
tbl, sql = get_table()
|
tbl, sql = get_table()
|
||||||
if tbl is None:
|
if tbl is None:
|
||||||
return None, None
|
return None
|
||||||
if tbl.get("error") and tried_times <= 2:
|
if tbl.get("error") and tried_times <= 2:
|
||||||
user_promt = """
|
user_promt = """
|
||||||
表名:{};
|
表名:{};
|
||||||
@ -333,7 +334,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl):
|
|||||||
chat_logger.info("GET table: {}".format(tbl))
|
chat_logger.info("GET table: {}".format(tbl))
|
||||||
print(tbl)
|
print(tbl)
|
||||||
if tbl.get("error") or len(tbl["rows"]) == 0:
|
if tbl.get("error") or len(tbl["rows"]) == 0:
|
||||||
return None, None
|
return None
|
||||||
|
|
||||||
docid_idx = set([ii for ii, c in enumerate(
|
docid_idx = set([ii for ii, c in enumerate(
|
||||||
tbl["columns"]) if c["name"] == "doc_id"])
|
tbl["columns"]) if c["name"] == "doc_id"])
|
||||||
|
|||||||
@ -120,7 +120,7 @@ class Pdf(PdfParser):
|
|||||||
print(tbls)
|
print(tbls)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"title": title if title else filename,
|
"title": title,
|
||||||
"authors": " ".join(authors),
|
"authors": " ".join(authors),
|
||||||
"abstract": abstr,
|
"abstract": abstr,
|
||||||
"sections": [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno", "")) for b in self.boxes[i:] if
|
"sections": [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno", "")) for b in self.boxes[i:] if
|
||||||
|
|||||||
@ -246,19 +246,22 @@ class Dealer:
|
|||||||
chunks_tks = [huqie.qie(self.qryr.rmWWW(ck)).split(" ")
|
chunks_tks = [huqie.qie(self.qryr.rmWWW(ck)).split(" ")
|
||||||
for ck in chunks]
|
for ck in chunks]
|
||||||
cites = {}
|
cites = {}
|
||||||
for i, a in enumerate(pieces_):
|
thr = 0.63
|
||||||
sim, tksim, vtsim = self.qryr.hybrid_similarity(ans_v[i],
|
while len(cites.keys()) == 0 and pieces_ and chunks_tks:
|
||||||
chunk_v,
|
for i, a in enumerate(pieces_):
|
||||||
huqie.qie(
|
sim, tksim, vtsim = self.qryr.hybrid_similarity(ans_v[i],
|
||||||
self.qryr.rmWWW(pieces_[i])).split(" "),
|
chunk_v,
|
||||||
chunks_tks,
|
huqie.qie(
|
||||||
tkweight, vtweight)
|
self.qryr.rmWWW(pieces_[i])).split(" "),
|
||||||
mx = np.max(sim) * 0.99
|
chunks_tks,
|
||||||
es_logger.info("{} SIM: {}".format(pieces_[i], mx))
|
tkweight, vtweight)
|
||||||
if mx < 0.63:
|
mx = np.max(sim) * 0.99
|
||||||
continue
|
es_logger.info("{} SIM: {}".format(pieces_[i], mx))
|
||||||
cites[idx[i]] = list(
|
if mx < thr:
|
||||||
set([str(ii) for ii in range(len(chunk_v)) if sim[ii] > mx]))[:4]
|
continue
|
||||||
|
cites[idx[i]] = list(
|
||||||
|
set([str(ii) for ii in range(len(chunk_v)) if sim[ii] > mx]))[:4]
|
||||||
|
thr *= 0.8
|
||||||
|
|
||||||
res = ""
|
res = ""
|
||||||
seted = set([])
|
seted = set([])
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user