mirror of
https://github.com/HKUDS/LightRAG.git
synced 2025-12-27 06:42:13 +00:00
Fix HuggingFace output bug: decode only the newly generated tokens (slice off the prompt tokens from `output[0]`) instead of decoding the full sequence, so the response no longer echoes the input prompt
This commit is contained in:
parent
483667e630
commit
226f6f3d87
@ -266,10 +266,11 @@ async def hf_model_if_cache(
|
||||
input_ids = hf_tokenizer(
|
||||
input_prompt, return_tensors="pt", padding=True, truncation=True
|
||||
).to("cuda")
|
||||
inputs = {k: v.to(hf_model.device) for k, v in input_ids.items()}
|
||||
output = hf_model.generate(
|
||||
**input_ids, max_new_tokens=200, num_return_sequences=1, early_stopping=True
|
||||
)
|
||||
response_text = hf_tokenizer.decode(output[0], skip_special_tokens=True)
|
||||
response_text = hf_tokenizer.decode(output[0][len(inputs["input_ids"][0]):], skip_special_tokens=True)
|
||||
if hashing_kv is not None:
|
||||
await hashing_kv.upsert({args_hash: {"return": response_text, "model": model}})
|
||||
return response_text
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user