mirror of
https://github.com/HKUDS/LightRAG.git
synced 2025-12-27 06:42:13 +00:00
Fix HuggingFace output bug: decode only the newly generated tokens (slice off the prompt tokens from `output[0]`) instead of decoding the full sequence, so the response no longer echoes the input prompt
This commit is contained in:
parent
483667e630
commit
226f6f3d87
@ -266,10 +266,11 @@ async def hf_model_if_cache(
|
||||
input_ids = hf_tokenizer(
|
||||
input_prompt, return_tensors="pt", padding=True, truncation=True
|
||||
).to("cuda")
|
||||
inputs = {k: v.to(hf_model.device) for k, v in input_ids.items()}
|
||||
output = hf_model.generate(
|
||||
**input_ids, max_new_tokens=200, num_return_sequences=1, early_stopping=True
|
||||
)
|
||||
response_text = hf_tokenizer.decode(output[0], skip_special_tokens=True)
|
||||
response_text = hf_tokenizer.decode(output[0][len(inputs["input_ids"][0]):], skip_special_tokens=True)
|
||||
if hashing_kv is not None:
|
||||
await hashing_kv.upsert({args_hash: {"return": response_text, "model": model}})
|
||||
return response_text
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user