Merge pull request #1757 from danielaskdd/fix-issue-1746

Fix: Ensured token tracking information is present for stream responses from OpenAI
2025-08-10 17:50:45 +00:00 · 2025-07-08 18:49:44 +08:00 · 2025-07-08 18:49:44 +08:00 · a10503a573
commit a10503a573
parent b87e48c532 b6ab69e25d
1 changed file with 27 additions and 3 deletions
--- a/lightrag/llm/openai.py
+++ b/lightrag/llm/openai.py
@@ -210,9 +210,18 @@ async def openai_complete_if_cache(
        async def inner():
            # Track if we've started iterating
            iteration_started = False
            final_chunk_usage = None
            try:
                iteration_started = True
                async for chunk in response:
                    # Check if this chunk has usage information (final chunk)
                    if hasattr(chunk, "usage") and chunk.usage:
                        final_chunk_usage = chunk.usage
                        logger.debug(
                            f"Received usage info in streaming chunk: {chunk.usage}"
                        )
                    # Check if choices exists and is not empty
                    if not hasattr(chunk, "choices") or not chunk.choices:
                        logger.warning(f"Received chunk without choices: {chunk}")
@@ -222,16 +231,31 @@ async def openai_complete_if_cache(
                    if not hasattr(chunk.choices[0], "delta") or not hasattr(
                        chunk.choices[0].delta, "content"
                    ):
-                        logger.warning(
+                        # This might be the final chunk, continue to check for usage
-                            f"Received chunk without delta content: {chunk.choices[0]}"
-                        )
                        continue
                    content = chunk.choices[0].delta.content
                    if content is None:
                        continue
                    if r"\u" in content:
                        content = safe_unicode_decode(content.encode("utf-8"))
                    yield content
                # After streaming is complete, track token usage
                if token_tracker and final_chunk_usage:
                    # Use actual usage from the API
                    token_counts = {
                        "prompt_tokens": getattr(final_chunk_usage, "prompt_tokens", 0),
                        "completion_tokens": getattr(
                            final_chunk_usage, "completion_tokens", 0
                        ),
                        "total_tokens": getattr(final_chunk_usage, "total_tokens", 0),
                    }
                    token_tracker.add_usage(token_counts)
                    logger.debug(f"Streaming token usage (from API): {token_counts}")
                elif token_tracker:
                    logger.debug("No usage information available in streaming response")
            except Exception as e:
                logger.error(f"Error in stream response: {str(e)}")
                # Try to clean up resources if possible