diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py
index 30491476..eb74c2f1 100644
--- a/lightrag/llm/openai.py
+++ b/lightrag/llm/openai.py
@@ -211,15 +211,17 @@ async def openai_complete_if_cache(
         # Track if we've started iterating
         iteration_started = False
         final_chunk_usage = None
-
+
         try:
             iteration_started = True
             async for chunk in response:
                 # Check if this chunk has usage information (final chunk)
                 if hasattr(chunk, "usage") and chunk.usage:
                     final_chunk_usage = chunk.usage
-                    logger.debug(f"Received usage info in streaming chunk: {chunk.usage}")
-
+                    logger.debug(
+                        f"Received usage info in streaming chunk: {chunk.usage}"
+                    )
+
                 # Check if choices exists and is not empty
                 if not hasattr(chunk, "choices") or not chunk.choices:
                     logger.warning(f"Received chunk without choices: {chunk}")
@@ -231,21 +233,23 @@ async def openai_complete_if_cache(
                 ):
                     # This might be the final chunk, continue to check for usage
                     continue
-
+
                 content = chunk.choices[0].delta.content
                 if content is None:
                     continue
                 if r"\u" in content:
                     content = safe_unicode_decode(content.encode("utf-8"))
-
+
                 yield content
-
+
             # After streaming is complete, track token usage
             if token_tracker and final_chunk_usage:
                 # Use actual usage from the API
                 token_counts = {
                     "prompt_tokens": getattr(final_chunk_usage, "prompt_tokens", 0),
-                    "completion_tokens": getattr(final_chunk_usage, "completion_tokens", 0),
+                    "completion_tokens": getattr(
+                        final_chunk_usage, "completion_tokens", 0
+                    ),
                     "total_tokens": getattr(final_chunk_usage, "total_tokens", 0),
                 }
                 token_tracker.add_usage(token_counts)
@@ -471,4 +475,4 @@ async def openai_embed(
     response = await openai_async_client.embeddings.create(
         model=model, input=texts, encoding_format="float"
     )
-    return np.array([dp.embedding for dp in response.data])
\ No newline at end of file
+    return np.array([dp.embedding for dp in response.data])