# Default usage statistics for streaming responses (#6578)
## Why are these changes needed?
Enables usage statistics for streaming responses by default.
There is a similar bug in the AzureAI client. Theoretically adding the
parameter
```
model_extras={"stream_options": {"include_usage": True}}
```
should fix the problem, but I'm currently unable to test that workflow.
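
As a rough sketch of the caller-facing behaviour (the model name and API key below are placeholders), the final chunk of `create_stream` is a `CreateResult` whose `usage` carries prompt and completion token counts when usage reporting is requested:

```python
# Minimal sketch (placeholder API key/model): request usage on a streaming call
# and read it from the final CreateResult chunk.
import asyncio

from autogen_core.models import CreateResult, UserMessage
from autogen_ext.models.openai import OpenAIChatCompletionClient


async def main() -> None:
    client = OpenAIChatCompletionClient(model="gpt-4o", api_key="sk-...")
    async for chunk in client.create_stream(
        messages=[UserMessage(content="Hello", source="user")],
        include_usage=True,  # new flag; extra_create_args={"stream_options": {"include_usage": True}} also works
    ):
        if isinstance(chunk, CreateResult):
            print(chunk.usage)  # RequestUsage(prompt_tokens=..., completion_tokens=...)
        else:
            print(chunk, end="")


asyncio.run(main())
```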
## Related issue number
closes https://github.com/microsoft/autogen/issues/6548
## Checks
- [ ] I've included any doc changes needed for
<https://microsoft.github.io/autogen/>. See
<https://github.com/microsoft/autogen/blob/main/CONTRIBUTING.md> to
build and test documentation locally.
- [ ] I've added tests (if relevant) corresponding to the changes
introduced in this PR.
- [ ] I've made sure all auto checks have passed.
Commit: 03394a42c0 (parent: 9bbcfa03ac)
```diff
@@ -740,6 +740,7 @@ class BaseOpenAIChatCompletionClient(ChatCompletionClient):
         extra_create_args: Mapping[str, Any] = {},
         cancellation_token: Optional[CancellationToken] = None,
         max_consecutive_empty_chunk_tolerance: int = 0,
+        include_usage: Optional[bool] = None,
     ) -> AsyncGenerator[Union[str, CreateResult], None]:
         """Create a stream of string chunks from the model ending with a :class:`~autogen_core.models.CreateResult`.
 
@@ -748,7 +749,7 @@ class BaseOpenAIChatCompletionClient(ChatCompletionClient):
         In streaming, the default behaviour is not return token usage counts.
         See: `OpenAI API reference for possible args <https://platform.openai.com/docs/api-reference/chat/create>`_.
 
-        You can set `extra_create_args={"stream_options": {"include_usage": True}}`
+        You can set set the `include_usage` flag to True or `extra_create_args={"stream_options": {"include_usage": True}}`. If both the flag and `stream_options` are set, but to different values, an exception will be raised.
         (if supported by the accessed API) to
         return a final chunk with usage set to a :class:`~autogen_core.models.RequestUsage` object
         with prompt and completion token counts,
@@ -770,6 +771,17 @@ class BaseOpenAIChatCompletionClient(ChatCompletionClient):
             extra_create_args,
         )
 
+        if include_usage is not None:
+            if "stream_options" in create_params.create_args:
+                stream_options = create_params.create_args["stream_options"]
+                if "include_usage" in stream_options and stream_options["include_usage"] != include_usage:
+                    raise ValueError(
+                        "include_usage and extra_create_args['stream_options']['include_usage'] are both set, but differ in value."
+                    )
+            else:
+                # If stream options are not present, add them.
+                create_params.create_args["stream_options"] = {"include_usage": True}
+
         if max_consecutive_empty_chunk_tolerance != 0:
             warnings.warn(
                 "The 'max_consecutive_empty_chunk_tolerance' parameter is deprecated and will be removed in the future releases. All of empty chunks will be skipped with a warning.",
```
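
To restate the precedence rule the added block implements, here is a self-contained sketch; `resolve_stream_options` is a hypothetical helper used for illustration only, not part of the library:

```python
# Hypothetical helper mirroring the logic added above (illustration only, not library API).
from typing import Any, Dict, Optional


def resolve_stream_options(create_args: Dict[str, Any], include_usage: Optional[bool]) -> Dict[str, Any]:
    if include_usage is not None:
        if "stream_options" in create_args:
            stream_options = create_args["stream_options"]
            if "include_usage" in stream_options and stream_options["include_usage"] != include_usage:
                # The flag and caller-supplied stream_options disagree -> error, matching the check above.
                raise ValueError("include_usage and stream_options['include_usage'] differ in value.")
        else:
            # No stream_options supplied by the caller: request usage on the final chunk.
            create_args["stream_options"] = {"include_usage": True}
    return create_args


print(resolve_stream_options({}, include_usage=True))
# -> {'stream_options': {'include_usage': True}}
try:
    resolve_stream_options({"stream_options": {"include_usage": False}}, include_usage=True)
except ValueError as exc:
    print(exc)
```

The tests added in the same commit exercise both paths: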
```diff
@@ -276,6 +276,7 @@ async def test_openai_chat_completion_client_create_stream_with_usage(
     monkeypatch.setattr(AsyncCompletions, "create", _mock_create)
     client = OpenAIChatCompletionClient(model="gpt-4o", api_key="api_key")
     chunks: List[str | CreateResult] = []
+    # Check that include_usage works when set via create_args
     with caplog.at_level(logging.INFO):
         async for chunk in client.create_stream(
             messages=[UserMessage(content="Hello", source="user")],
@@ -296,6 +297,38 @@ async def test_openai_chat_completion_client_create_stream_with_usage(
     assert chunks[-1].content in caplog.text
     assert chunks[-1].usage == RequestUsage(prompt_tokens=3, completion_tokens=3)
 
+    chunks = []
+    # Check that include_usage works when set via include_usage flag
+    with caplog.at_level(logging.INFO):
+        async for chunk in client.create_stream(
+            messages=[UserMessage(content="Hello", source="user")],
+            include_usage=True,
+        ):
+            chunks.append(chunk)
+
+    assert "LLMStreamStart" in caplog.text
+    assert "LLMStreamEnd" in caplog.text
+
+    assert chunks[0] == "Hello"
+    assert chunks[1] == " Another Hello"
+    assert chunks[2] == " Yet Another Hello"
+    assert isinstance(chunks[-1], CreateResult)
+    assert isinstance(chunks[-1].content, str)
+    assert chunks[-1].content == "Hello Another Hello Yet Another Hello"
+    assert chunks[-1].content in caplog.text
+    assert chunks[-1].usage == RequestUsage(prompt_tokens=3, completion_tokens=3)
+
+    chunks = []
+    # Check that setting both flags to different values raises an exception
+
+    with pytest.raises(ValueError):
+        async for chunk in client.create_stream(
+            messages=[UserMessage(content="Hello", source="user")],
+            extra_create_args={"stream_options": {"include_usage": False}},
+            include_usage=True,
+        ):
+            chunks.append(chunk)
+
 
 @pytest.mark.asyncio
 async def test_openai_chat_completion_client_create_stream_no_usage_default(monkeypatch: pytest.MonkeyPatch) -> None:
```