Commit

Fix pr All-Hands-AI#5248: Fix issue All-Hands-AI#2947: Feat: make use of litellm's response "usage" data
openhands-agent committed Nov 25, 2024
1 parent 3063dab commit bb8deb7
Showing 3 changed files with 41 additions and 50 deletions.
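In short, the change wires the agent's token accounting to the `usage` block that litellm attaches to each completion response, and keeps `litellm.token_counter` as the fallback when no usage data is reported. The fragment below is a rough, standalone sketch of that idea, not the code in this commit; the helper name `count_tokens` is made up for illustration, and only `litellm.token_counter` and the `Usage`/`ModelResponse` types are taken from the diff itself.

import litellm
from litellm.types.utils import ModelResponse, Usage


def count_tokens(
    model: str,
    messages: list[dict[str, str]],
    response: ModelResponse | None = None,
) -> int:
    # Prefer the token counts the provider reported via litellm's `usage` field.
    usage: Usage | None = getattr(response, 'usage', None) if response is not None else None
    if usage is not None and usage.total_tokens:
        return usage.total_tokens
    # No (or empty) usage data, as in the fallback test below: estimate locally.
    return litellm.token_counter(model=model, messages=messages)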
4 changes: 3 additions & 1 deletion openhands/llm/debug_mixin.py
@@ -8,7 +8,9 @@


class DebugMixin:
-    def log_prompt(self, messages: list[Message | dict[str, Any]] | Message | dict[str, Any]):
+    def log_prompt(
+        self, messages: list[Message | dict[str, Any]] | Message | dict[str, Any]
+    ):
        if not messages:
            logger.debug('No completion messages!')
            return
6 changes: 4 additions & 2 deletions openhands/llm/llm.py
@@ -491,7 +491,7 @@ def get_token_count(self, messages) -> int:
            else:
                all_tokens_available = False
                break

        if all_tokens_available:
            return total_tokens

@@ -589,7 +589,9 @@ def format_messages_for_llm(self, messages: Message | list[Message]) -> list[dic
            formatted_messages.append(formatted_message)
        return formatted_messages

-    def _update_message_token_counts(self, message: Message, usage: Usage | None) -> None:
+    def _update_message_token_counts(
+        self, message: Message, usage: Usage | None
+    ) -> None:
        """Update token counts in a message from litellm Usage data.
        Args:
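The body of the new `_update_message_token_counts` helper is collapsed in the hunk above; only its signature and docstring are visible. Purely as a speculative sketch, inferred from that signature, the docstring, and the tests in the next file (which construct `Message(..., total_tokens=30)` and expect `get_token_count` to return 30), the helper could look roughly like the module-level stand-in below. The import path `openhands.core.message` and the writable `total_tokens` field are assumptions, not confirmed by this diff.

from litellm.types.utils import Usage

from openhands.core.message import Message  # assumed import path


def update_message_token_counts(message: Message, usage: Usage | None) -> None:
    """Update token counts in a message from litellm Usage data (sketch only)."""
    if usage is None or not usage.total_tokens:
        # No provider-reported usage: leave the message untouched so that
        # get_token_count can later fall back to litellm.token_counter.
        return
    # Assumes Message exposes a writable total_tokens field, as the tests suggest.
    message.total_tokens = usage.total_tokens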
81 changes: 34 additions & 47 deletions tests/unit/test_llm.py
@@ -369,96 +369,83 @@ def test_llm_cloudflare_blockage(mock_litellm_completion, default_config):
@patch('openhands.llm.llm.litellm_completion')
def test_token_count_with_usage_data(mock_litellm_completion, default_config):
    # Mock a response with usage data
-    from litellm.types.utils import ModelResponse, Usage, Choices
    from litellm import Message as LiteLLMMessage
+    from litellm.types.utils import Choices, ModelResponse, Usage

    mock_response = ModelResponse(
        id='test-id',
-        choices=[Choices(
-            message=LiteLLMMessage(
-                role='assistant',
-                content='Test response'
-            ),
-            finish_reason='stop',
-            index=0,
-        )],
+        choices=[
+            Choices(
+                message=LiteLLMMessage(role='assistant', content='Test response'),
+                finish_reason='stop',
+                index=0,
+            )
+        ],
        created=1234567890,
        model='test-model',
        object='chat.completion',
-        usage=Usage(
-            prompt_tokens=10,
-            completion_tokens=20,
-            total_tokens=30
-        )
+        usage=Usage(prompt_tokens=10, completion_tokens=20, total_tokens=30),
    )
    mock_litellm_completion.return_value = mock_response

    llm = LLM(default_config)
-    response = llm.completion(messages=[Message(
-        role='user',
-        content=[TextContent(text='Hello!')]
-    )])
+    response = llm.completion(
+        messages=[Message(role='user', content=[TextContent(text='Hello!')])]
+    )

    # Verify the response has usage data
    assert response.usage.prompt_tokens == 10
    assert response.usage.completion_tokens == 20
    assert response.usage.total_tokens == 30

    # Verify get_token_count uses the stored token counts
-    messages = [Message(
-        role='user',
-        content=[TextContent(text='Hello!')],
-        total_tokens=30
-    )]
+    messages = [
+        Message(role='user', content=[TextContent(text='Hello!')], total_tokens=30)
+    ]
    token_count = llm.get_token_count(messages)
    assert token_count == 30


@patch('openhands.llm.llm.litellm_completion')
@patch('openhands.llm.llm.litellm.token_counter')
-def test_token_count_fallback(mock_token_counter, mock_litellm_completion, default_config):
+def test_token_count_fallback(
+    mock_token_counter, mock_litellm_completion, default_config
+):
    # Mock a response without usage data
-    from litellm.types.utils import ModelResponse, Usage, Choices
    from litellm import Message as LiteLLMMessage
+    from litellm.types.utils import Choices, ModelResponse, Usage

    mock_response = ModelResponse(
        id='test-id',
-        choices=[Choices(
-            message=LiteLLMMessage(
-                role='assistant',
-                content='Test response'
-            ),
-            finish_reason='stop',
-            index=0,
-        )],
+        choices=[
+            Choices(
+                message=LiteLLMMessage(role='assistant', content='Test response'),
+                finish_reason='stop',
+                index=0,
+            )
+        ],
        created=1234567890,
        model='test-model',
        object='chat.completion',
-        usage=Usage(
-            prompt_tokens=0,
-            completion_tokens=0,
-            total_tokens=0
-        )
+        usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0),
    )
    mock_litellm_completion.return_value = mock_response
    mock_token_counter.return_value = 42

    llm = LLM(default_config)
-    response = llm.completion(messages=[Message(
-        role='user',
-        content=[TextContent(text='Hello!')]
-    )])
+    response = llm.completion(
+        messages=[Message(role='user', content=[TextContent(text='Hello!')])]
+    )

    # Verify the response has empty usage data
    assert response.usage.prompt_tokens == 0
    assert response.usage.completion_tokens == 0
    assert response.usage.total_tokens == 0

    # Verify get_token_count falls back to litellm.token_counter
-    messages = [Message(
-        role='user',
-        content=[TextContent(text='Hello!')]
-    )]
+    messages = [Message(role='user', content=[TextContent(text='Hello!')])]
    token_count = llm.get_token_count(messages)
    assert token_count == 42
-    mock_token_counter.assert_called_once_with(model=default_config.model, messages=messages)
+    mock_token_counter.assert_called_once_with(
+        model=default_config.model, messages=messages
+    )
