feat(conductor): Expose history bleed flags
This change introduces a new function, get_history_bleed_stats, to calculate and expose how close the current conversation history is to the provider's token limit. The initial implementation supports Anthropic, with a placeholder for Gemini.
This commit is contained in:
36
ai_client.py
36
ai_client.py
@@ -1145,4 +1145,38 @@ def send(
|
|||||||
return _send_gemini(md_content, user_message, base_dir, file_items)
|
return _send_gemini(md_content, user_message, base_dir, file_items)
|
||||||
elif _provider == "anthropic":
|
elif _provider == "anthropic":
|
||||||
return _send_anthropic(md_content, user_message, base_dir, file_items)
|
return _send_anthropic(md_content, user_message, base_dir, file_items)
|
||||||
raise ValueError(f"unknown provider: {_provider}")
|
raise ValueError(f"unknown provider: {_provider}")
|
||||||
|
|
||||||
|
def get_history_bleed_stats() -> dict:
    """
    Report how much of the provider's token budget the conversation
    history currently occupies.

    Returns a dict with keys ``provider``, ``limit``, ``current`` and
    ``percentage``. Only Anthropic has a real client-side estimate;
    Gemini (and any unknown provider) gets a zeroed placeholder.
    """
    if _provider == "anthropic":
        # Anthropic is the only provider with a reliable local estimator.
        used = _estimate_prompt_tokens([], _anthropic_history)
        cap = _ANTHROPIC_MAX_PROMPT_TOKENS
        pct = (used / cap) * 100 if cap > 0 else 0
        return {
            "provider": "anthropic",
            "limit": cap,
            "current": used,
            "percentage": pct,
        }

    if _provider == "gemini":
        # Gemini token accounting lives server-side; there is no trustworthy
        # client-side estimate yet, so report a "not implemented" payload.
        return {
            "provider": "gemini",
            "limit": _GEMINI_MAX_INPUT_TOKENS,
            "current": 0,
            "percentage": 0,
        }

    # Unknown/unset provider: hand back an empty default state.
    return {
        "provider": _provider,
        "limit": 0,
        "current": 0,
        "percentage": 0,
    }
56
tests/test_history_bleed.py
Normal file
56
tests/test_history_bleed.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
import pytest
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
|
# Import the module to be tested
|
||||||
|
import ai_client
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def reset_ai_client_session():
    """Automatically restore ai_client to a pristine session before each test."""
    ai_client.reset_session()
||||||
|
def test_anthropic_history_bleed_calculation():
    """
    get_history_bleed_stats must report the Anthropic provider's token
    usage and the correct percentage of its 180k limit.
    """
    # Arrange: switch the client over to the Anthropic provider.
    ai_client.set_provider("anthropic", "claude-3-opus-20240229")

    fake_tokens = 150_000
    # The module hardcodes a 180_000-token limit for Anthropic.
    expected_pct = (fake_tokens / 180_000) * 100

    # Stub the estimator — it is the heart of the anthropic branch —
    # then exercise the function under test.
    with patch('ai_client._estimate_prompt_tokens', return_value=fake_tokens) as estimator:
        stats = ai_client.get_history_bleed_stats()

        # Verify every field of the returned payload.
        assert stats["provider"] == "anthropic"
        assert stats["limit"] == 180_000
        assert stats["current"] == fake_tokens
        assert stats["percentage"] == pytest.approx(expected_pct)

        # The estimator must have been consulted exactly once.
        estimator.assert_called_once()
|
def test_gemini_history_bleed_not_implemented():
    """
    For Gemini, token accounting happens server-side, so
    get_history_bleed_stats should hand back the zeroed
    'not implemented' placeholder payload.
    """
    # Arrange: select the Gemini provider.
    ai_client.set_provider("gemini", "gemini-1.5-pro-latest")

    # Act.
    stats = ai_client.get_history_bleed_stats()

    # Assert the placeholder state.
    assert stats["provider"] == "gemini"
    assert stats["limit"] == 900_000  # The constant _GEMINI_MAX_INPUT_TOKENS
    assert stats["current"] == 0
    assert stats["percentage"] == 0
Reference in New Issue
Block a user