test: Add extensive coverage for discussion metrics and compression
- Add tests/test_discussion_compression.py to verify AI sub-agent compression logic across Gemini, Anthropic, DeepSeek, and Gemini CLI providers.
- Add tests/test_discussion_metrics.py to verify that AppController correctly extracts and accumulates token usage (input/output/cache) and logs token history.
This commit is contained in:
@@ -0,0 +1,69 @@
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
from src import ai_client
|
||||
|
||||
@pytest.fixture(autouse=True)
def reset_ai_client():
    """Wrap every test in this module with an ``ai_client`` session reset.

    ``ai_client.reset_session()`` is called once before the test body runs
    and once more after it has finished.
    """
    # Setup: reset before handing control to the test.
    ai_client.reset_session()
    yield
    # Teardown: reset again once the test is done.
    ai_client.reset_session()
|
||||
|
||||
def test_discussion_compression_gemini():
    """Check discussion compression when the provider is set to "gemini".

    The module-level Gemini client is replaced by a mock whose
    ``models.generate_content`` returns a canned response. The test expects
    the response text to be returned unchanged, exactly one call to
    ``generate_content``, and a ``contents`` keyword argument that contains
    both the ``[HISTORY]`` marker and the original history text.
    """
    ai_client.set_provider("gemini", "gemini-2.5-flash-lite")

    fake_reply = MagicMock()
    fake_reply.text = "This is a compressed summary."
    fake_client = MagicMock()
    fake_client.models.generate_content.return_value = fake_reply

    with patch("src.ai_client._gemini_client", fake_client):
        # Stub out client initialisation so the mocked client stays in place.
        with patch("src.ai_client._ensure_gemini_client"):
            summary = ai_client.run_discussion_compression("User: Hello\nAI: Hi there!")

    assert summary == "This is a compressed summary."

    generate = fake_client.models.generate_content
    generate.assert_called_once()
    _, call_kwargs = generate.call_args
    prompt = call_kwargs["contents"]
    assert "[HISTORY]" in prompt
    assert "User: Hello" in prompt
|
||||
|
||||
def test_discussion_compression_anthropic():
    """Check discussion compression when the provider is set to "anthropic".

    The module-level Anthropic client is replaced by a mock whose
    ``messages.create`` returns a response carrying a single text block.
    The test expects that block's text to be returned, exactly one call to
    ``messages.create``, and the history text to appear in the content of
    the first message passed as the ``messages`` keyword argument.
    """
    ai_client.set_provider("anthropic", "claude-3-haiku")

    text_block = MagicMock()
    text_block.text = "Anthropic summary."
    fake_reply = MagicMock()
    fake_reply.content = [text_block]
    fake_client = MagicMock()
    fake_client.messages.create.return_value = fake_reply

    with patch("src.ai_client._anthropic_client", fake_client):
        # Stub out client initialisation so the mocked client stays in place.
        with patch("src.ai_client._ensure_anthropic_client"):
            summary = ai_client.run_discussion_compression("Some history")

    assert summary == "Anthropic summary."

    create = fake_client.messages.create
    create.assert_called_once()
    _, call_kwargs = create.call_args
    first_message = call_kwargs["messages"][0]
    assert "Some history" in first_message["content"]
|
||||
|
||||
def test_discussion_compression_deepseek():
    """Check discussion compression when the provider is set to "deepseek".

    ``requests.post`` (as seen from ``src.ai_client``) is patched to return
    a mock whose ``json()`` yields a chat-completion style body, and
    ``_load_credentials`` is patched to supply a dummy API key. The test
    expects the message content from that body to be returned.
    """
    ai_client.set_provider("deepseek", "deepseek-chat")

    api_body = {"choices": [{"message": {"content": "DeepSeek summary."}}]}
    fake_http_reply = MagicMock()
    fake_http_reply.json.return_value = api_body

    post_patch = patch("src.ai_client.requests.post", return_value=fake_http_reply)
    creds_patch = patch(
        "src.ai_client._load_credentials",
        return_value={"deepseek": {"api_key": "test"}},
    )
    with post_patch, creds_patch:
        summary = ai_client.run_discussion_compression("DeepSeek history")

    assert summary == "DeepSeek summary."
|
||||
|
||||
def test_discussion_compression_gemini_cli():
    """Check discussion compression when the provider is set to "gemini_cli".

    ``GeminiCliAdapter`` (as seen from ``src.ai_client``) is patched so that
    constructing it yields a mock adapter whose ``send`` returns a dict with
    a ``"text"`` entry. The test expects that text to be returned.
    """
    ai_client.set_provider("gemini_cli", "gemini-1.5-flash")

    fake_adapter = MagicMock()
    fake_adapter.send.return_value = {"text": "CLI summary."}

    adapter_patch = patch("src.ai_client.GeminiCliAdapter", return_value=fake_adapter)
    with adapter_patch:
        summary = ai_client.run_discussion_compression("CLI history")

    assert summary == "CLI summary."
|
||||
@@ -0,0 +1,63 @@
|
||||
import pytest
|
||||
from src.app_controller import AppController
|
||||
|
||||
@pytest.fixture
def controller():
    """Provide an ``AppController`` on which ``init_state()`` has been called."""
    app = AppController()
    app.init_state()
    return app
|
||||
|
||||
def test_on_comms_entry_updates_metrics(controller: AppController):
    """A single "response" entry updates session usage and token history.

    Starting from zeroed input/output counters and an empty token history,
    one entry carrying a ``usage`` payload is passed to ``_on_comms_entry``.
    Every usage counter in ``session_usage`` must then equal the value from
    the payload, and one history record with the input count, output count
    and model name must have been appended.
    """
    # Preconditions: nothing has been counted or logged yet.
    assert controller.session_usage["input_tokens"] == 0
    assert controller.session_usage["output_tokens"] == 0
    assert len(controller._token_history) == 0

    # Build a comms entry that looks like an AI response with usage data.
    usage = {
        "input_tokens": 100,
        "output_tokens": 50,
        "cache_read_input_tokens": 10,
        "cache_creation_input_tokens": 5,
        "total_tokens": 165,
    }
    # Snapshot the expected totals before the controller sees the payload.
    expected_totals = dict(usage)
    response_entry = {
        "kind": "response",
        "payload": {
            "text": "Hello world",
            "usage": usage,
            "model": "test-model",
        },
        "ts": "2024-01-01T00:00:00",
    }

    controller._on_comms_entry(response_entry)

    # Each counter must now match the value reported in the payload.
    for counter_name, expected_value in expected_totals.items():
        assert controller.session_usage[counter_name] == expected_value

    # Exactly one history record, describing this response.
    assert len(controller._token_history) == 1
    record = controller._token_history[0]
    assert record["input"] == 100
    assert record["output"] == 50
    assert record["model"] == "test-model"
|
||||
|
||||
def test_on_comms_entry_accumulates_metrics(controller: AppController):
    """Usage from successive "response" entries is summed, not overwritten.

    Two entries are passed to ``_on_comms_entry`` in order. The input and
    output counters in ``session_usage`` must hold the sums of both, and the
    token history must contain two records.
    """
    # Usage blocks for the first and second response, in delivery order.
    usage_per_response = (
        {"input_tokens": 100, "output_tokens": 50},
        {"input_tokens": 200, "output_tokens": 20},
    )
    for usage in usage_per_response:
        controller._on_comms_entry({
            "kind": "response",
            "payload": {"usage": usage},
        })

    assert controller.session_usage["input_tokens"] == 300
    assert controller.session_usage["output_tokens"] == 70
    assert len(controller._token_history) == 2
|
||||
Reference in New Issue
Block a user