Private
Public Access
0
0

test: Add extensive coverage for discussion metrics and compression

- Add tests/test_discussion_compression.py to verify AI sub-agent compression logic across Gemini, Anthropic, DeepSeek, and Gemini CLI providers.
- Add tests/test_discussion_metrics.py to verify AppController correctly extracts and accumulates token usage (input/output/cache) and logs token history.
This commit is contained in:
2026-06-02 01:42:41 -04:00
parent 5b7b818ed2
commit 34a7f00cb3
2 changed files with 132 additions and 0 deletions
+69
View File
@@ -0,0 +1,69 @@
import pytest
from unittest.mock import MagicMock, patch
from src import ai_client
@pytest.fixture(autouse=True)
def reset_ai_client():
    """Reset the ai_client session around every test in this module.

    The fixture is autouse, so tests do not need to request it explicitly.
    """
    # Setup: reset before the test body runs.
    ai_client.reset_session()
    yield
    # Teardown: reset again once the test has finished.
    ai_client.reset_session()
def test_discussion_compression_gemini():
    """Check discussion compression when the active provider is Gemini.

    The module-level Gemini client is swapped for a mock whose
    ``models.generate_content`` returns a canned response. The test asserts
    that the response text is returned unchanged, that exactly one generation
    call was made, and that the ``contents`` keyword argument of that call
    contains both the ``[HISTORY]`` marker and the original discussion text.
    """
    ai_client.set_provider("gemini", "gemini-2.5-flash-lite")

    mock_response = MagicMock()
    mock_response.text = "This is a compressed summary."
    mock_gemini = MagicMock()
    mock_gemini.models.generate_content.return_value = mock_response

    # _ensure_gemini_client is patched out as well; presumably it would
    # otherwise try to build a real client (confirm against ai_client).
    with patch("src.ai_client._gemini_client", mock_gemini), \
            patch("src.ai_client._ensure_gemini_client"):
        result = ai_client.run_discussion_compression("User: Hello\nAI: Hi there!")

    assert result == "This is a compressed summary."
    mock_gemini.models.generate_content.assert_called_once()

    # Only the keyword arguments are inspected, so read them directly instead
    # of unpacking call_args into an unused positional-args variable.
    kwargs = mock_gemini.models.generate_content.call_args.kwargs
    assert "[HISTORY]" in kwargs["contents"]
    assert "User: Hello" in kwargs["contents"]
def test_discussion_compression_anthropic():
    """Check discussion compression when the active provider is Anthropic.

    A mocked Anthropic client returns a response whose ``content`` is a
    single block with a ``text`` attribute. The test asserts that this text
    is returned, that ``messages.create`` was called exactly once, and that
    the first message's content includes the discussion history.
    """
    ai_client.set_provider("anthropic", "claude-3-haiku")

    # Canned response: one content block carrying the summary text.
    summary_block = MagicMock()
    summary_block.text = "Anthropic summary."
    fake_response = MagicMock()
    fake_response.content = [summary_block]

    fake_client = MagicMock()
    fake_client.messages.create.return_value = fake_response

    client_patch = patch("src.ai_client._anthropic_client", fake_client)
    ensure_patch = patch("src.ai_client._ensure_anthropic_client")
    with client_patch, ensure_patch:
        summary = ai_client.run_discussion_compression("Some history")

    assert summary == "Anthropic summary."
    fake_client.messages.create.assert_called_once()

    # Inspect the keyword arguments of the single recorded call.
    sent_kwargs = fake_client.messages.create.call_args.kwargs
    first_message = sent_kwargs["messages"][0]
    assert "Some history" in first_message["content"]
def test_discussion_compression_deepseek():
    """Check discussion compression when the active provider is DeepSeek.

    ``requests.post`` (as referenced from ``src.ai_client``) is patched to
    return a mock whose ``json()`` yields a chat-completion style payload,
    and ``_load_credentials`` is patched to supply a dummy API key. The test
    asserts that the message content from the first choice is returned.
    """
    ai_client.set_provider("deepseek", "deepseek-chat")

    fake_http_response = MagicMock()
    fake_http_response.json.return_value = {
        "choices": [{"message": {"content": "DeepSeek summary."}}]
    }

    # Build the patchers up front; they only take effect inside the `with`.
    post_patch = patch("src.ai_client.requests.post", return_value=fake_http_response)
    credentials_patch = patch(
        "src.ai_client._load_credentials",
        return_value={"deepseek": {"api_key": "test"}},
    )
    with post_patch, credentials_patch:
        summary = ai_client.run_discussion_compression("DeepSeek history")

    assert summary == "DeepSeek summary."
def test_discussion_compression_gemini_cli():
    """Check discussion compression when the active provider is the Gemini CLI.

    ``GeminiCliAdapter`` is patched so that constructing it yields a mock
    adapter whose ``send`` returns ``{"text": "CLI summary."}``. The test
    asserts that this text is what the compression call returns.
    """
    ai_client.set_provider("gemini_cli", "gemini-1.5-flash")

    fake_adapter = MagicMock()
    fake_adapter.send.return_value = {"text": "CLI summary."}

    adapter_patch = patch("src.ai_client.GeminiCliAdapter", return_value=fake_adapter)
    with adapter_patch:
        summary = ai_client.run_discussion_compression("CLI history")

    assert summary == "CLI summary."
+63
View File
@@ -0,0 +1,63 @@
import pytest
from src.app_controller import AppController
@pytest.fixture
def controller():
    """Return a fresh AppController on which ``init_state()`` has been called."""
    app_controller = AppController()
    app_controller.init_state()
    return app_controller
def test_on_comms_entry_updates_metrics(controller: AppController):
    """A single response entry updates session usage and the token history.

    Starting from zeroed counters and an empty history, one ``response``
    comms entry carrying a ``usage`` dict is fed to ``_on_comms_entry``. The
    test asserts that every usage counter matches the reported value and that
    one history record with the input/output counts and model name is added.
    """
    # Baseline: nothing has been counted yet.
    assert controller.session_usage["input_tokens"] == 0
    assert controller.session_usage["output_tokens"] == 0
    assert len(controller._token_history) == 0

    # Usage figures reported by the simulated AI response.
    reported_usage = {
        "input_tokens": 100,
        "output_tokens": 50,
        "cache_read_input_tokens": 10,
        "cache_creation_input_tokens": 5,
        "total_tokens": 165,
    }
    # Snapshot the expectations before the call so the assertions below do
    # not depend on whether the controller touches the dict it receives.
    expected_usage = dict(reported_usage)

    controller._on_comms_entry({
        "kind": "response",
        "payload": {
            "text": "Hello world",
            "usage": reported_usage,
            "model": "test-model",
        },
        "ts": "2024-01-01T00:00:00",
    })

    # Each session counter should equal the value from the single response.
    for counter_name, expected_value in expected_usage.items():
        assert controller.session_usage[counter_name] == expected_value

    # Exactly one history record, carrying the per-call figures and model.
    assert len(controller._token_history) == 1
    history_record = controller._token_history[0]
    assert history_record["input"] == 100
    assert history_record["output"] == 50
    assert history_record["model"] == "test-model"
def test_on_comms_entry_accumulates_metrics(controller: AppController):
    """Usage from successive response entries is summed, not overwritten.

    Two ``response`` entries are delivered in order; afterwards the session
    input/output counters must equal the totals and the token history must
    contain one record per entry.
    """
    usage_per_response = (
        {"input_tokens": 100, "output_tokens": 50},  # first response
        {"input_tokens": 200, "output_tokens": 20},  # second response
    )
    for usage in usage_per_response:
        controller._on_comms_entry({
            "kind": "response",
            "payload": {"usage": usage},
        })

    assert controller.session_usage["input_tokens"] == 300
    assert controller.session_usage["output_tokens"] == 70
    assert len(controller._token_history) == 2