Private
Public Access
0
0
Files
manual_slop/tests/test_ai_loop_regressions_20260614.py
T
ed 4393e831b0 test(ai_client): rename send_result to send in test_ai_loop_regressions_20260614
13 references renamed (planned 12; one extra found in a comment).

Test function test_fr2_send_result_callable_in_app_controller_namespace
renamed to test_fr2_send_callable_in_app_controller_namespace.

7 tests pass.
2026-06-17 00:32:33 -04:00

253 lines
12 KiB
Python

"""
Regression tests for ai_loop_regressions_20260614.
Track: ai_loop_regressions_20260614
Spec: conductor/tracks/ai_loop_regressions_20260614/spec.md
Three bug groups, each with its own FR test prefix:
test_fr1_* -- Bug #2: error response becomes a discussion entry
test_fr2_* -- Bug #1: dead `except ai_client.ProviderError` clauses
test_fr3_* -- Bug #3: MiniMax thinking mono rendering
All tests use 1-space indentation per project style.
"""
import ast
import pytest
from unittest.mock import MagicMock, patch
from src import ai_client
from src import thinking_parser
from src.gui_2 import App
from src.events import UserRequestEvent
from src.result_types import Result, ErrorInfo, ErrorKind
def _drain_queue(app: App) -> list[tuple[str, dict]]:
 """
 Remove every item currently waiting on the controller's event queue.

 Items are pulled with ``get()`` for as long as ``empty()`` reports False
 and are returned in the order they were retrieved. An already-empty
 queue yields an empty list.
 """
 pending = app.controller.event_queue
 collected: list[tuple[str, dict]] = []
 while True:
  if pending.empty():
   return collected
  collected.append(pending.get())
def _make_event(prompt: str = "Hello AI") -> UserRequestEvent:
 """
 Build a UserRequestEvent for the given prompt.

 Every field other than *prompt* is a fixed placeholder: "Context" for
 stable_md, no file items, "History" for disc_text and "." as base_dir.
 """
 fields = {
  "prompt": prompt,
  "stable_md": "Context",
  "file_items": [],
  "disc_text": "History",
  "base_dir": ".",
 }
 return UserRequestEvent(**fields)
# region: FR1 tests (Bug #2 -- error response becomes a discussion entry)
def test_fr1_error_becomes_discussion_entry(mock_app: App, monkeypatch: pytest.MonkeyPatch) -> None:
 """
 An error Result from ai_client.send must surface as a 'response' event.

 ai_client.send is stubbed to return a Result carrying a single NETWORK
 ErrorInfo. After _handle_request_event runs, the last queued 'response'
 payload must have status='error' and include the error message in its
 text.

 Historical context (Bug #2, as described in the track spec): the error
 path used to produce an empty string that was queued as a 'done'
 response and then discarded by the `if text_content.strip():` filter in
 _on_comms_entry.
 """
 failure = Result(
  data="",
  errors=[ErrorInfo(kind=ErrorKind.NETWORK, message="connection refused", source="ai_client.test")],
 )
 noop = lambda *a, **kw: None
 # Only send() and get_combined_system_prompt() need a meaningful return value.
 monkeypatch.setattr(ai_client, "send", lambda *a, **kw: failure)
 monkeypatch.setattr(ai_client, "get_combined_system_prompt", lambda *a, **kw: "")
 for attr in (
  "set_custom_system_prompt",
  "set_base_system_prompt",
  "set_use_default_base_prompt",
  "set_project_context_marker",
  "set_model_params",
  "set_agent_tools",
  "set_current_tier",
  "get_current_tier",
 ):
  monkeypatch.setattr(ai_client, attr, noop)
 monkeypatch.setattr("src.app_controller.AppController._update_gcli_adapter", noop)
 # Discard whatever is already queued so only events from this request are inspected.
 _drain_queue(mock_app)
 mock_app.controller._handle_request_event(_make_event())
 response_events = [body for kind, body in _drain_queue(mock_app) if kind == "response"]
 assert response_events, "No 'response' event was queued for the error case"
 payload = response_events[-1]
 assert payload["status"] == "error", f"Expected status='error', got {payload.get('status')!r}"
 assert "connection refused" in payload["text"], f"Expected error message in text, got {payload.get('text')!r}"
def test_fr1_success_still_works(mock_app: App, monkeypatch: pytest.MonkeyPatch) -> None:
 """
 A successful Result from ai_client.send must surface as a 'done' response.

 ai_client.send is stubbed to return a Result whose data is plain text.
 After _handle_request_event runs, the last queued 'response' payload
 must have status='done' and carry exactly that text. This protects the
 success path from being broken by the FR1 error-handling fix.
 """
 success = Result(data="Hello back from AI")
 noop = lambda *a, **kw: None
 # Only send() and get_combined_system_prompt() need a meaningful return value.
 monkeypatch.setattr(ai_client, "send", lambda *a, **kw: success)
 monkeypatch.setattr(ai_client, "get_combined_system_prompt", lambda *a, **kw: "")
 for attr in (
  "set_custom_system_prompt",
  "set_base_system_prompt",
  "set_use_default_base_prompt",
  "set_project_context_marker",
  "set_model_params",
  "set_agent_tools",
  "set_current_tier",
  "get_current_tier",
 ):
  monkeypatch.setattr(ai_client, attr, noop)
 monkeypatch.setattr("src.app_controller.AppController._update_gcli_adapter", noop)
 # Discard whatever is already queued so only events from this request are inspected.
 _drain_queue(mock_app)
 mock_app.controller._handle_request_event(_make_event())
 response_events = [body for kind, body in _drain_queue(mock_app) if kind == "response"]
 assert response_events, "No 'response' event was queued for the success case"
 payload = response_events[-1]
 assert payload["status"] == "done", f"Expected status='done', got {payload.get('status')!r}"
 assert payload["text"] == "Hello back from AI", f"Expected data text, got {payload.get('text')!r}"
def test_fr1_ai_status_updated(mock_app: App, monkeypatch: pytest.MonkeyPatch) -> None:
 """
 After an error Result, the controller's ai_status must describe the error.

 ai_client.send is stubbed to return a Result carrying a single
 RATE_LIMIT ErrorInfo. Once _handle_request_event has run, ai_status
 must start with 'error:' and contain the error message.
 """
 failure = Result(
  data="",
  errors=[ErrorInfo(kind=ErrorKind.RATE_LIMIT, message="slow down", source="ai_client.test")],
 )
 noop = lambda *a, **kw: None
 # Only send() and get_combined_system_prompt() need a meaningful return value.
 monkeypatch.setattr(ai_client, "send", lambda *a, **kw: failure)
 monkeypatch.setattr(ai_client, "get_combined_system_prompt", lambda *a, **kw: "")
 for attr in (
  "set_custom_system_prompt",
  "set_base_system_prompt",
  "set_use_default_base_prompt",
  "set_project_context_marker",
  "set_model_params",
  "set_agent_tools",
  "set_current_tier",
  "get_current_tier",
 ):
  monkeypatch.setattr(ai_client, attr, noop)
 monkeypatch.setattr("src.app_controller.AppController._update_gcli_adapter", noop)
 _drain_queue(mock_app)
 controller = mock_app.controller
 controller._handle_request_event(_make_event())
 status = controller.ai_status
 assert status.startswith("error:"), f"Expected ai_status to start with 'error:', got {status!r}"
 assert "slow down" in status, f"Expected error message in status, got {status!r}"
# endregion: FR1 tests
# region: FR2 tests (Bug #1 -- dead except ProviderError clauses)
def test_fr2_no_provider_error_in_source() -> None:
 """
 The source of src.app_controller must contain no `ProviderError`
 attribute references.

 The module's source file is parsed with `ast` and every Attribute node
 whose attribute name is "ProviderError" is reported, whatever object it
 is accessed on (so `ai_client.ProviderError` is covered).

 Background (from the original bug report, not verifiable from this
 file): the class was removed in commit 64b787b8
 (data_oriented_error_handling_20260606 task 3.7), leaving dead
 `except ai_client.ProviderError` clauses. Python evaluates the class
 expression of an except clause when an exception is being matched, so a
 missing class turns the handler itself into an AttributeError.
 """
 # Locate the file through the import system rather than a path relative
 # to the current working directory, so the test does not depend on where
 # pytest was launched from.
 import importlib.util
 spec = importlib.util.find_spec("src.app_controller")
 assert spec is not None and spec.origin, "Could not locate src.app_controller on the import path"
 src_path = spec.origin
 with open(src_path, "r", encoding="utf-8") as f:
  tree = ast.parse(f.read(), filename=src_path)
 violations: list[tuple[int, str]] = [
  (node.lineno, ast.dump(node))
  for node in ast.walk(tree)
  if isinstance(node, ast.Attribute) and node.attr == "ProviderError"
 ]
 assert not violations, f"Found {len(violations)} ProviderError reference(s) in {src_path}: {violations}"
def test_fr2_send_callable_in_app_controller_namespace() -> None:
 """
 Sanity check: ai_client.send exists and is callable.

 This guards the FR2 fix path, where the replacement code calls send()
 and branches on result.ok. send() is deliberately NOT invoked here, so
 this test makes no claim about its return type.
 """
 assert hasattr(ai_client, "send"), "ai_client.send is the migration target; it must exist"
 assert callable(ai_client.send)
 # Smoke test only: the actual call path is, per the original note, exercised in
 # test_ai_client_result.py::test_send_public_api_returns_result
# endregion: FR2 tests
# region: FR3 tests (Bug #3 -- MiniMax thinking mono rendering)
def test_fr3_minimax_thinking_in_returned_text() -> None:
 """
 When MiniMax M2.x reasoning is enabled (caps.reasoning=True) and the
 upstream response includes a reasoning_details[0].text field, the
 returned Result.data must include the reasoning wrapped in
 <thinking>...</thinking> tags so thinking_parser.parse_thinking_trace
 can extract it for the discussion entry.

 Background (Bug #3, from the original bug report): the reasoning used
 to be stored only in the history list while the returned text was the
 bare response text without the tags, unlike the DeepSeek path which
 wraps the reasoning inline.

 All ai_client module state touched by this test, including the active
 model name, is patched through unittest.mock so it is restored when the
 `with` block exits and cannot leak into other tests.
 """
 # Fake upstream message carrying reasoning_details alongside the final answer.
 fake_message = MagicMock()
 fake_message.content = "The final answer is 42"
 fake_message.reasoning_details = [{"text": "Let me think step by step about this"}]
 fake_message.tool_calls = None
 fake_choice = MagicMock()
 fake_choice.message = fake_message
 fake_raw = MagicMock()
 fake_raw.choices = [fake_choice]
 # Records that the fake transport was reached; useful when debugging, not asserted on.
 captured_text: list[str] = []
 def _fake_send_openai_compatible(client, request, *, capabilities):
  captured_text.append("send_openai_compatible was called")
  return Result(data=MagicMock(
   text="The final answer is 42",
   tool_calls=[],
   usage_input_tokens=0,
   usage_output_tokens=0,
   usage_cache_read_tokens=0,
   usage_cache_creation_tokens=0,
   raw_response=fake_raw,
  ))
 from src import openai_compatible as oc
 from src.vendor_capabilities import register, VendorCapabilities
 # NOTE(review): register() presumably mutates a process-wide registry and is
 # not undone here -- confirm whether an unregister/reset hook exists.
 register(VendorCapabilities(vendor="minimax", model="MiniMax-M2.7", reasoning=True))
 with patch.object(ai_client, "_model", "MiniMax-M2.7", create=True), \
   patch.object(oc, "send_openai_compatible", side_effect=_fake_send_openai_compatible), \
   patch("src.ai_client._ensure_minimax_client", return_value=MagicMock()), \
   patch("src.ai_client._get_deepseek_tools", return_value=[]), \
   patch("src.ai_client._trim_minimax_history", side_effect=lambda msgs, h: None), \
   patch("src.ai_client._minimax_history", new=[]), \
   patch("src.ai_client._minimax_history_lock", new=MagicMock()):
  result = ai_client._send_minimax("system", "user", ".", None, "", False, None, None, None)
 assert isinstance(result, Result), f"_send_minimax must return a Result, got {type(result).__name__}"
 assert result.ok, f"_send_minimax must succeed for this test; got errors: {result.errors}"
 data = result.data
 assert "<thinking>" in data, f"Expected <thinking> tag in returned text, got: {data!r}"
 assert "</thinking>" in data, f"Expected </thinking> closing tag, got: {data!r}"
 assert "Let me think step by step about this" in data, f"Expected reasoning text wrapped, got: {data!r}"
 assert "The final answer is 42" in data, f"Expected original text after thinking, got: {data!r}"
def test_fr3_minimax_thinking_parsed_by_thinking_parser() -> None:
 """
 parse_thinking_trace must split <thinking>-tagged text into segments
 plus the remaining response.

 Given text consisting of a <thinking> block followed by the answer, the
 parser must return at least one segment whose content holds the
 reasoning, and response content that still contains the answer. The
 original note describes this as the contract _on_comms_entry relies on
 to populate thinking_segments in the discussion entry.
 """
 data = "<thinking>\nLet me think step by step about this\n</thinking>\n\nThe final answer is 42"
 parsed = thinking_parser.parse_thinking_trace(data)
 segments, response_content = parsed
 assert len(segments) >= 1, f"Expected at least 1 thinking segment, got {len(segments)}: {segments}"
 assert "step by step" in segments[0].content, f"Expected reasoning text in segment, got: {segments[0].content!r}"
 assert "The final answer is 42" in response_content, f"Expected response text to be preserved, got: {response_content!r}"
# endregion: FR3 tests