4393e831b0
13 references renamed (planned 12; one extra found in a comment). Test function test_fr2_send_result_callable_in_app_controller_namespace renamed to test_fr2_send_callable_in_app_controller_namespace. 7 tests pass.
253 lines
12 KiB
Python
253 lines
12 KiB
Python
"""
|
|
Regression tests for ai_loop_regressions_20260614.
|
|
|
|
Track: ai_loop_regressions_20260614
|
|
Spec: conductor/tracks/ai_loop_regressions_20260614/spec.md
|
|
|
|
Three bug groups, each with its own FR test prefix:
|
|
test_fr1_* -- Bug #2: error response becomes a discussion entry
|
|
test_fr2_* -- Bug #1: dead `except ai_client.ProviderError` clauses
|
|
test_fr3_* -- Bug #3: MiniMax thinking mono rendering
|
|
|
|
All tests use 1-space indentation per project style.
|
|
"""
|
|
import ast
|
|
import pytest
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
from src import ai_client
|
|
from src import thinking_parser
|
|
from src.gui_2 import App
|
|
from src.events import UserRequestEvent
|
|
from src.result_types import Result, ErrorInfo, ErrorKind
|
|
|
|
|
|
def _drain_queue(app: App) -> list[tuple[str, dict]]:
|
|
drained: list[tuple[str, dict]] = []
|
|
while not app.controller.event_queue.empty():
|
|
drained.append(app.controller.event_queue.get())
|
|
return drained
|
|
|
|
|
|
def _make_event(prompt: str = "Hello AI") -> UserRequestEvent:
 """
 Build a UserRequestEvent filled with fixed placeholder context.

 Only the prompt is configurable; every other field is a constant so the
 tests that use this helper all submit the same request shape.
 """
 fields = {
  "prompt": prompt,
  "stable_md": "Context",
  "file_items": [],
  "disc_text": "History",
  "base_dir": ".",
 }
 return UserRequestEvent(**fields)
|
|
|
|
|
|
# region: FR1 tests (Bug #2 -- error response becomes a discussion entry)
|
|
|
|
def test_fr1_error_becomes_discussion_entry(mock_app: App, monkeypatch: pytest.MonkeyPatch) -> None:
 """
 An error Result from ai_client.send must surface as a 'response' event.

 ai_client.send is stubbed to return a Result carrying a single NETWORK
 ErrorInfo ("connection refused"). After _handle_request_event runs, the
 last queued 'response' payload must have status='error' and include the
 error message in its text.

 Regression context (taken from the original note; not verifiable from
 this file): the controller used a deprecated send path that returned ''
 on error, and the empty text was then dropped by _on_comms_entry's
 `if text_content.strip():` filter (src/app_controller.py:3801).
 """
 def _stub(value):
  # Factory so each stub captures its own return value.
  return lambda *a, **kw: value

 failure = Result(
  data="",
  errors=[ErrorInfo(kind=ErrorKind.NETWORK, message="connection refused", source="ai_client.test")],
 )
 monkeypatch.setattr(ai_client, "send", _stub(failure))
 # Replace the ai_client configuration functions with no-op stubs.
 for setter_name in (
  "set_custom_system_prompt",
  "set_base_system_prompt",
  "set_use_default_base_prompt",
  "set_project_context_marker",
  "set_model_params",
  "set_agent_tools",
  "set_current_tier",
 ):
  monkeypatch.setattr(ai_client, setter_name, _stub(None))
 monkeypatch.setattr(ai_client, "get_combined_system_prompt", _stub(""))
 monkeypatch.setattr(ai_client, "get_current_tier", _stub(None))
 monkeypatch.setattr("src.app_controller.AppController._update_gcli_adapter", _stub(None))

 # Discard anything queued before the request so only its events remain.
 _drain_queue(mock_app)
 mock_app.controller._handle_request_event(_make_event())

 response_events = []
 for event_name, event_payload in _drain_queue(mock_app):
  if event_name == "response":
   response_events.append(event_payload)
 assert response_events, "No 'response' event was queued for the error case"

 payload = response_events[-1]
 assert payload["status"] == "error", f"Expected status='error', got {payload.get('status')!r}"
 assert "connection refused" in payload["text"], f"Expected error message in text, got {payload.get('text')!r}"
|
|
|
|
|
|
def test_fr1_success_still_works(mock_app: App, monkeypatch: pytest.MonkeyPatch) -> None:
 """
 A successful Result must still produce a 'done' response event.

 ai_client.send is stubbed to return Result(data="Hello back from AI").
 The last queued 'response' payload must then have status='done' and carry
 exactly that text. This guards the success path against the FR1 fix.
 """
 def _stub(value):
  # Factory so each stub captures its own return value.
  return lambda *a, **kw: value

 # Stub name -> value the stub returns, in the order they are installed.
 ai_client_stubs = {
  "send": Result(data="Hello back from AI"),
  "set_custom_system_prompt": None,
  "set_base_system_prompt": None,
  "set_use_default_base_prompt": None,
  "set_project_context_marker": None,
  "set_model_params": None,
  "set_agent_tools": None,
  "set_current_tier": None,
  "get_combined_system_prompt": "",
  "get_current_tier": None,
 }
 for attr_name, returned in ai_client_stubs.items():
  monkeypatch.setattr(ai_client, attr_name, _stub(returned))
 monkeypatch.setattr("src.app_controller.AppController._update_gcli_adapter", _stub(None))

 # Discard anything queued before the request so only its events remain.
 _drain_queue(mock_app)
 mock_app.controller._handle_request_event(_make_event())

 queued = _drain_queue(mock_app)
 response_events = [body for label, body in queued if label == "response"]
 assert response_events, "No 'response' event was queued for the success case"

 payload = response_events[-1]
 assert payload["status"] == "done", f"Expected status='done', got {payload.get('status')!r}"
 assert payload["text"] == "Hello back from AI", f"Expected data text, got {payload.get('text')!r}"
|
|
|
|
|
|
def test_fr1_ai_status_updated(mock_app: App, monkeypatch: pytest.MonkeyPatch) -> None:
 """
 On error, the controller's ai_status must read 'error: <message>'.

 ai_client.send is stubbed to return a Result carrying a RATE_LIMIT
 ErrorInfo ("slow down"). After _handle_request_event runs, ai_status must
 start with 'error:' and contain the message (not 'done', not stale).
 """
 def _stub(value):
  # Factory so each stub captures its own return value.
  return lambda *a, **kw: value

 rate_limited = ErrorInfo(kind=ErrorKind.RATE_LIMIT, message="slow down", source="ai_client.test")
 monkeypatch.setattr(ai_client, "send", _stub(Result(data="", errors=[rate_limited])))

 # Replace the ai_client configuration functions with no-op stubs.
 noop_names = [
  "set_custom_system_prompt",
  "set_base_system_prompt",
  "set_use_default_base_prompt",
  "set_project_context_marker",
  "set_model_params",
  "set_agent_tools",
  "set_current_tier",
 ]
 for noop_name in noop_names:
  monkeypatch.setattr(ai_client, noop_name, _stub(None))
 monkeypatch.setattr(ai_client, "get_combined_system_prompt", _stub(""))
 monkeypatch.setattr(ai_client, "get_current_tier", _stub(None))
 monkeypatch.setattr("src.app_controller.AppController._update_gcli_adapter", _stub(None))

 controller = mock_app.controller
 _drain_queue(mock_app)
 controller._handle_request_event(_make_event())

 status = controller.ai_status
 assert status.startswith("error:"), f"Expected ai_status to start with 'error:', got {status!r}"
 assert "slow down" in status, f"Expected error message in status, got {status!r}"
|
|
|
|
|
|
# endregion: FR1 tests
|
|
|
|
|
|
# region: FR2 tests (Bug #1 -- dead except ProviderError clauses)
|
|
|
|
def test_fr2_no_provider_error_in_source() -> None:
 """
 src/app_controller.py must contain no `<something>.ProviderError` access.

 The file is parsed with ast and every Attribute node whose attribute name
 is "ProviderError" is reported with its line number and AST dump. The
 path is relative, so it resolves against the current working directory.

 Background (from the original regression note; not verifiable from this
 file): the class was removed in commit 64b787b8
 (data_oriented_error_handling_20260606 task 3.7), leaving three dead
 except clauses at lines 305, 313 and 3692. An except clause naming a
 missing attribute raises AttributeError when it is evaluated, which
 silently breaks the error path.
 """
 src_path = "src/app_controller.py"
 with open(src_path, "r", encoding="utf-8") as handle:
  source_code = handle.read()

 violations: list[tuple[int, str]] = [
  (node.lineno, ast.dump(node))
  for node in ast.walk(ast.parse(source_code))
  if isinstance(node, ast.Attribute) and node.attr == "ProviderError"
 ]
 assert not violations, f"Found {len(violations)} ProviderError reference(s) in {src_path}: {violations}"
|
|
|
|
|
|
def test_fr2_send_callable_in_app_controller_namespace() -> None:
 """
 Sanity check: ai_client.send exists and is callable.

 This guards the FR2 fix path -- the replacement code calls send() and
 branches on result.ok. It is a smoke test of the import path only:
 send() is deliberately NOT invoked here, so nothing about its return
 value is checked. Per the original note, the real call path is exercised
 in test_ai_client_result.py::test_send_public_api_returns_result.
 """
 assert hasattr(ai_client, "send"), "ai_client.send is the migration target; it must exist"
 assert callable(ai_client.send)
|
|
|
|
|
|
# endregion: FR2 tests
|
|
|
|
|
|
# region: FR3 tests (Bug #3 -- MiniMax thinking mono rendering)
|
|
|
|
def test_fr3_minimax_thinking_in_returned_text() -> None:
 """
 MiniMax reasoning must come back wrapped in <thinking>...</thinking>.

 When MiniMax M2.x reasoning is enabled (caps.reasoning=True) and the
 upstream response includes a reasoning_details[0].text field, the
 returned Result.data must include that reasoning inside
 <thinking>...</thinking> tags, followed by the normal answer text, so
 thinking_parser.parse_thinking_trace can extract it.

 Regression context (from the original note; not verifiable from this
 file): run_with_tool_loop stored reasoning_content in the history list
 (src/ai_client.py:808) but returned response.text without the tags,
 unlike the DeepSeek path (lines 2117-2118), which wraps it inline.

 Test isolation: ai_client._model is patched only for the duration of the
 call and restored afterwards, so the model name does not leak into other
 tests. NOTE(review): register() still mutates the vendor capability
 registry globally -- no unregister API is visible from this file.
 """
 # Fake upstream message carrying reasoning_details next to the answer.
 fake_message = MagicMock()
 fake_message.content = "The final answer is 42"
 fake_message.reasoning_details = [{"text": "Let me think step by step about this"}]
 fake_message.tool_calls = None
 fake_choice = MagicMock()
 fake_choice.message = fake_message
 fake_raw = MagicMock()
 fake_raw.choices = [fake_choice]

 # Records that the fake transport was used instead of a real client.
 captured_text: list[str] = []

 def _fake_send_openai_compatible(client, request, *, capabilities):
  captured_text.append("send_openai_compatible was called")
  return Result(data=MagicMock(
   text="The final answer is 42",
   tool_calls=[],
   usage_input_tokens=0,
   usage_output_tokens=0,
   usage_cache_read_tokens=0,
   usage_cache_creation_tokens=0,
   raw_response=fake_raw,
  ))

 from src import openai_compatible as oc
 from src.vendor_capabilities import register, VendorCapabilities
 register(VendorCapabilities(vendor="minimax", model="MiniMax-M2.7", reasoning=True))

 # create=True keeps this working whether or not ai_client already defines
 # _model; either way the previous state is restored when the block exits.
 with patch.object(ai_client, "_model", "MiniMax-M2.7", create=True), \
   patch.object(oc, "send_openai_compatible", side_effect=_fake_send_openai_compatible), \
   patch("src.ai_client._ensure_minimax_client", return_value=MagicMock()), \
   patch("src.ai_client._get_deepseek_tools", return_value=[]), \
   patch("src.ai_client._trim_minimax_history", side_effect=lambda msgs, h: None), \
   patch("src.ai_client._minimax_history", new=[]), \
   patch("src.ai_client._minimax_history_lock", new=MagicMock()):
  result = ai_client._send_minimax("system", "user", ".", None, "", False, None, None, None)

 assert isinstance(result, Result), f"_send_minimax must return a Result, got {type(result).__name__}"
 assert result.ok, f"_send_minimax must succeed for this test; got errors: {result.errors}"
 # Without this, a wrong patch target would only show up as a confusing
 # failure in the content assertions below.
 assert captured_text, "The patched send_openai_compatible was never invoked by _send_minimax"
 data = result.data
 assert "<thinking>" in data, f"Expected <thinking> tag in returned text, got: {data!r}"
 assert "</thinking>" in data, f"Expected </thinking> closing tag, got: {data!r}"
 assert "Let me think step by step about this" in data, f"Expected reasoning text wrapped, got: {data!r}"
 assert "The final answer is 42" in data, f"Expected original text after thinking, got: {data!r}"
|
|
|
|
|
|
def test_fr3_minimax_thinking_parsed_by_thinking_parser() -> None:
 """
 parse_thinking_trace must split tagged reasoning from the answer text.

 Given text of the form "<thinking>...</thinking>" followed by an answer,
 the parser must return at least one segment whose content holds the
 reasoning, and a response string that still contains the answer. Per the
 original note, _on_comms_entry relies on this contract to populate
 thinking_segments in the discussion entry.
 """
 reasoning = "Let me think step by step about this"
 answer = "The final answer is 42"
 data = "<thinking>\n" + reasoning + "\n</thinking>\n\n" + answer

 parsed = thinking_parser.parse_thinking_trace(data)
 segments, response_content = parsed

 assert len(segments) >= 1, f"Expected at least 1 thinking segment, got {len(segments)}: {segments}"
 assert "step by step" in segments[0].content, f"Expected reasoning text in segment, got: {segments[0].content!r}"
 assert answer in response_content, f"Expected response text to be preserved, got: {response_content!r}"
|
|
|
|
|
|
# endregion: FR3 tests
|