diff --git a/conductor/tracks/result_migration_app_controller_20260618/state.toml b/conductor/tracks/result_migration_app_controller_20260618/state.toml index c027a95d..c6b2c942 100644 --- a/conductor/tracks/result_migration_app_controller_20260618/state.toml +++ b/conductor/tracks/result_migration_app_controller_20260618/state.toml @@ -28,7 +28,7 @@ phase_5 = { status = "pending", checkpointsha = "", name = "Verify, document, en t1_1 = { status = "pending", commit_sha = "", description = "Create sub-track folder (spec.md exists; plan.md, metadata.json, state.toml)" } t1_2 = { status = "pending", commit_sha = "", description = "Update conductor/tracks.md with the new sub-track row" } t1_3 = { status = "completed", commit_sha = "", description = "Fix _offload_entry_payload call site in src/app_controller.py:3709-3725 (unwrap Result from log_tool_call; log_tool_output already returns Optional[str])" } -t1_4 = { status = "pending", commit_sha = "", description = "Add 2 unwrap-path tests in tests/test_app_controller_offloading.py" } +t1_4 = { status = "completed", commit_sha = "", description = "Add 2 unwrap-path tests in tests/test_app_controller_offloading.py (test_offload_entry_payload_tool_call_unwraps_result + test_offload_entry_payload_preserves_script_on_log_tool_call_error)" } t1_5 = { status = "pending", commit_sha = "", description = "Run targeted regression test (test_tool_ask_approval + test_execution_sim_live); verify both pass" } t1_6 = { status = "pending", commit_sha = "", description = "Phase 1 checkpoint commit" } diff --git a/tests/test_app_controller_offloading.py b/tests/test_app_controller_offloading.py index cd72ab91..cdee541a 100644 --- a/tests/test_app_controller_offloading.py +++ b/tests/test_app_controller_offloading.py @@ -6,6 +6,7 @@ from pathlib import Path from unittest.mock import MagicMock, patch from src.app_controller import AppController from src import session_logger, paths, ai_client, project_manager +from src.result_types import Result, ErrorInfo, ErrorKind @pytest.fixture def tmp_session_dir(tmp_path, monkeypatch): @@ -89,26 +90,106 @@ def test_on_tool_log_offloading(app_controller, tmp_session_dir): """ script = "Get-Process" result = "Process list..." - + with patch("src.ai_client.get_current_tier", return_value="Tier 3"): app_controller._on_tool_log(script, result) - - # Verify files were created in session directory - scripts_dir = tmp_session_dir / "scripts" - outputs_dir = tmp_session_dir / "outputs" - - script_files = list(scripts_dir.glob("script_*.ps1")) - assert len(script_files) == 1 - assert script_files[0].read_text(encoding="utf-8") == script - - output_files = list(outputs_dir.glob("output_*.txt")) - # We expect at least one output file for the result - assert len(output_files) >= 1 - assert any(f.read_text(encoding="utf-8") == result for f in output_files) - - # Verify AppController internal state - with app_controller._pending_tool_calls_lock: - assert len(app_controller._pending_tool_calls) == 1 - assert app_controller._pending_tool_calls[0]["script"] == script - assert app_controller._pending_tool_calls[0]["result"] == result - assert app_controller._pending_tool_calls[0]["source_tier"] == "Tier 3" \ No newline at end of file + + # Verify files were created in session directory + scripts_dir = tmp_session_dir / "scripts" + outputs_dir = tmp_session_dir / "outputs" + + script_files = list(scripts_dir.glob("script_*.ps1")) + assert len(script_files) == 1 + assert script_files[0].read_text(encoding="utf-8") == script + + output_files = list(outputs_dir.glob("output_*.txt")) + # We expect at least one output file for the result + assert len(output_files) >= 1 + assert any(f.read_text(encoding="utf-8") == result for f in output_files) + + # Verify AppController internal state + with app_controller._pending_tool_calls_lock: + assert len(app_controller._pending_tool_calls) == 1 + assert app_controller._pending_tool_calls[0]["script"] == script + assert app_controller._pending_tool_calls[0]["result"] == result + assert app_controller._pending_tool_calls[0]["source_tier"] == "Tier 3" + +def test_offload_entry_payload_tool_call_unwraps_result(app_controller, tmp_session_dir): + """ + Regression test for the half-migrated session_logger.log_tool_call call site + in _offload_entry_payload (FR5 in spec.md). + + The bug: session_logger.log_tool_call was partially migrated to return + Result[data=str(ps1_path) | None] but the call site at + app_controller.py:3721 still does Path(ref_path).name, which raises + TypeError when ref_path is a Result object. + + After the fix, the call site must unwrap the Result and produce a + [REF:filename] string in the payload["script"] field. + """ + script_content = "Get-Process | Select-Object Name" + entry = { + "kind": "tool_call", + "payload": { + "script": script_content, + }, + "ts": "12:00:00" + } + + with patch("src.session_logger.log_comms") as mock_log_comms: + app_controller._on_comms_entry(entry) + + # 1. log_comms was called with an optimized entry + assert mock_log_comms.called + optimized_entry = mock_log_comms.call_args[0][0] + assert optimized_entry["kind"] == "tool_call" + assert "script" in optimized_entry["payload"] + + # 2. The script should be a [REF:script_NNNN.ps1] reference (or unchanged + # if offload failed; the canonical happy path produces the ref). + ref_text = optimized_entry["payload"]["script"] + if ref_text != script_content: + assert ref_text.startswith("[REF:script_"), f"expected [REF:script_*] got {ref_text!r}" + assert ref_text.endswith(".ps1]"), f"expected [REF:script_*.ps1] got {ref_text!r}" + + # 3. The offloaded file should exist and contain the original script + if ref_text != script_content: + ref_filename = ref_text[5:-1] + offloaded_path = tmp_session_dir / "scripts" / ref_filename + assert offloaded_path.exists() + assert offloaded_path.read_text(encoding="utf-8") == script_content + + # 4. Original entry is NOT mutated (deepcopy semantics) + assert entry["payload"]["script"] == script_content + +def test_offload_entry_payload_preserves_script_on_log_tool_call_error(app_controller, tmp_session_dir, caplog): + """ + Verify that when session_logger.log_tool_call returns a Result with errors, + the payload["script"] is preserved unchanged (and a debug log is emitted + per Heuristic #19). + """ + script_content = "Get-Process" + entry = { + "kind": "tool_call", + "payload": { + "script": script_content, + }, + "ts": "12:00:00" + } + + with patch("src.session_logger.log_tool_call", + return_value=Result(data=None, errors=[ErrorInfo( + kind=ErrorKind.INTERNAL, + message="simulated write failure", + source="session_logger.log_tool_call", + )])): + with patch("src.session_logger.log_comms"): + import logging + caplog.set_level(logging.DEBUG, logger="src.app_controller") + app_controller._on_comms_entry(entry) + + # Verify a debug log was emitted for the failure + debug_records = [r for r in caplog.records + if r.levelno == logging.DEBUG + and ("offload" in r.message.lower() or "tool_call" in r.message.lower())] + assert len(debug_records) >= 1, f"expected debug log for offload failure; got: {[r.message for r in caplog.records]}" \ No newline at end of file