import pytest import json import time import copy from pathlib import Path from unittest.mock import MagicMock, patch from src.app_controller import AppController from src import session_logger, paths, ai_client, project_manager from src.result_types import Result, ErrorInfo, ErrorKind @pytest.fixture def tmp_session_dir(tmp_path, monkeypatch): """Set up a temporary session directory for session_logger.""" logs_dir = tmp_path / "logs" scripts_dir = tmp_path / "scripts" logs_dir.mkdir() scripts_dir.mkdir() monkeypatch.setenv("SLOP_LOGS_DIR", str(logs_dir)) monkeypatch.setenv("SLOP_SCRIPTS_DIR", str(scripts_dir)) paths.reset_resolved() # Ensure session_logger is clean with patch("src.session_logger._comms_fh", None): session_logger.open_session("test_offloading") yield logs_dir / session_logger._session_id session_logger.close_session() @pytest.fixture def app_controller(tmp_session_dir): """Create an AppController instance for testing.""" with patch("src.app_controller.performance_monitor.PerformanceMonitor"): ctrl = AppController() # Minimal setup to avoid complex initialization ctrl.ui_auto_add_history = True return ctrl def test_on_comms_entry_tool_result_offloading(app_controller, tmp_session_dir): """ Test that _on_comms_entry offloads tool_result output to a separate file. """ output_content = "This is a large tool output that should be offloaded." entry = { "kind": "tool_result", "payload": { "output": output_content }, "ts": "12:00:00" } # Track calls to session_logger.log_comms with patch("src.session_logger.log_comms") as mock_log_comms: app_controller._on_comms_entry(entry) # 1. Verify log_comms was called with an optimized entry assert mock_log_comms.called optimized_entry = mock_log_comms.call_args[0][0] assert optimized_entry["kind"] == "tool_result" assert "output" in optimized_entry["payload"] # The output should be a reference like [REF:output_0001.txt] ref_text = optimized_entry["payload"]["output"] assert ref_text.startswith("[REF:output_") assert ref_text.endswith(".txt]") # 2. Verify the original entry was NOT modified in terms of its payload content # Wait, the tool uses deepcopy so it should be fine. assert entry["payload"]["output"] == output_content # 3. Verify the offloaded file exists and contains the correct content ref_filename = ref_text[5:-1] # Strip [REF: and ] offloaded_path = tmp_session_dir / "outputs" / ref_filename assert offloaded_path.exists() assert offloaded_path.read_text(encoding="utf-8") == output_content # 4. Verify that effects on internal state (like history adds) use the original output # _on_comms_entry appends to _pending_history_adds with app_controller._pending_history_adds_lock: assert len(app_controller._pending_history_adds) > 0 history_entry = next(e for e in app_controller._pending_history_adds if e["role"] == "Tool") assert output_content in history_entry["content"] assert "[TOOL RESULT]" in history_entry["content"] def test_on_tool_log_offloading(app_controller, tmp_session_dir): """ Test that _on_tool_log calls session_logger.log_tool_call and log_tool_output. """ script = "Get-Process" result = "Process list..." with patch("src.ai_client.get_current_tier", return_value="Tier 3"): app_controller._on_tool_log(script, result) # Verify files were created in session directory scripts_dir = tmp_session_dir / "scripts" outputs_dir = tmp_session_dir / "outputs" script_files = list(scripts_dir.glob("script_*.ps1")) assert len(script_files) == 1 assert script_files[0].read_text(encoding="utf-8") == script output_files = list(outputs_dir.glob("output_*.txt")) # We expect at least one output file for the result assert len(output_files) >= 1 assert any(f.read_text(encoding="utf-8") == result for f in output_files) # Verify AppController internal state with app_controller._pending_tool_calls_lock: assert len(app_controller._pending_tool_calls) == 1 assert app_controller._pending_tool_calls[0]["script"] == script assert app_controller._pending_tool_calls[0]["result"] == result assert app_controller._pending_tool_calls[0]["source_tier"] == "Tier 3" def test_offload_entry_payload_tool_call_unwraps_result(app_controller, tmp_session_dir): """ Regression test for the half-migrated session_logger.log_tool_call call site in _offload_entry_payload (FR5 in spec.md). The bug: session_logger.log_tool_call was partially migrated to return Result[data=str(ps1_path) | None] but the call site at app_controller.py:3721 still does Path(ref_path).name, which raises TypeError when ref_path is a Result object. After the fix, the call site must unwrap the Result and produce a [REF:filename] string in the payload["script"] field. """ script_content = "Get-Process | Select-Object Name" entry = { "kind": "tool_call", "payload": { "script": script_content, }, "ts": "12:00:00" } with patch("src.session_logger.log_comms") as mock_log_comms: app_controller._on_comms_entry(entry) # 1. log_comms was called with an optimized entry assert mock_log_comms.called optimized_entry = mock_log_comms.call_args[0][0] assert optimized_entry["kind"] == "tool_call" assert "script" in optimized_entry["payload"] # 2. The script should be a [REF:script_NNNN.ps1] reference (or unchanged # if offload failed; the canonical happy path produces the ref). ref_text = optimized_entry["payload"]["script"] if ref_text != script_content: assert ref_text.startswith("[REF:script_"), f"expected [REF:script_*] got {ref_text!r}" assert ref_text.endswith(".ps1]"), f"expected [REF:script_*.ps1] got {ref_text!r}" # 3. The offloaded file should exist and contain the original script if ref_text != script_content: ref_filename = ref_text[5:-1] offloaded_path = tmp_session_dir / "scripts" / ref_filename assert offloaded_path.exists() assert offloaded_path.read_text(encoding="utf-8") == script_content # 4. Original entry is NOT mutated (deepcopy semantics) assert entry["payload"]["script"] == script_content def test_offload_entry_payload_preserves_script_on_log_tool_call_error(app_controller, tmp_session_dir, caplog): """ Verify that when session_logger.log_tool_call returns a Result with errors, the payload["script"] is preserved unchanged (and a debug log is emitted per Heuristic #19). """ script_content = "Get-Process" entry = { "kind": "tool_call", "payload": { "script": script_content, }, "ts": "12:00:00" } with patch("src.session_logger.log_tool_call", return_value=Result(data=None, errors=[ErrorInfo( kind=ErrorKind.INTERNAL, message="simulated write failure", source="session_logger.log_tool_call", )])): with patch("src.session_logger.log_comms"): import logging caplog.set_level(logging.DEBUG, logger="src.app_controller") app_controller._on_comms_entry(entry) # Verify a debug log was emitted for the failure debug_records = [r for r in caplog.records if r.levelno == logging.DEBUG and ("offload" in r.message.lower() or "tool_call" in r.message.lower())] assert len(debug_records) >= 1, f"expected debug log for offload failure; got: {[r.message for r in caplog.records]}"