Harden reliability, security, and UX across core modules

- Add thread safety: _anthropic_history_lock and _send_lock in ai_client to prevent concurrent corruption
- Add _send_thread_lock in gui_2 for atomic check-and-start of send thread
- Add atexit fallback in session_logger to flush log files on abnormal exit
- Fix file descriptor leaks: use context managers for urlopen in mcp_client
- Cap unbounded tool output growth at 500KB per send() call (both Gemini and Anthropic)
- Harden path traversal: resolve(strict=True) with fallback in mcp_client allowlist checks
- Add SLOP_CREDENTIALS env var override for credentials.toml with helpful error message
- Fix Gemini token heuristic: use _CHARS_PER_TOKEN (3.5) instead of hardcoded // 4
- Add keyboard shortcuts: Ctrl+Enter to send, Ctrl+L to clear message input
- Add auto-save: flush project and config to disk every 60 seconds
2026-02-23 21:29:30 -05:00
37 changed files with 430 additions and 1296 deletions
@@ -17,7 +17,9 @@ import time
 import datetime
 import hashlib
 import difflib
+import threading
 from pathlib import Path
+import os
 import file_cache
 import mcp_client
 import anthropic
@@ -53,6 +55,8 @@ _GEMINI_CACHE_TTL = 3600

 _anthropic_client = None
 _anthropic_history: list[dict] = []
+_anthropic_history_lock = threading.Lock()
+_send_lock = threading.Lock()

 # Injected by gui.py - called when AI wants to run a command.
 # Signature: (script: str, base_dir: str) -> str | None
@@ -69,6 +73,10 @@ tool_log_callback = None
 # Increased to allow thorough code exploration before forcing a summary
 MAX_TOOL_ROUNDS = 10

+# Maximum cumulative bytes of tool output allowed per send() call.
+# Prevents unbounded memory growth during long tool-calling loops.
+_MAX_TOOL_OUTPUT_BYTES = 500_000
+
 # Maximum characters per text chunk sent to Anthropic.
 # Kept well under the ~200k token API limit.
 _ANTHROPIC_CHUNK_SIZE = 120_000
@@ -130,8 +138,18 @@ def clear_comms_log():


 def _load_credentials() -> dict:
-    with open("credentials.toml", "rb") as f:
-        return tomllib.load(f)
+    cred_path = os.environ.get("SLOP_CREDENTIALS", "credentials.toml")
+    try:
+        with open(cred_path, "rb") as f:
+            return tomllib.load(f)
+    except FileNotFoundError:
+        raise FileNotFoundError(
+            f"Credentials file not found: {cred_path}\n"
+            f"Create a credentials.toml with:\n"
+            f"  [gemini]\n  api_key = \"your-key\"\n"
+            f"  [anthropic]\n  api_key = \"your-key\"\n"
+            f"Or set SLOP_CREDENTIALS env var to a custom path."
+        )


 # ------------------------------------------------------------------ provider errors
@@ -246,7 +264,8 @@ def reset_session():
    _gemini_cache_md_hash = None
    _gemini_cache_created_at = None
    _anthropic_client = None
-    _anthropic_history = []
+    with _anthropic_history_lock:
+        _anthropic_history = []
    _CACHED_ANTHROPIC_TOOLS = None
    file_cache.reset_client()

@@ -652,6 +671,7 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,

        _append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"})
        payload, all_text = user_message, []
+        _cumulative_tool_bytes = 0

        # Strip stale file refreshes and truncate old tool outputs ONCE before
        # entering the tool loop (not per-round — history entries don't change).
@@ -701,11 +721,11 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
                        if not hist: break
                        for p in hist[0].parts:
                            if hasattr(p, "text") and p.text:
-                                saved += len(p.text) // 4
+                                saved += int(len(p.text) / _CHARS_PER_TOKEN)
                            elif hasattr(p, "function_response") and p.function_response:
                                r = getattr(p.function_response, "response", {})
                                if isinstance(r, dict):
-                                    saved += len(str(r.get("output", ""))) // 4
+                                    saved += int(len(str(r.get("output", ""))) / _CHARS_PER_TOKEN)
                        hist.pop(0)
                        dropped += 1
                    total_in -= max(saved, 200)
@@ -736,10 +756,17 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
                    if r_idx == MAX_TOOL_ROUNDS: out += "\n\n[SYSTEM: MAX ROUNDS. PROVIDE FINAL ANSWER.]"

                out = _truncate_tool_output(out)
+                _cumulative_tool_bytes += len(out)
                f_resps.append(types.Part.from_function_response(name=name, response={"output": out}))
                log.append({"tool_use_id": name, "content": out})
                events.emit("tool_execution", payload={"status": "completed", "tool": name, "result": out, "round": r_idx})
-            
+
+            if _cumulative_tool_bytes > _MAX_TOOL_OUTPUT_BYTES:
+                f_resps.append(types.Part.from_text(
+                    f"SYSTEM WARNING: Cumulative tool output exceeded {_MAX_TOOL_OUTPUT_BYTES // 1000}KB budget. Provide your final answer now."
+                ))
+                _append_comms("OUT", "request", {"message": f"[TOOL OUTPUT BUDGET EXCEEDED: {_cumulative_tool_bytes} bytes]"})
+
            _append_comms("OUT", "tool_result_send", {"results": log})
            payload = f_resps
            
@@ -1046,6 +1073,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
        })

        all_text_parts = []
+        _cumulative_tool_bytes = 0

        # We allow MAX_TOOL_ROUNDS, plus 1 final loop to get the text synthesis
        for round_idx in range(MAX_TOOL_ROUNDS + 2):
@@ -1132,10 +1160,12 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
                    _append_comms("OUT", "tool_call", {"name": b_name, "id": b_id, "args": b_input})
                    output = mcp_client.dispatch(b_name, b_input)
                    _append_comms("IN", "tool_result", {"name": b_name, "id": b_id, "output": output})
+                    truncated = _truncate_tool_output(output)
+                    _cumulative_tool_bytes += len(truncated)
                    tool_results.append({
                        "type":        "tool_result",
                        "tool_use_id": b_id,
-                        "content":     _truncate_tool_output(output),
+                        "content":     truncated,
                    })
                    events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx})
                elif b_name == TOOL_NAME:
@@ -1151,13 +1181,22 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
                        "id":     b_id,
                        "output": output,
                    })
+                    truncated = _truncate_tool_output(output)
+                    _cumulative_tool_bytes += len(truncated)
                    tool_results.append({
                        "type":        "tool_result",
                        "tool_use_id": b_id,
-                        "content":     _truncate_tool_output(output),
+                        "content":     truncated,
                    })
                    events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx})

+            if _cumulative_tool_bytes > _MAX_TOOL_OUTPUT_BYTES:
+                tool_results.append({
+                    "type": "text",
+                    "text": f"SYSTEM WARNING: Cumulative tool output exceeded {_MAX_TOOL_OUTPUT_BYTES // 1000}KB budget. Provide your final answer now."
+                })
+                _append_comms("OUT", "request", {"message": f"[TOOL OUTPUT BUDGET EXCEEDED: {_cumulative_tool_bytes} bytes]"})
+
            # Refresh file context after tool calls — only inject CHANGED files
            if file_items:
                file_items, changed = _reread_file_items(file_items)
@@ -1220,11 +1259,12 @@ def send(
    discussion_history : discussion history text (used by Gemini to inject as
                         conversation message instead of caching it)
    """
-    if _provider == "gemini":
-        return _send_gemini(md_content, user_message, base_dir, file_items, discussion_history)
-    elif _provider == "anthropic":
-        return _send_anthropic(md_content, user_message, base_dir, file_items, discussion_history)
-    raise ValueError(f"unknown provider: {_provider}")
+    with _send_lock:
+        if _provider == "gemini":
+            return _send_gemini(md_content, user_message, base_dir, file_items, discussion_history)
+        elif _provider == "anthropic":
+            return _send_anthropic(md_content, user_message, base_dir, file_items, discussion_history)
+        raise ValueError(f"unknown provider: {_provider}")

 def get_history_bleed_stats() -> dict:
    """
@@ -1232,7 +1272,9 @@ def get_history_bleed_stats() -> dict:
    """
    if _provider == "anthropic":
        # For Anthropic, we have a robust estimator
-        current_tokens = _estimate_prompt_tokens([], _anthropic_history)
+        with _anthropic_history_lock:
+            history_snapshot = list(_anthropic_history)
+        current_tokens = _estimate_prompt_tokens([], history_snapshot)
        limit_tokens = _ANTHROPIC_MAX_PROMPT_TOKENS
        percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
        return {
@@ -1276,4 +1318,4 @@ def get_history_bleed_stats() -> dict:
        "limit": 0,
        "current": 0,
        "percentage": 0,
-    }
+    }
@@ -3,7 +3,7 @@ import json
 import time

 class ApiHookClient:
-    def __init__(self, base_url="http://127.0.0.1:8999", max_retries=5, retry_delay=2):
+    def __init__(self, base_url="http://127.0.0.1:8999", max_retries=3, retry_delay=1):
        self.base_url = base_url
        self.max_retries = max_retries
        self.retry_delay = retry_delay
@@ -29,9 +29,9 @@ class ApiHookClient:
        for attempt in range(self.max_retries + 1):
            try:
                if method == 'GET':
-                    response = requests.get(url, timeout=5)
+                    response = requests.get(url, timeout=2)
                elif method == 'POST':
-                    response = requests.post(url, json=data, headers=headers, timeout=5)
+                    response = requests.post(url, json=data, headers=headers, timeout=2)
                else:
                    raise ValueError(f"Unsupported HTTP method: {method}")
                
@@ -83,53 +83,3 @@ class ApiHookClient:

    def post_gui(self, gui_data):
        return self._make_request('POST', '/api/gui', data=gui_data)
-
-    def select_tab(self, tab_bar, tab):
-        """Tells the GUI to switch to a specific tab in a tab bar."""
-        return self.post_gui({
-            "action": "select_tab",
-            "tab_bar": tab_bar,
-            "tab": tab
-        })
-
-    def select_list_item(self, listbox, item_value):
-        """Tells the GUI to select an item in a listbox by its value."""
-        return self.post_gui({
-            "action": "select_list_item",
-            "listbox": listbox,
-            "item_value": item_value
-        })
-
-    def set_value(self, item, value):
-        """Sets the value of a GUI item."""
-        return self.post_gui({
-            "action": "set_value",
-            "item": item,
-            "value": value
-        })
-
-    def click(self, item, *args, **kwargs):
-        """Simulates a click on a GUI button or item."""
-        user_data = kwargs.pop('user_data', None)
-        return self.post_gui({
-            "action": "click",
-            "item": item,
-            "args": args,
-            "kwargs": kwargs,
-            "user_data": user_data
-        })
-
-    def get_indicator_state(self, tag):
-        """Checks if an indicator is shown using the diagnostics endpoint."""
-        # Mapping tag to the keys used in diagnostics endpoint
-        mapping = {
-            "thinking_indicator": "thinking",
-            "operations_live_indicator": "live",
-            "prior_session_indicator": "prior"
-        }
-        key = mapping.get(tag, tag)
-        try:
-            diag = self._make_request('GET', '/api/gui/diagnostics')
-            return {"tag": tag, "shown": diag.get(key, False)}
-        except Exception as e:
-            return {"tag": tag, "shown": False, "error": str(e)}
@@ -21,12 +21,11 @@ class HookHandler(BaseHTTPRequestHandler):
            self.end_headers()
            self.wfile.write(json.dumps({'status': 'ok'}).encode('utf-8'))
        elif self.path == '/api/project':
-            import project_manager
            self.send_response(200)
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
-            flat = project_manager.flat_config(app.project)
-            self.wfile.write(json.dumps({'project': flat}).encode('utf-8'))
+            self.wfile.write(
+                json.dumps({'project': app.project}).encode('utf-8'))
        elif self.path == '/api/session':
            self.send_response(200)
            self.send_header('Content-Type', 'application/json')
@@ -42,35 +41,6 @@ class HookHandler(BaseHTTPRequestHandler):
            if hasattr(app, 'perf_monitor'):
                metrics = app.perf_monitor.get_metrics()
            self.wfile.write(json.dumps({'performance': metrics}).encode('utf-8'))
-        elif self.path == '/api/gui/diagnostics':
-            # Safe way to query multiple states at once via the main thread queue
-            event = threading.Event()
-            result = {}
-            
-            def check_all():
-                import dearpygui.dearpygui as dpg
-                try:
-                    result["thinking"] = dpg.is_item_shown("thinking_indicator") if dpg.does_item_exist("thinking_indicator") else False
-                    result["live"] = dpg.is_item_shown("operations_live_indicator") if dpg.does_item_exist("operations_live_indicator") else False
-                    result["prior"] = dpg.is_item_shown("prior_session_indicator") if dpg.does_item_exist("prior_session_indicator") else False
-                finally:
-                    event.set()
-
-            with app._pending_gui_tasks_lock:
-                app._pending_gui_tasks.append({
-                    "action": "custom_callback",
-                    "callback": check_all
-                })
-            
-            if event.wait(timeout=2):
-                self.send_response(200)
-                self.send_header('Content-Type', 'application/json')
-                self.end_headers()
-                self.wfile.write(json.dumps(result).encode('utf-8'))
-            else:
-                self.send_response(504)
-                self.end_headers()
-                self.wfile.write(json.dumps({'error': 'timeout'}).encode('utf-8'))
        else:
            self.send_response(404)
            self.end_headers()
@@ -100,6 +70,11 @@ class HookHandler(BaseHTTPRequestHandler):
                self.wfile.write(
                    json.dumps({'status': 'updated'}).encode('utf-8'))
            elif self.path == '/api/gui':
+                if not hasattr(app, '_pending_gui_tasks'):
+                    app._pending_gui_tasks = []
+                if not hasattr(app, '_pending_gui_tasks_lock'):
+                    app._pending_gui_tasks_lock = threading.Lock()
+                    
                with app._pending_gui_tasks_lock:
                    app._pending_gui_tasks.append(data)
                    
@@ -130,13 +105,6 @@ class HookServer:
    def start(self):
        if not getattr(self.app, 'test_hooks_enabled', False):
            return
-            
-        # Ensure the app has the task queue and lock initialized
-        if not hasattr(self.app, '_pending_gui_tasks'):
-            self.app._pending_gui_tasks = []
-        if not hasattr(self.app, '_pending_gui_tasks_lock'):
-            self.app._pending_gui_tasks_lock = threading.Lock()
-            
        self.server = HookServerInstance(('127.0.0.1', self.port), HookHandler, self.app)
        self.thread = threading.Thread(target=self.server.serve_forever, daemon=True)
        self.thread.start()
@@ -1,40 +0,0 @@
-# Implementation Plan: Human-Like UX Interaction Test
-
-## Phase 1: Infrastructure & Automation Core [checkpoint: 7626531]
-Establish the foundation for driving the GUI via API hooks and simulation logic.
-
- [x] Task: Extend `ApiHookClient` with methods for tab switching and listbox selection if missing. f36d539
- [x] Task: Implement `TestUserAgent` class to manage dynamic response generation and action delays. d326242
- [x] Task: Write Tests (Verify basic hook connectivity and simulated delays) f36d539
- [x] Task: Implement basic 'ping-pong' interaction via hooks. bfe9ef0
- [x] Task: Harden API hook thread-safety and simplify GUI state polling. 8bd280e
- [x] Task: Conductor - User Manual Verification 'Phase 1: Infrastructure & Automation Core' (Protocol in workflow.md) 7626531
-
-## Phase 2: Workflow Simulation [checkpoint: 9c4a72c]
-Build the core interaction loop for project creation and AI discussion.
-
- [x] Task: Implement 'New Project' scaffolding script (creating a tiny console program). bd5dc16
- [x] Task: Implement 5-turn discussion loop logic with sub-agent responses. bd5dc16
- [x] Task: Write Tests (Verify state changes in Discussion Hub during simulated chat) 6d16438
- [x] Task: Implement 'Thinking' and 'Live' indicator verification logic. 6d16438
- [x] Task: Conductor - User Manual Verification 'Phase 2: Workflow Simulation' (Protocol in workflow.md) 9c4a72c
-
-## Phase 3: History & Session Verification [checkpoint: 0f04e06]
-Simulate complex session management and historical audit features.
-
- [x] Task: Implement discussion switching logic (creating/switching between named discussions). 5e1b965
- [x] Task: Implement 'Load Prior Log' simulation and 'Tinted Mode' detection. 5e1b965
- [x] Task: Write Tests (Verify log loading and tab navigation consistency) 5e1b965
- [x] Task: Implement truncation limit verification (forcing a long history and checking bleed). 5e1b965
- [x] Task: Conductor - User Manual Verification 'Phase 3: History & Session Verification' (Protocol in workflow.md) 0f04e06
-
-## Phase 4: Final Integration & Regression [checkpoint: 8e63b31]
-Consolidate the simulation into end-user artifacts and CI tests.
-
- [x] Task: Create `live_walkthrough.py` with full visual feedback and manual sign-off. 8bd280e
- [x] Task: Create `tests/test_live_workflow.py` for automated regression testing. 8bd280e
- [x] Task: Perform a full visual walkthrough and verify 'human-readable' pace. 8e63b31
- [x] Task: Conductor - User Manual Verification 'Phase 4: Final Integration & Regression' (Protocol in workflow.md) 8e63b31
-
-## Phase: Review Fixes
- [x] Task: Apply review suggestions 064d7ba
@@ -15,5 +15,4 @@ To serve as an expert-level utility for personal developer use on small projects
 - **In-Depth Toolset Access:** MCP-like file exploration, URL fetching, search, and dynamic context aggregation embedded within a multi-viewport Dear PyGui/ImGui interface.
 - **Integrated Workspace:** A consolidated Hub-based layout (Context, AI Settings, Discussion, Operations) designed for expert multi-monitor workflows.
 - **Session Analysis:** Ability to load and visualize historical session logs with a dedicated tinted "Prior Session" viewing mode.
- **Performance Diagnostics:** Built-in telemetry for FPS, Frame Time, and CPU usage, with a dedicated Diagnostics Panel and AI API hooks for performance analysis.
- **Automated UX Verification:** A robust IPC mechanism via API hooks allows for human-like simulation walkthroughs and automated regression testing of the full GUI lifecycle.
+- **Performance Diagnostics:** Built-in telemetry for FPS, Frame Time, and CPU usage, with a dedicated Diagnostics Panel and AI API hooks for performance analysis.
@@ -15,8 +15,6 @@
 - **tomli-w:** For writing TOML configuration files.
 - **psutil:** For system and process monitoring (CPU/Memory telemetry).
 - **uv:** An extremely fast Python package and project manager.
- **pytest:** For unit and integration testing, leveraging custom fixtures for live GUI verification.
- **ApiHookClient:** A dedicated IPC client for automated GUI interaction and state inspection.

 ## Architectural Patterns
 - **Event-Driven Metrics:** Uses a custom `EventEmitter` to decouple API lifecycle events from UI rendering, improving performance and responsiveness.
@@ -7,13 +7,13 @@ This file tracks all major tracks for the project. Each track has its own detail
 - [x] **Track: Implement context visualization and memory management improvements**
  *Link: [./tracks/context_management_20260223/](./tracks/context_management_20260223/)*

-
-
-
-
-
-
 ---

- [~] **Track: get gui_2 working with latest changes to the project.**
-*Link: [./tracks/gui2_feature_parity_20260223/](./tracks/gui2_feature_parity_20260223/)*
+- [ ] **Track: Make a human-like test ux interaction where the AI creates a small python project, engages in a 5-turn discussion, and verifies history/session management features via API hooks.**
+*Link: [./tracks/live_ux_test_20260223/](./tracks/live_ux_test_20260223/)*
+
+
+
+
+
+
@@ -1,5 +0,0 @@
-# Track gui2_feature_parity_20260223 Context
-
- [Specification](./spec.md)
- [Implementation Plan](./plan.md)
- [Metadata](./metadata.json)
@@ -1,8 +0,0 @@
-{
-  "track_id": "gui2_feature_parity_20260223",
-  "type": "feature",
-  "status": "new",
-  "created_at": "2026-02-23T20:15:30Z",
-  "updated_at": "2026-02-23T20:15:30Z",
-  "description": "get gui_2 working with latest changes to the project."
-}
@@ -1,79 +0,0 @@
-# Implementation Plan: GUIv2 Feature Parity
-
-## Phase 1: Core Architectural Integration
-
- [x] **Task:** Integrate `events.py` into `gui_2.py`. [24b831c]
-    - [x] Sub-task: Import the `events` module in `gui_2.py`.
-    - [x] Sub-task: Refactor the `ai_client` call in `_do_send` to use the event-driven `send` method.
-    - [x] Sub-task: Create event handlers in `App` class for `request_start`, `response_received`, and `tool_execution`.
-    - [x] Sub-task: Subscribe the handlers to `ai_client.events` upon `App` initialization.
- [~] **Task:** Integrate `mcp_client.py` for native file tools.
-    - [ ] Sub-task: Import `mcp_client` in `gui_2.py`.
-    - [ ] Sub-task: Add `mcp_client.perf_monitor_callback` to the `App` initialization.
-    - [ ] Sub-task: In `ai_client`, ensure the MCP tools are registered and available for the AI to call when `gui_2.py` is the active UI.
- [ ] **Task:** Write tests for new core integrations.
-    - [x] Sub-task: Create `tests/test_gui2_events.py` to verify that `gui_2.py` correctly handles AI lifecycle events.
-    - [ ] Sub-task: Create `tests/test_gui2_mcp.py` to verify that the AI can use MCP tools through `gui_2.py`.
- [ ] **Task:** Conductor - User Manual Verification 'Core Architectural Integration' (Protocol in workflow.md)
-
-## Phase 2: Major Feature Implementation
-
- [x] **Task:** Port the API Hooks System. [merged]
-    - [x] Sub-task: Import `api_hooks` in `gui_2.py`.
-    - [x] Sub-task: Instantiate `HookServer` in the `App` class.
-    - [x] Sub-task: Implement the logic to start the server based on a CLI flag (e.g., `--enable-test-hooks`).
-    - [x] Sub-task: Implement the queue and lock for pending GUI tasks from the hook server, similar to `gui.py`.
-    - [x] Sub-task: Add a main loop task to process the GUI task queue.
- [x] **Task:** Port the Performance & Diagnostics feature. [merged]
-    - [x] Sub-task: Import `PerformanceMonitor` in `gui_2.py`.
-    - [x] Sub-task: Instantiate `PerformanceMonitor` in the `App` class.
-    - [x] Sub-task: Create a new "Diagnostics" window in `gui_2.py`.
-    - [x] Sub-task: Add UI elements (plots, labels) to the Diagnostics window to display FPS, CPU, frame time, etc.
-    - [x] Sub-task: Add a throttled update mechanism in the main loop to refresh diagnostics data.
- [x] **Task:** Implement the Prior Session Viewer. [merged]
-    - [x] Sub-task: Add a "Load Prior Session" button to the UI.
-    - [x] Sub-task: Implement the file dialog logic to select a `.log` file.
-    - [x] Sub-task: Implement the logic to parse the log file and populate the comms history view.
-    - [x] Sub-task: Implement the "tinted" theme application when in viewing mode and a way to exit this mode.
- [ ] **Task:** Write tests for major features.
-    - [ ] Sub-task: Create `tests/test_gui2_api_hooks.py` to test the hook server integration.
-    - [ ] Sub-task: Create `tests/test_gui2_diagnostics.py` to verify the diagnostics panel displays data.
- [ ] **Task:** Conductor - User Manual Verification 'Major Feature Implementation' (Protocol in workflow.md)
-
-## Phase 3: UI/UX Refinement
-
- [ ] **Task:** Refactor UI to a "Hub" based layout.
-    - [ ] Sub-task: Analyze the docking layout of `gui.py`.
-    - [ ] Sub-task: Create wrapper windows for "Context Hub", "AI Settings Hub", "Discussion Hub", and "Operations Hub" in `gui_2.py`.
-    - [ ] Sub-task: Move existing windows into their respective Hubs using the `imgui-bundle` docking API.
-    - [ ] Sub-task: Ensure the default layout is saved to and loaded from `manualslop_layout.ini`.
- [x] **Task:** Add Agent Capability Toggles to the UI. [merged]
-    - [x] Sub-task: In the "Projects" or a new "Agent" panel, add checkboxes for each agent tool (e.g., `run_powershell`, `read_file`).
-    - [x] Sub-task: Ensure these UI toggles are saved to the project's `.toml` file.
-    - [x] Sub-task: Ensure `ai_client` respects these settings when determining which tools are available to the AI.
- [x] **Task:** Full Theme Integration. [merged]
-    - [x] Sub-task: Review all newly added windows and controls.
-    - [x] Sub-task: Ensure that colors, fonts, and scaling from `theme_2.py` are correctly applied everywhere.
-    - [x] Sub-task: Test theme switching to confirm all elements update correctly.
- [ ] **Task:** Write tests for UI/UX changes.
-    - [ ] Sub-task: Create `tests/test_gui2_layout.py` to verify the hub structure is created.
-    - [ ] Sub-task: Add tests to verify agent capability toggles are respected.
- [ ] **Task:** Conductor - User Manual Verification 'UI/UX Refinement' (Protocol in workflow.md)
-
-## Phase 4: Finalization and Verification
-
- [ ] **Task:** Conduct full manual testing against `spec.md` Acceptance Criteria.
-    - [ ] Sub-task: Verify AC1: `gui_2.py` launches.
-    - [ ] Sub-task: Verify AC2: Hub layout is correct.
-    - [ ] Sub-task: Verify AC3: Diagnostics panel works.
-    - [ ] Sub-task: Verify AC4: API hooks server runs.
-    - [ ] Sub-task: Verify AC5: MCP tools are usable by AI.
-    - [ ] Sub-task: Verify AC6: Prior Session Viewer works.
-    - [ ] Sub-task: Verify AC7: Theming is consistent.
- [ ] **Task:** Run the full project test suite.
-    - [ ] Sub-task: Execute `uv run run_tests.py` (or equivalent).
-    - [ ] Sub-task: Ensure all existing and new tests pass.
- [ ] **Task:** Code Cleanup and Refactoring.
-    - [ ] Sub-task: Remove any dead code or temporary debug statements.
-    - [ ] Sub-task: Ensure code follows project style guides.
- [ ] **Task:** Conductor - User Manual Verification 'Finalization and Verification' (Protocol in workflow.md)
@@ -1,45 +0,0 @@
-# Specification: GUIv2 Feature Parity
-
-## 1. Overview
-
-This track aims to bring `gui_2.py` (the `imgui-bundle` based UI) to feature parity with the existing `gui.py` (the `dearpygui` based UI). This involves porting several major systems and features to ensure `gui_2.py` can serve as a viable replacement and support the latest project capabilities like automated testing and advanced diagnostics.
-
-## 2. Functional Requirements
-
-### FR1: Port Core Architectural Systems
- **FR1.1: Event-Driven Architecture:** `gui_2.py` MUST be refactored to use the `events.py` module for handling API lifecycle events, decoupling the UI from the AI client.
- **FR1.2: MCP File Tools Integration:** `gui_2.py` MUST integrate and use `mcp_client.py` to provide the AI with native, sandboxed file system capabilities (read, list, search).
-
-### FR2: Port Major Features
- **FR2.1: API Hooks System:** The full API hooks system, including `api_hooks.py` and `api_hook_client.py`, MUST be integrated into `gui_2.py`. This will enable external test automation and state inspection.
- **FR2.2: Performance & Diagnostics:** The performance monitoring capabilities from `performance_monitor.py` MUST be integrated. A new "Diagnostics" panel, mirroring the one in `gui.py`, MUST be created to display real-time metrics (FPS, CPU, Frame Time, etc.).
- **FR2.3: Prior Session Viewer:** The functionality to load and view previous session logs (`.log` files from the `/logs` directory) MUST be implemented, including the distinctive "tinted" UI theme when viewing a prior session.
-
-### FR3: UI/UX Alignment
- **FR3.1: 'Hub' UI Layout:** The windowing layout of `gui_2.py` MUST be refactored to match the "Hub" paradigm of `gui.py`. This includes creating:
-    - `Context Hub`
-    - `AI Settings Hub`
-    - `Discussion Hub`
-    - `Operations Hub`
- **FR3.2: Agent Capability Toggles:** The UI MUST include checkboxes or similar controls to allow the user to enable or disable the AI's agent-level tools (e.g., `run_powershell`, `read_file`).
- **FR3.3: Full Theme Integration:** All new UI components, windows, and controls MUST correctly apply and respond to the application's theming system (`theme_2.py`).
-
-## 3. Non-Functional Requirements
-
- **NFR1: Stability:** The application must remain stable and responsive during and after the feature porting.
- **NFR2: Maintainability:** The new code should follow existing project conventions and be well-structured to ensure maintainability.
-
-## 4. Acceptance Criteria
-
- **AC1:** `gui_2.py` successfully launches without errors.
- **AC2:** The "Hub" layout is present and organizes the UI elements as specified.
- **AC3:** The Diagnostics panel is present and displays updating performance metrics.
- **AC4:** The API hooks server starts and is reachable when `gui_2.py` is run with the appropriate flag.
- **AC5:** The AI can successfully use file system tools provided by `mcp_client.py`.
- **AC6:** The "Prior Session Viewer" can successfully load and display a log file.
- **AC7:** All new UI elements correctly reflect the selected theme.
-
-## 5. Out of Scope
-
- Deprecating or removing `gui.py`. Both will coexist for now.
- Any new features not already present in `gui.py`. This is strictly a porting and alignment task.
@@ -0,0 +1,36 @@
+# Implementation Plan: Human-Like UX Interaction Test
+
+## Phase 1: Infrastructure & Automation Core
+Establish the foundation for driving the GUI via API hooks and simulation logic.
+
+- [ ] Task: Extend `ApiHookClient` with methods for tab switching and listbox selection if missing.
+- [ ] Task: Implement `TestUserAgent` class to manage dynamic response generation and action delays.
+- [ ] Task: Write Tests (Verify basic hook connectivity and simulated delays)
+- [ ] Task: Implement basic 'ping-pong' interaction via hooks.
+- [ ] Task: Conductor - User Manual Verification 'Phase 1: Infrastructure & Automation Core' (Protocol in workflow.md)
+
+## Phase 2: Workflow Simulation
+Build the core interaction loop for project creation and AI discussion.
+
+- [ ] Task: Implement 'New Project' scaffolding script (creating a tiny console program).
+- [ ] Task: Implement 5-turn discussion loop logic with sub-agent responses.
+- [ ] Task: Write Tests (Verify state changes in Discussion Hub during simulated chat)
+- [ ] Task: Implement 'Thinking' and 'Live' indicator verification logic.
+- [ ] Task: Conductor - User Manual Verification 'Phase 2: Workflow Simulation' (Protocol in workflow.md)
+
+## Phase 3: History & Session Verification
+Simulate complex session management and historical audit features.
+
+- [ ] Task: Implement discussion switching logic (creating/switching between named discussions).
+- [ ] Task: Implement 'Load Prior Log' simulation and 'Tinted Mode' detection.
+- [ ] Task: Write Tests (Verify log loading and tab navigation consistency)
+- [ ] Task: Implement truncation limit verification (forcing a long history and checking bleed).
+- [ ] Task: Conductor - User Manual Verification 'Phase 3: History & Session Verification' (Protocol in workflow.md)
+
+## Phase 4: Final Integration & Regression
+Consolidate the simulation into end-user artifacts and CI tests.
+
+- [ ] Task: Create `live_walkthrough.py` with full visual feedback and manual sign-off.
+- [ ] Task: Create `tests/test_live_workflow.py` for automated regression testing.
+- [ ] Task: Perform a full visual walkthrough and verify 'human-readable' pace.
+- [ ] Task: Conductor - User Manual Verification 'Phase 4: Final Integration & Regression' (Protocol in workflow.md)
@@ -136,7 +136,6 @@ For features involving the GUI or complex internal state, unit tests are often i
        # The GUI is now running on port 8999
        ...
    ```
-    Note: pytest must be run with `uv`.

 3.  **Verify via ApiHookClient:** Use the `ApiHookClient` in `api_hook_client.py` to interact with the running application. It includes robust retry logic and health checks.

@@ -1,22 +1,21 @@
 [ai]
 provider = "gemini"
-model = "gemini-2.0-flash"
-temperature = 0.0
-max_tokens = 8192
+model = "gemini-2.5-flash"
+temperature = 0.6000000238418579
+max_tokens = 12000
 history_trunc_limit = 8000
-system_prompt = ""
+system_prompt = "DO NOT EVER make a shell script unless told to. DO NOT EVER make a readme or a file describing your changes unless you are told to. If you have commands I should be entering into the command line or if you have something to explain to me, please just use code blocks or normal text output. DO NOT DO ANYTHING OTHER THAN WHAT YOU WERE TOLD TO DO. DO NOT EVER, EVER DO ANYTHING OTHER THAN WHAT YOU WERE TOLD TO DO. IF YOU WANT TO DO OTHER THINGS, SIMPLY SUGGEST THEM, AND THEN I WILL REVIEW YOUR CHANGES, AND MAKE THE DECISION ON HOW TO PROCEED. WHEN WRITING SCRIPTS USE A 120-160 character limit per line. I don't want to see scrunched code.\n"

 [theme]
-palette = "Gold"
-font_path = ""
-font_size = 14.0
+palette = "10x Dark"
+font_path = "C:/Users/Ed/AppData/Local/uv/cache/archive-v0/WSthkYsQ82b_ywV6DkiaJ/pygame_gui/data/FiraCode-Regular.ttf"
+font_size = 18.0
 scale = 1.0

 [projects]
 paths = [
    "manual_slop.toml",
    "C:/projects/forth/bootslop/bootslop.toml",
-    "C:\\projects\\manual_slop\\tests\\temp_project.toml",
 ]
 active = "manual_slop.toml"

@@ -129,7 +129,7 @@ def _add_text_field(parent: str, label: str, value: str):
            if wrap:
                with dpg.child_window(height=80, border=True):
                    # add_input_text for selection
-                    dpg.add_input_text(default_value=value, multiline=True, readonly=True, width=-1, height=-1)
+                    dpg.add_input_text(default_value=value, multiline=True, readonly=True, width=-1, height=-1, border=False)
            else:
                dpg.add_input_text(
                    default_value=value,
@@ -140,14 +140,14 @@ def _add_text_field(parent: str, label: str, value: str):
                )
        else:
            # Short selectable text
-            dpg.add_input_text(default_value=value if value else "(empty)", readonly=True, width=-1)
+            dpg.add_input_text(default_value=value if value else "(empty)", readonly=True, width=-1, border=False)


 def _add_kv_row(parent: str, key: str, val, val_color=None):
    """Single key: value row, horizontally laid out."""
    with dpg.group(horizontal=True, parent=parent):
        dpg.add_text(f"{key}:", color=_LABEL_COLOR)
-        dpg.add_input_text(default_value=str(val), readonly=True, width=-1)
+        dpg.add_input_text(default_value=str(val), readonly=True, width=-1, border=False)


 def _render_usage(parent: str, usage: dict):
@@ -1168,9 +1168,9 @@ class App:
                hint="New discussion name",
                width=-180,
            )
-            dpg.add_button(label="Create", tag="btn_disc_create", callback=self.cb_disc_create)
-            dpg.add_button(label="Rename", tag="btn_disc_rename", callback=self.cb_disc_rename)
-            dpg.add_button(label="Delete", tag="btn_disc_delete", callback=self.cb_disc_delete)
+            dpg.add_button(label="Create", callback=self.cb_disc_create)
+            dpg.add_button(label="Rename", callback=self.cb_disc_rename)
+            dpg.add_button(label="Delete", callback=self.cb_disc_delete)

    def _make_remove_file_cb(self, idx: int):
        def cb():
@@ -1506,28 +1506,6 @@ class App:
        self._rebuild_projects_list()
        self._update_status(f"created project: {name}")

-    def _cb_new_project_automated(self, path):
-        """Automated version of cb_new_project that doesn't show a dialog."""
-        if not path:
-            return
-        name = Path(path).stem
-        proj = project_manager.default_project(name)
-        project_manager.save_project(proj, path)
-        if path not in self.project_paths:
-            self.project_paths.append(path)
-        
-        # Safely queue project switch and list rebuild for the main thread
-        def main_thread_work():
-            self._switch_project(path)
-            self._rebuild_projects_list()
-            self._update_status(f"created project: {name}")
-
-        with self._pending_gui_tasks_lock:
-            self._pending_gui_tasks.append({
-                "action": "custom_callback",
-                "callback": main_thread_work
-            })
-
    def cb_browse_git_dir(self):
        root = hide_tk_root()
        d = filedialog.askdirectory(title="Select Git Directory")
@@ -1904,9 +1882,6 @@ class App:
            no_close=False,
            no_collapse=True,
        ):
-            with dpg.group(tag="automated_actions_group", show=False):
-                dpg.add_button(tag="btn_project_new_automated", callback=lambda s, a, u: self._cb_new_project_automated(u))
-
            with dpg.tab_bar():
                with dpg.tab(label="Projects"):
                    proj_meta = self.project.get("project", {})
@@ -1944,9 +1919,9 @@ class App:
                    with dpg.child_window(tag="projects_scroll", height=120, border=True):
                        pass
                    with dpg.group(horizontal=True):
-                        dpg.add_button(label="Add Project", tag="btn_project_add", callback=self.cb_add_project)
-                        dpg.add_button(label="New Project", tag="btn_project_new", callback=self.cb_new_project)
-                        dpg.add_button(label="Save All", tag="btn_project_save", callback=self.cb_save_config)
+                        dpg.add_button(label="Add Project", callback=self.cb_add_project)
+                        dpg.add_button(label="New Project", callback=self.cb_new_project)
+                        dpg.add_button(label="Save All", callback=self.cb_save_config)
                    dpg.add_checkbox(
                        tag="project_word_wrap",
                        label="Word-Wrap (Read-only panels)",
@@ -2093,7 +2068,7 @@ class App:
                    dpg.add_button(label="+All", callback=self.cb_disc_expand_all)
                    dpg.add_text("Keep Pairs:", color=(160, 160, 160))
                    dpg.add_input_int(tag="disc_truncate_pairs", default_value=2, width=80, min_value=1)
-                    dpg.add_button(label="Truncate", tag="btn_disc_truncate", callback=self.cb_disc_truncate)
+                    dpg.add_button(label="Truncate", callback=self.cb_disc_truncate)
                    dpg.add_button(label="Clear All", callback=self.cb_disc_clear)
                    dpg.add_button(label="Save", callback=self.cb_disc_save)
                
@@ -2125,10 +2100,10 @@ class App:
                        height=200,
                    )
                    with dpg.group(horizontal=True):
-                        dpg.add_button(label="Gen + Send", tag="btn_gen_send", callback=self.cb_generate_send)
-                        dpg.add_button(label="MD Only", tag="btn_md_only", callback=self.cb_md_only)
-                        dpg.add_button(label="Reset", tag="btn_reset", callback=self.cb_reset_session)
-                        dpg.add_button(label="-> History", tag="btn_to_history", callback=self.cb_append_message_to_history)
+                        dpg.add_button(label="Gen + Send", callback=self.cb_generate_send)
+                        dpg.add_button(label="MD Only", callback=self.cb_md_only)
+                        dpg.add_button(label="Reset", callback=self.cb_reset_session)
+                        dpg.add_button(label="-> History", callback=self.cb_append_message_to_history)

                with dpg.tab(label="AI Response"):
                    dpg.add_input_text(
@@ -2158,13 +2133,13 @@ class App:
                dpg.add_spacer(width=20)
                dpg.add_text("LIVE", tag="operations_live_indicator", color=(100, 255, 100), show=False)

-            with dpg.tab_bar(tag="operations_tabs"):
-                with dpg.tab(label="Comms Log", tag="tab_comms"):
+            with dpg.tab_bar():
+                with dpg.tab(label="Comms Log"):
                    with dpg.group(horizontal=True):
                        dpg.add_text("Status: idle", tag="ai_status", color=(200, 220, 160))
                        dpg.add_spacer(width=16)
                        dpg.add_button(label="Clear", callback=self.cb_clear_comms)
-                        dpg.add_button(label="Load Log", tag="btn_load_log", callback=self.cb_load_prior_log)
+                        dpg.add_button(label="Load Log", callback=self.cb_load_prior_log)
                        dpg.add_button(label="Exit Prior", tag="exit_prior_btn", callback=self.cb_exit_prior_session, show=False)
                    
                    dpg.add_text("PRIOR SESSION VIEW", tag="prior_session_indicator", color=(255, 100, 100), show=False)
@@ -2173,7 +2148,7 @@ class App:
                    with dpg.child_window(tag="comms_scroll", height=-1, border=False, horizontal_scrollbar=True):
                        pass

-                with dpg.tab(label="Tool Log", tag="tab_tool"):
+                with dpg.tab(label="Tool Log"):
                    with dpg.group(horizontal=True):
                        dpg.add_text("Tool call history")
                        dpg.add_button(label="Clear", callback=self.cb_clear_tool_log)
@@ -2326,46 +2301,10 @@ class App:
                        dpg.set_value(item, val)
                elif action == "click":
                    item = task.get("item")
-                    args = task.get("args", [])
-                    kwargs = task.get("kwargs", {})
-                    user_data = task.get("user_data")
                    if item and dpg.does_item_exist(item):
                        cb = dpg.get_item_callback(item)
                        if cb:
-                            try:
-                                # DPG callbacks can have (sender, app_data, user_data)
-                                # If we have specific args/kwargs we use them, 
-                                # otherwise we try to follow the DPG pattern.
-                                if args or kwargs:
-                                    cb(*args, **kwargs)
-                                elif user_data is not None:
-                                    cb(item, None, user_data)
-                                else:
-                                    cb()
-                            except Exception as e:
-                                print(f"Error in GUI hook callback for {item}: {e}")
-                elif action == "select_tab":
-                    tab_bar = task.get("tab_bar")
-                    tab = task.get("tab")
-                    if tab_bar and dpg.does_item_exist(tab_bar):
-                        dpg.set_value(tab_bar, tab)
-                elif action == "select_list_item":
-                    listbox = task.get("listbox")
-                    val = task.get("item_value")
-                    if listbox and dpg.does_item_exist(listbox):
-                        dpg.set_value(listbox, val)
-                        cb = dpg.get_item_callback(listbox)
-                        if cb:
-                            # Dear PyGui callbacks for listbox usually receive (sender, app_data, user_data)
-                            # app_data is the selected value.
-                            cb(listbox, val)
-                elif action == "custom_callback":
-                    cb = task.get("callback")
-                    if cb:
-                        try:
                            cb()
-                        except Exception as e:
-                            print(f"Error in custom GUI hook callback: {e}")
                elif action == "refresh_api_metrics":
                    self._refresh_api_metrics(task.get("payload", {}))
            except Exception as e:
@@ -16,7 +16,6 @@ import session_logger
 import project_manager
 import theme_2 as theme
 import tomllib
-import events
 import numpy as np
 import api_hooks
 from performance_monitor import PerformanceMonitor
@@ -153,6 +152,7 @@ class App:
        self.last_file_items: list = []
        
        self.send_thread: threading.Thread | None = None
+        self._send_thread_lock = threading.Lock()
        self.models_thread: threading.Thread | None = None

        _default_windows = {
@@ -232,6 +232,10 @@ class App:
        self.perf_history = {"frame_time": [0.0]*100, "fps": [0.0]*100, "cpu": [0.0]*100, "input_lag": [0.0]*100}
        self._perf_last_update = 0.0

+        # Auto-save timer (every 60s)
+        self._autosave_interval = 60.0
+        self._last_autosave = time.time()
+
        session_logger.open_session()
        ai_client.set_provider(self.current_provider, self.current_model)
        ai_client.confirm_and_run_callback = self._confirm_and_run
@@ -590,41 +594,24 @@ class App:

    # ---------------------------------------------------------------- gui

-    def _show_menus(self):
-        if imgui.begin_menu("Windows"):
-            for w in self.show_windows.keys():
-                _, self.show_windows[w] = imgui.menu_item(w, "", self.show_windows[w])
-            imgui.end_menu()
-        if imgui.begin_menu("Project"):
-            if imgui.menu_item("Save All", "", False)[0]:
-                self._flush_to_project()
-                self._save_active_project()
-                self._flush_to_config()
-                save_config(self.config)
-                self.ai_status = "config saved"
-            if imgui.menu_item("Reset Session", "", False)[0]:
-                ai_client.reset_session()
-                ai_client.clear_comms_log()
-                self._tool_log.clear()
-                self._comms_log.clear()
-                self.ai_status = "session reset"
-                self.ai_response = ""
-            if imgui.menu_item("Generate MD Only", "", False)[0]:
-                try:
-                    md, path, *_ = self._do_generate()
-                    self.last_md = md
-                    self.last_md_path = path
-                    self.ai_status = f"md written: {path.name}"
-                except Exception as e:
-                    self.ai_status = f"error: {e}"
-            imgui.end_menu()
-
    def _gui_func(self):
        self.perf_monitor.start_frame()

        # Process GUI task queue
        self._process_pending_gui_tasks()

+        # Auto-save (every 60s)
+        now = time.time()
+        if now - self._last_autosave >= self._autosave_interval:
+            self._last_autosave = now
+            try:
+                self._flush_to_project()
+                self._save_active_project()
+                self._flush_to_config()
+                save_config(self.config)
+            except Exception:
+                pass  # silent — don't disrupt the GUI loop
+
        # Sync pending comms
        with self._pending_comms_lock:
            for c in self._pending_comms:
@@ -640,35 +627,35 @@ class App:
                self.disc_entries.append(item)
            self._pending_history_adds.clear()

-        # if imgui.begin_main_menu_bar():
-        #     if imgui.begin_menu("Windows"):
-        #         for w in self.show_windows.keys():
-        #             _, self.show_windows[w] = imgui.menu_item(w, "", self.show_windows[w])
-        #         imgui.end_menu()
-        #     if imgui.begin_menu("Project"):
-        #         if imgui.menu_item("Save All", "", False)[0]:
-        #             self._flush_to_project()
-        #             self._save_active_project()
-        #             self._flush_to_config()
-        #             save_config(self.config)
-        #             self.ai_status = "config saved"
-        #         if imgui.menu_item("Reset Session", "", False)[0]:
-        #             ai_client.reset_session()
-        #             ai_client.clear_comms_log()
-        #             self._tool_log.clear()
-        #             self._comms_log.clear()
-        #             self.ai_status = "session reset"
-        #             self.ai_response = ""
-        #         if imgui.menu_item("Generate MD Only", "", False)[0]:
-        #             try:
-        #                 md, path, *_ = self._do_generate()
-        #                 self.last_md = md
-        #                 self.last_md_path = path
-        #                 self.ai_status = f"md written: {path.name}"
-        #             except Exception as e:
-        #                 self.ai_status = f"error: {e}"
-        #         imgui.end_menu()
-        #     imgui.end_main_menu_bar()
+        if imgui.begin_main_menu_bar():
+            if imgui.begin_menu("Windows"):
+                for w in self.show_windows.keys():
+                    _, self.show_windows[w] = imgui.menu_item(w, "", self.show_windows[w])
+                imgui.end_menu()
+            if imgui.begin_menu("Project"):
+                if imgui.menu_item("Save All", "", False)[0]:
+                    self._flush_to_project()
+                    self._save_active_project()
+                    self._flush_to_config()
+                    save_config(self.config)
+                    self.ai_status = "config saved"
+                if imgui.menu_item("Reset Session", "", False)[0]:
+                    ai_client.reset_session()
+                    ai_client.clear_comms_log()
+                    self._tool_log.clear()
+                    self._comms_log.clear()
+                    self.ai_status = "session reset"
+                    self.ai_response = ""
+                if imgui.menu_item("Generate MD Only", "", False)[0]:
+                    try:
+                        md, path, *_ = self._do_generate()
+                        self.last_md = md
+                        self.last_md_path = path
+                        self.ai_status = f"md written: {path.name}"
+                    except Exception as e:
+                        self.ai_status = f"error: {e}"
+                imgui.end_menu()
+            imgui.end_main_menu_bar()

        # ---- Projects
        if self.show_windows["Projects"]:
@@ -1109,9 +1096,21 @@ class App:
                    imgui.separator()

                ch, self.ui_ai_input = imgui.input_text_multiline("##ai_in", self.ui_ai_input, imgui.ImVec2(-1, -40))
+
+                # Keyboard shortcuts
+                io = imgui.get_io()
+                ctrl_enter = io.key_ctrl and imgui.is_key_pressed(imgui.Key.enter)
+                ctrl_l = io.key_ctrl and imgui.is_key_pressed(imgui.Key.l)
+                if ctrl_l:
+                    self.ui_ai_input = ""
+
                imgui.separator()
-                if imgui.button("Gen + Send"):
-                    if not (self.send_thread and self.send_thread.is_alive()):
+                send_busy = False
+                with self._send_thread_lock:
+                    if self.send_thread and self.send_thread.is_alive():
+                        send_busy = True
+                if imgui.button("Gen + Send") or ctrl_enter:
+                    if not send_busy:
                        try:
                            md, path, file_items, stable_md, disc_text = self._do_generate()
                            self.last_md = md
@@ -1127,10 +1126,7 @@ class App:
                            ai_client.set_custom_system_prompt("\n\n".join(csp))
                            ai_client.set_model_params(self.temperature, self.max_tokens, self.history_trunc_limit)
                            ai_client.set_agent_tools(self.ui_agent_tools)
-                            # For Gemini: send stable_md (no history) as cached context,
-                            # and disc_text separately as conversation history.
-                            # For Anthropic: send full md (with history) as before.
-                            send_md = stable_md  # No history in cached context for either provider
+                            send_md = stable_md
                            send_disc = disc_text

                            def do_send():
@@ -1159,9 +1155,10 @@ class App:
                                    if self.ui_auto_add_history:
                                        with self._pending_history_adds_lock:
                                            self._pending_history_adds.append({"role": "System", "content": self.ai_response, "collapsed": False, "ts": project_manager.now_ts()})
-                                            
-                            self.send_thread = threading.Thread(target=do_send, daemon=True)
-                            self.send_thread.start()
+
+                            with self._send_thread_lock:
+                                self.send_thread = threading.Thread(target=do_send, daemon=True)
+                                self.send_thread.start()
                imgui.same_line()
                if imgui.button("MD Only"):
                    try:
@@ -1619,12 +1616,9 @@ class App:
        self.runner_params.app_window_params.window_geometry.size = (1680, 1200)
        self.runner_params.imgui_window_params.enable_viewports = True
        self.runner_params.imgui_window_params.default_imgui_window_type = hello_imgui.DefaultImGuiWindowType.provide_full_screen_dock_space
-        self.runner_params.imgui_window_params.show_menu_bar = True
        self.runner_params.ini_folder_type = hello_imgui.IniFolderType.current_folder
        self.runner_params.ini_filename = "manualslop_layout.ini"
-
        self.runner_params.callbacks.show_gui = self._gui_func
-        self.runner_params.callbacks.show_menus = self._show_menus
        self.runner_params.callbacks.load_additional_fonts = self._load_fonts
        self.runner_params.callbacks.post_init = self._post_init
        
@@ -1651,4 +1645,4 @@ def main():
    app.run()

 if __name__ == "__main__":
-    main()
+    main()
@@ -8,81 +8,70 @@ Size=400,400
 Collapsed=0

 [Window][Projects]
-ViewportPos=43,95
-ViewportId=0x78C57832
-Size=897,649
+Pos=209,396
+Size=387,337
 Collapsed=0
-DockId=0x0000000D,0
+DockId=0x00000014,0

 [Window][Files]
-ViewportPos=3125,170
-ViewportId=0x26D64416
-Size=593,581
+Pos=0,0
+Size=207,1200
 Collapsed=0
-DockId=0x00000009,0
+DockId=0x00000011,0

 [Window][Screenshots]
-ViewportPos=3125,170
-ViewportId=0x26D64416
-Pos=0,583
-Size=593,574
+Pos=209,0
+Size=387,171
 Collapsed=0
-DockId=0x0000000A,0
+DockId=0x00000015,0

 [Window][Discussion History]
-Pos=0,17
-Size=1680,730
-Collapsed=0
-DockId=0x0000000C,0
-
-[Window][Provider]
-ViewportPos=43,95
-ViewportId=0x78C57832
-Pos=0,651
-Size=897,468
+Pos=598,128
+Size=554,619
 Collapsed=0
 DockId=0x0000000E,0

-[Window][Message]
-Pos=0,749
-Size=1680,451
+[Window][Provider]
+Pos=209,913
+Size=387,287
 Collapsed=0
-DockId=0x0000000F,0
+DockId=0x0000000A,0
+
+[Window][Message]
+Pos=598,749
+Size=554,451
+Collapsed=0
+DockId=0x0000000C,0

 [Window][Response]
-Pos=0,749
-Size=1680,451
+Pos=209,735
+Size=387,176
 Collapsed=0
-DockId=0x0000000F,1
+DockId=0x00000010,0

 [Window][Tool Calls]
-ViewportPos=43,95
-ViewportId=0x78C57832
-Pos=0,1121
-Size=897,775
+Pos=1154,733
+Size=526,144
 Collapsed=0
-DockId=0x00000001,1
+DockId=0x00000008,0

 [Window][Comms History]
-ViewportPos=43,95
-ViewportId=0x78C57832
-Pos=0,1121
-Size=897,775
+Pos=1154,879
+Size=526,321
 Collapsed=0
-DockId=0x00000001,0
+DockId=0x00000006,0

 [Window][System Prompts]
-Pos=0,749
-Size=1680,451
+Pos=1154,0
+Size=286,731
 Collapsed=0
-DockId=0x0000000F,2
+DockId=0x00000017,0

 [Window][Theme]
-ViewportPos=43,95
-ViewportId=0x78C57832
-Size=897,649
+Pos=209,173
+Size=387,221
 Collapsed=0
-DockId=0x0000000D,2
+DockId=0x00000016,0

 [Window][Text Viewer - Entry #7]
 Pos=379,324
@@ -90,24 +79,37 @@ Size=900,700
 Collapsed=0

 [Window][Diagnostics]
-ViewportPos=43,95
-ViewportId=0x78C57832
-Size=897,649
+Pos=1442,0
+Size=238,731
 Collapsed=0
-DockId=0x0000000D,1
+DockId=0x00000018,0

 [Docking][Data]
-DockNode      ID=0x00000007 Pos=43,95 Size=897,1896 Split=Y
-  DockNode    ID=0x00000002 Parent=0x00000007 SizeRef=1029,1119 Split=Y
-    DockNode  ID=0x0000000D Parent=0x00000002 SizeRef=1029,649 Selected=0xB4CBF21A
-    DockNode  ID=0x0000000E Parent=0x00000002 SizeRef=1029,468 Selected=0xA07B5F14
-  DockNode    ID=0x00000001 Parent=0x00000007 SizeRef=1029,775 Selected=0x8B4EBFA6
-DockNode      ID=0x00000008 Pos=3125,170 Size=593,1157 Split=Y
-  DockNode    ID=0x00000009 Parent=0x00000008 SizeRef=1029,147 Selected=0x0469CA7A
-  DockNode    ID=0x0000000A Parent=0x00000008 SizeRef=1029,145 Selected=0xDF822E02
-DockSpace     ID=0xAFC85805 Window=0x079D3A04 Pos=1052,572 Size=1680,1183 Split=Y
-  DockNode    ID=0x0000000C Parent=0xAFC85805 SizeRef=1362,1041 CentralNode=1 Selected=0x5D11106F
-  DockNode    ID=0x0000000F Parent=0xAFC85805 SizeRef=1362,451 Selected=0xDD6419BC
+DockSpace               ID=0xAFC85805 Window=0x079D3A04 Pos=346,232 Size=1680,1200 Split=X
+  DockNode              ID=0x00000011 Parent=0xAFC85805 SizeRef=207,1200 Selected=0x0469CA7A
+  DockNode              ID=0x00000012 Parent=0xAFC85805 SizeRef=1559,1200 Split=X
+    DockNode            ID=0x00000003 Parent=0x00000012 SizeRef=943,1200 Split=X
+      DockNode          ID=0x00000001 Parent=0x00000003 SizeRef=387,1200 Split=Y Selected=0x8CA2375C
+        DockNode        ID=0x00000009 Parent=0x00000001 SizeRef=405,911 Split=Y Selected=0x8CA2375C
+          DockNode      ID=0x0000000F Parent=0x00000009 SizeRef=405,733 Split=Y Selected=0x8CA2375C
+            DockNode    ID=0x00000013 Parent=0x0000000F SizeRef=405,394 Split=Y Selected=0x8CA2375C
+              DockNode  ID=0x00000015 Parent=0x00000013 SizeRef=405,171 Selected=0xDF822E02
+              DockNode  ID=0x00000016 Parent=0x00000013 SizeRef=405,221 Selected=0x8CA2375C
+            DockNode    ID=0x00000014 Parent=0x0000000F SizeRef=405,337 Selected=0xDA22FEDA
+          DockNode      ID=0x00000010 Parent=0x00000009 SizeRef=405,176 Selected=0x0D5A5273
+        DockNode        ID=0x0000000A Parent=0x00000001 SizeRef=405,287 Selected=0xA07B5F14
+      DockNode          ID=0x00000002 Parent=0x00000003 SizeRef=554,1200 Split=Y
+        DockNode        ID=0x0000000B Parent=0x00000002 SizeRef=1010,747 Split=Y
+          DockNode      ID=0x0000000D Parent=0x0000000B SizeRef=1010,126 CentralNode=1
+          DockNode      ID=0x0000000E Parent=0x0000000B SizeRef=1010,619 Selected=0x5D11106F
+        DockNode        ID=0x0000000C Parent=0x00000002 SizeRef=1010,451 Selected=0x66CFB56E
+    DockNode            ID=0x00000004 Parent=0x00000012 SizeRef=526,1200 Split=Y Selected=0xDD6419BC
+      DockNode          ID=0x00000005 Parent=0x00000004 SizeRef=261,877 Split=Y Selected=0xDD6419BC
+        DockNode        ID=0x00000007 Parent=0x00000005 SizeRef=261,731 Split=X Selected=0xDD6419BC
+          DockNode      ID=0x00000017 Parent=0x00000007 SizeRef=286,731 Selected=0xDD6419BC
+          DockNode      ID=0x00000018 Parent=0x00000007 SizeRef=238,731 Selected=0xB4CBF21A
+        DockNode        ID=0x00000008 Parent=0x00000005 SizeRef=261,144 Selected=0x1D56B311
+      DockNode          ID=0x00000006 Parent=0x00000004 SizeRef=261,321 Selected=0x8B4EBFA6

 ;;;<<<Layout_655921752_Default>>>;;;
 ;;;<<<HelloImGui_Misc>>>;;;
@@ -117,6 +119,6 @@ Name=Default
 Show=false
 ShowFps=true
 [Theme]
-Name=SoDark_AccentRed
+Name=DarculaDarker
 ;;;<<<SplitIds>>>;;;
 {"gImGuiSplitIDs":{"MainDockSpace":2949142533}}
@@ -65,7 +65,10 @@ def configure(file_items: list[dict], extra_base_dirs: list[str] | None = None):
    for item in file_items:
        p = item.get("path")
        if p is not None:
-            rp = Path(p).resolve()
+            try:
+                rp = Path(p).resolve(strict=True)
+            except (OSError, ValueError):
+                rp = Path(p).resolve()
            _allowed_paths.add(rp)
            _base_dirs.add(rp.parent)

@@ -82,8 +85,13 @@ def _is_allowed(path: Path) -> bool:
    A path is allowed if:
      - it is explicitly in _allowed_paths, OR
      - it is contained within (or equal to) one of the _base_dirs
+    All paths are resolved (follows symlinks) before comparison to prevent
+    symlink-based path traversal.
    """
-    rp = path.resolve()
+    try:
+        rp = path.resolve(strict=True)
+    except (OSError, ValueError):
+        rp = path.resolve()
    if rp in _allowed_paths:
        return True
    for bd in _base_dirs:
@@ -104,7 +112,10 @@ def _resolve_and_check(raw_path: str) -> tuple[Path | None, str]:
        p = Path(raw_path)
        if not p.is_absolute() and _primary_base_dir:
            p = _primary_base_dir / p
-        p = p.resolve()
+        try:
+            p = p.resolve(strict=True)
+        except (OSError, ValueError):
+            p = p.resolve()
    except Exception as e:
        return None, f"ERROR: invalid path '{raw_path}': {e}"
    if not _is_allowed(p):
@@ -269,7 +280,8 @@ def web_search(query: str) -> str:
    url = "https://html.duckduckgo.com/html/?q=" + urllib.parse.quote(query)
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'})
    try:
-        html = urllib.request.urlopen(req, timeout=10).read().decode('utf-8', errors='ignore')
+        with urllib.request.urlopen(req, timeout=10) as resp:
+            html = resp.read().decode('utf-8', errors='ignore')
        parser = _DDGParser()
        parser.feed(html)
        if not parser.results:
@@ -292,7 +304,8 @@ def fetch_url(url: str) -> str:

    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'})
    try:
-        html = urllib.request.urlopen(req, timeout=10).read().decode('utf-8', errors='ignore')
+        with urllib.request.urlopen(req, timeout=10) as resp:
+            html = resp.read().decode('utf-8', errors='ignore')
        parser = _TextExtractor()
        parser.feed(html)
        full_text = " ".join(parser.text)
@@ -35,5 +35,5 @@ active = "main"

 [discussion.discussions.main]
 git_commit = ""
-last_updated = "2026-02-23T19:01:39"
+last_updated = "2026-02-23T16:52:30"
 history = []
@@ -26,6 +26,7 @@ scripts/generated/
 Where <ts> = YYYYMMDD_HHMMSS of when this session was started.
 """

+import atexit
 import datetime
 import json
 import threading
@@ -71,6 +72,8 @@ def open_session():
    _tool_fh.write(f"# Tool-call log — session {_ts}\n\n")
    _tool_fh.flush()

+    atexit.register(close_session)
+

 def close_session():
    """Flush and close both log files.  Called on clean exit (optional)."""
@@ -1,79 +0,0 @@
-import sys
-import os
-import time
-import random
-from api_hook_client import ApiHookClient
-from simulation.workflow_sim import WorkflowSimulator
-
-def main():
-    client = ApiHookClient()
-    print("=== Manual Slop: Live UX Walkthrough ===")
-    print("Connecting to GUI...")
-    if not client.wait_for_server(timeout=10):
-        print("Error: Could not connect to GUI. Ensure it is running with --enable-test-hooks")
-        return
-
-    sim = WorkflowSimulator(client)
-
-    # 1. Start Clean
-    print("\n[Action] Resetting Session...")
-    client.click("btn_reset")
-    time.sleep(2)
-
-    # 2. Project Scaffolding
-    project_name = f"LiveTest_{int(time.time())}"
-    # Use actual project dir for realism
-    git_dir = os.path.abspath(".")
-    project_path = os.path.join(git_dir, "tests", f"{project_name}.toml")
-    
-    print(f"\n[Action] Scaffolding Project: {project_name} at {project_path}")
-    sim.setup_new_project(project_name, git_dir, project_path)
-    
-    # Enable auto-add so results appear in history automatically
-    client.set_value("auto_add_history", True)
-    time.sleep(1)
-
-    # 3. Discussion Loop (3 turns for speed, but logic supports more)
-    turns = [
-        "Hi! I want to create a simple python script called 'hello.py' that prints the current date and time. Can you write it for me?",
-        "That looks great. Can you also add a feature to print the name of the operating system?",
-        "Excellent. Now, please create a requirements.txt file with 'requests' in it."
-    ]
-
-    for i, msg in enumerate(turns):
-        print(f"\n--- Turn {i+1} ---")
-        
-        # Switch to Comms Log to see the send
-        client.select_tab("operations_tabs", "tab_comms")
-        
-        sim.run_discussion_turn(msg)
-        
-        # Check thinking indicator
-        state = client.get_indicator_state("thinking_indicator")
-        if state.get('shown'):
-            print("[Status] Thinking indicator is visible.")
-        
-        # Switch to Tool Log halfway through wait
-        time.sleep(2)
-        client.select_tab("operations_tabs", "tab_tool")
-        
-        # Wait for AI response if not already finished
-        # (run_discussion_turn already waits, so we just observe)
-
-    # 4. History Management
-    print("\n[Action] Creating new discussion thread...")
-    sim.create_discussion("Refinement")
-    
-    print("\n[Action] Switching back to Default...")
-    sim.switch_discussion("Default")
-    
-    # 5. Manual Sign-off Simulation
-    print("\n=== Walkthrough Complete ===")
-    print("Please verify the following in the GUI:")
-    print("1. The project metadata reflects the new project.")
-    print("2. The discussion history contains the 3 turns.")
-    print("3. The 'Refinement' discussion exists in the list.")
-    print("\nWalkthrough finished successfully.")
-
-if __name__ == "__main__":
-    main()
@@ -1,57 +0,0 @@
-import sys
-import os
-import time
-
-# Ensure project root is in path
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-
-from api_hook_client import ApiHookClient
-from simulation.user_agent import UserSimAgent
-
-def main():
-    client = ApiHookClient()
-    print("Waiting for hook server...")
-    if not client.wait_for_server(timeout=5):
-        print("Hook server not found. Start GUI with --enable-test-hooks")
-        return
-
-    sim_agent = UserSimAgent(client)
-    
-    # 1. Reset session to start clean
-    print("Resetting session...")
-    client.click("btn_reset")
-    time.sleep(2) # Give it time to clear
-    
-    # 2. Initial message
-    initial_msg = "Hello! I want to create a simple python script that prints 'Hello World'. Can you help me?"
-    print(f"
-[USER]: {initial_msg}")
-    client.set_value("ai_input", initial_msg)
-    client.click("btn_gen_send")
-    
-    # 3. Wait for AI response
-    print("Waiting for AI response...", end="", flush=True)
-    last_entry_count = 0
-    for _ in range(60): # 60 seconds max
-        time.sleep(1)
-        print(".", end="", flush=True)
-        session = client.get_session()
-        entries = session.get('session', {}).get('entries', [])
-        
-        if len(entries) > last_entry_count:
-            # Something happened
-            last_entry = entries[-1]
-            if last_entry.get('role') == 'AI' and last_entry.get('content'):
-                print(f"
-
-[AI]: {last_entry.get('content')[:100]}...")
-                print("
-Ping-pong successful!")
-                return
-            last_entry_count = len(entries)
-            
-    print("
-Timeout waiting for AI response")
-
-if __name__ == "__main__":
-    main()
@@ -1,50 +0,0 @@
-import time
-import random
-import ai_client
-
-class UserSimAgent:
-    def __init__(self, hook_client, model="gemini-2.0-flash"):
-        self.hook_client = hook_client
-        self.model = model
-        self.system_prompt = (
-            "You are a software engineer testing an AI coding assistant called 'Manual Slop'. "
-            "You want to build a small Python project and verify the assistant's capabilities. "
-            "Keep your responses concise and human-like. "
-            "Do not use markdown blocks for your main message unless you are providing code."
-        )
-
-    def generate_response(self, conversation_history):
-        """
-        Generates a human-like response based on the conversation history.
-        conversation_history: list of dicts with 'role' and 'content'
-        """
-        # Format history for ai_client
-        # ai_client expects md_content and user_message.
-        # It handles its own internal history.
-        # We want the 'User AI' to have context of what the 'Assistant AI' said.
-        
-        # For now, let's just use the last message from Assistant as the prompt.
-        last_ai_msg = ""
-        for entry in reversed(conversation_history):
-            if entry.get('role') == 'AI':
-                last_ai_msg = entry.get('content', '')
-                break
-        
-        # We need to set a custom system prompt for the User Simulator
-        try:
-            ai_client.set_custom_system_prompt(self.system_prompt)
-            # We'll use a blank md_content for now as the 'User' doesn't need to read its own files 
-            # via the same mechanism, but we could provide it if needed.
-            response = ai_client.send(md_content="", user_message=last_ai_msg)
-        finally:
-            ai_client.set_custom_system_prompt("")
-            
-        return response
-
-    def perform_action_with_delay(self, action_func, *args, **kwargs):
-        """
-        Executes an action with a human-like delay.
-        """
-        delay = random.uniform(0.5, 2.0)
-        time.sleep(delay)
-        return action_func(*args, **kwargs)
@@ -1,76 +0,0 @@
-import time
-import os
-from api_hook_client import ApiHookClient
-from simulation.user_agent import UserSimAgent
-
-class WorkflowSimulator:
-    def __init__(self, hook_client: ApiHookClient):
-        self.client = hook_client
-        self.user_agent = UserSimAgent(hook_client)
-
-    def setup_new_project(self, name, git_dir, project_path=None):
-        print(f"Setting up new project: {name}")
-        if project_path:
-            self.client.click("btn_project_new_automated", user_data=project_path)
-        else:
-            self.client.click("btn_project_new")
-        time.sleep(1)
-        self.client.set_value("project_git_dir", git_dir)
-        self.client.click("btn_project_save")
-        time.sleep(1)
-
-    def create_discussion(self, name):
-        print(f"Creating discussion: {name}")
-        self.client.set_value("disc_new_name_input", name)
-        self.client.click("btn_disc_create")
-        time.sleep(1)
-
-    def switch_discussion(self, name):
-        print(f"Switching to discussion: {name}")
-        self.client.select_list_item("disc_listbox", name)
-        time.sleep(1)
-
-    def load_prior_log(self):
-        print("Loading prior log")
-        self.client.click("btn_load_log")
-        # This usually opens a file dialog which we can't easily automate from here
-        # without more hooks, but we can verify the button click.
-        time.sleep(1)
-
-    def truncate_history(self, pairs):
-        print(f"Truncating history to {pairs} pairs")
-        self.client.set_value("disc_truncate_pairs", pairs)
-        self.client.click("btn_disc_truncate")
-        time.sleep(1)
-
-    def run_discussion_turn(self, user_message=None):
-        if user_message is None:
-            # Generate from AI history
-            session = self.client.get_session()
-            entries = session.get('session', {}).get('entries', [])
-            user_message = self.user_agent.generate_response(entries)
-
-        print(f"\n[USER]: {user_message}")
-        self.client.set_value("ai_input", user_message)
-        self.client.click("btn_gen_send")
-        
-        # Wait for AI
-        return self.wait_for_ai_response()
-
-    def wait_for_ai_response(self, timeout=60):
-        print("Waiting for AI response...", end="", flush=True)
-        start_time = time.time()
-        last_count = len(self.client.get_session().get('session', {}).get('entries', []))
-        
-        while time.time() - start_time < timeout:
-            time.sleep(1)
-            print(".", end="", flush=True)
-            entries = self.client.get_session().get('session', {}).get('entries', [])
-            if len(entries) > last_count:
-                last_entry = entries[-1]
-                if last_entry.get('role') == 'AI' and last_entry.get('content'):
-                    print(f"\n[AI]: {last_entry.get('content')[:100]}...")
-                    return last_entry
-            
-        print("\nTimeout waiting for AI")
-        return None
@@ -32,15 +32,11 @@ def live_gui():
    """
    print("\n[Fixture] Starting gui.py --enable-test-hooks...")
    
-    # Ensure logs directory exists
-    os.makedirs("logs", exist_ok=True)
-    log_file = open("logs/gui_test.log", "w", encoding="utf-8")
-
    # Start gui.py as a subprocess.
    process = subprocess.Popen(
        ["uv", "run", "python", "gui.py", "--enable-test-hooks"],
-        stdout=log_file,
-        stderr=log_file,
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,
        text=True,
        creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if os.name == 'nt' else 0
    )
@@ -1,44 +0,0 @@
-[project]
-name = "temp_project"
-git_dir = "C:\\projects\\manual_slop"
-system_prompt = ""
-main_context = ""
-word_wrap = true
-summary_only = false
-
-[output]
-output_dir = "./md_gen"
-
-[files]
-base_dir = "."
-paths = []
-
-[screenshots]
-base_dir = "."
-paths = []
-
-[agent.tools]
-run_powershell = true
-read_file = true
-list_directory = true
-search_files = true
-get_file_summary = true
-web_search = true
-fetch_url = true
-
-[discussion]
-roles = [
-    "User",
-    "AI",
-    "Vendor API",
-    "System",
-]
-active = "main"
-auto_add = true
-
-[discussion.discussions.main]
-git_commit = ""
-last_updated = "2026-02-23T20:56:57"
-history = [
-    "@2026-02-23T20:12:12\nSystem:\n[PERFORMANCE ALERT] CPU usage high: 121.9%. Please consider optimizing recent changes or reducing load.",
-]
@@ -1,75 +0,0 @@
-import pytest
-import sys
-import os
-
-# Ensure project root is in path for imports
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-
-from api_hook_client import ApiHookClient
-
-def test_api_client_has_extensions():
-    client = ApiHookClient()
-    # These should fail initially as they are not implemented
-    assert hasattr(client, 'select_tab')
-    assert hasattr(client, 'select_list_item')
-
-def test_select_tab_integration(live_gui):
-    client = ApiHookClient()
-    # We'll need to make sure the tags exist in gui.py
-    # For now, this is a placeholder for the integration test
-    response = client.select_tab("operations_tabs", "tab_tool")
-    assert response == {'status': 'queued'}
-
-def test_select_list_item_integration(live_gui):
-    client = ApiHookClient()
-    # Assuming 'Default' discussion exists or we can just test that it queues
-    response = client.select_list_item("disc_listbox", "Default")
-    assert response == {'status': 'queued'}
-
-def test_get_indicator_state_integration(live_gui):
-    client = ApiHookClient()
-    # thinking_indicator is usually hidden unless AI is running
-    response = client.get_indicator_state("thinking_indicator")
-    assert 'shown' in response
-    assert response['tag'] == "thinking_indicator"
-
-def test_app_processes_new_actions():
-    import gui
-    from unittest.mock import MagicMock, patch
-    import dearpygui.dearpygui as dpg
-
-    dpg.create_context()
-    try:
-        with patch('gui.load_config', return_value={}), \
-             patch('gui.PerformanceMonitor'), \
-             patch('gui.shell_runner'), \
-             patch('gui.project_manager'), \
-             patch.object(gui.App, '_load_active_project'):
-            app = gui.App()
-            
-            with patch('dearpygui.dearpygui.set_value') as mock_set_value, \
-                 patch('dearpygui.dearpygui.does_item_exist', return_value=True), \
-                 patch('dearpygui.dearpygui.get_item_callback') as mock_get_cb:
-                
-                # Test select_tab
-                app._pending_gui_tasks.append({
-                    "action": "select_tab",
-                    "tab_bar": "some_tab_bar",
-                    "tab": "some_tab"
-                })
-                app._process_pending_gui_tasks()
-                mock_set_value.assert_any_call("some_tab_bar", "some_tab")
-                
-                # Test select_list_item
-                mock_cb = MagicMock()
-                mock_get_cb.return_value = mock_cb
-                app._pending_gui_tasks.append({
-                    "action": "select_list_item",
-                    "listbox": "some_listbox",
-                    "item_value": "some_value"
-                })
-                app._process_pending_gui_tasks()
-                mock_set_value.assert_any_call("some_listbox", "some_value")
-                mock_cb.assert_called_with("some_listbox", "some_value")
-    finally:
-        dpg.destroy_context()
@@ -1,48 +0,0 @@
-import pytest
-from unittest.mock import MagicMock, patch
-from gui_2 import App
-import ai_client
-from events import EventEmitter
-
-@pytest.fixture
-def app_instance():
-    """
-    Fixture to create an instance of the gui_2.App class for testing.
-    It mocks functions that would render a window or block execution.
-    """
-    if not hasattr(ai_client, 'events') or ai_client.events is None:
-        ai_client.events = EventEmitter()
-
-    with (
-        patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
-        patch('gui_2.save_config'),
-        patch('gui_2.project_manager'),
-        patch('gui_2.session_logger'),
-        patch('gui_2.immapp.run'),
-        patch.object(App, '_load_active_project'),
-        patch.object(App, '_fetch_models'),
-        patch.object(App, '_load_fonts'),
-        patch.object(App, '_post_init')
-    ):
-        yield App
-
-def test_app_subscribes_to_events(app_instance):
-    """
-    This test checks that the App's __init__ method subscribes the necessary
-    event handlers to the ai_client.events emitter.
-    This test will fail until the event subscription logic is added to gui_2.App.
-    """
-    with patch.object(ai_client.events, 'on') as mock_on:
-        app = app_instance()
-        mock_on.assert_called()
-        
-        calls = mock_on.call_args_list
-        event_names = [call.args[0] for call in calls]
-        assert "request_start" in event_names
-        assert "response_received" in event_names
-        assert "tool_execution" in event_names
-
-        for call in calls:
-            handler = call.args[1]
-            assert hasattr(handler, '__self__')
-            assert handler.__self__ is app
@@ -1,78 +0,0 @@
-import pytest
-from unittest.mock import patch, MagicMock
-from gui_2 import App
-import ai_client
-from events import EventEmitter
-
-@pytest.fixture
-def app_instance():
-    if not hasattr(ai_client, 'events') or ai_client.events is None:
-        ai_client.events = EventEmitter()
-
-    with (
-        patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
-        patch('gui_2.save_config'),
-        patch('gui_2.project_manager'),
-        patch('gui_2.session_logger'),
-        patch('gui_2.immapp.run'),
-        patch.object(App, '_load_active_project'),
-        patch.object(App, '_fetch_models'),
-        patch.object(App, '_load_fonts'),
-        patch.object(App, '_post_init')
-    ):
-        yield App()
-
-def test_mcp_tool_call_is_dispatched(app_instance):
-    """
-    This test verifies that when the AI returns a tool call for an MCP function,
-    the ai_client correctly dispatches it to mcp_client.
-    This will fail until mcp_client is properly integrated.
-    """
-    # 1. Define the mock tool call from the AI
-    mock_fc = MagicMock()
-    mock_fc.name = "read_file"
-    mock_fc.args = {"file_path": "test.txt"}
-
-    # 2. Construct the mock AI response (Gemini format)
-    mock_response_with_tool = MagicMock()
-    mock_part = MagicMock()
-    mock_part.function_call = mock_fc
-    mock_candidate = MagicMock()
-    mock_candidate.content.parts = [mock_part]
-    mock_candidate.finish_reason.name = "TOOL_CALLING"
-    mock_response_with_tool.candidates = [mock_candidate]
-    
-    mock_usage_metadata = MagicMock()
-    mock_usage_metadata.prompt_token_count = 100
-    mock_usage_metadata.candidates_token_count = 10
-    mock_usage_metadata.cached_content_token_count = 0
-    mock_response_with_tool.usage_metadata = mock_usage_metadata
-
-    # 3. Create a mock for the final AI response after the tool call
-    mock_response_final = MagicMock()
-    mock_response_final.text = "Final answer"
-    mock_response_final.candidates = []
-    mock_response_final.usage_metadata = mock_usage_metadata
-
-    # 4. Patch the necessary components
-    with patch("ai_client._ensure_gemini_client"), \
-         patch("ai_client._gemini_client"), \
-         patch("ai_client._gemini_chat") as mock_chat, \
-         patch('mcp_client.dispatch', return_value="file content") as mock_dispatch:
-        
-        mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
-        ai_client._gemini_chat = mock_chat
-        
-        ai_client.set_provider("gemini", "mock-model")
-        
-        # 5. Call the send function
-        ai_client.send(
-            md_content="some context",
-            user_message="read the file",
-            base_dir=".",
-            file_items=[],
-            discussion_history=""
-        )
-
-        # 6. Assert that the MCP dispatch function was called
-        mock_dispatch.assert_called_once_with("read_file", {"file_path": "test.txt"})
@@ -1,88 +0,0 @@
-import pytest
-import time
-import sys
-import os
-
-# Ensure project root is in path
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-
-from api_hook_client import ApiHookClient
-
-@pytest.mark.integration
-def test_full_live_workflow(live_gui):
-    """
-    Integration test that drives the GUI through a full workflow.
-    """
-    client = ApiHookClient()
-    assert client.wait_for_server(timeout=10)
-    time.sleep(2)
-
-    # 1. Reset
-    client.click("btn_reset")
-    time.sleep(1)
-
-    # 2. Project Setup
-    temp_project_path = os.path.abspath("tests/temp_project.toml")
-    if os.path.exists(temp_project_path):
-        os.remove(temp_project_path)
-        
-    client.click("btn_project_new_automated", user_data=temp_project_path)
-    time.sleep(1) # Wait for project creation and switch
-    
-    # Verify metadata update
-    proj = client.get_project()
-    
-    test_git = os.path.abspath(".")
-    client.set_value("project_git_dir", test_git)
-    client.click("btn_project_save")
-    time.sleep(1)
-    
-    proj = client.get_project()
-    # flat_config returns {"project": {...}, "output": ...} 
-    # so proj is {"project": {"project": {"git_dir": ...}}}
-    assert proj['project']['project']['git_dir'] == test_git
-
-    # Enable auto-add so the response ends up in history
-    client.set_value("auto_add_history", True)
-    time.sleep(0.5)
-
-    # 3. Discussion Turn
-    client.set_value("ai_input", "Hello! This is an automated test. Just say 'Acknowledged'.")
-    client.click("btn_gen_send")
-    
-    # Verify thinking indicator appears (might be brief)
-    thinking_seen = False
-    print("\nPolling for thinking indicator...")
-    for i in range(20):
-        state = client.get_indicator_state("thinking_indicator")
-        if state.get('shown'):
-            thinking_seen = True
-            print(f"Thinking indicator seen at poll {i}")
-            break
-        time.sleep(0.5)
-    
-    # 4. Wait for response in session
-    success = False
-    print("Waiting for AI response in session...")
-    for i in range(60):
-        session = client.get_session()
-        entries = session.get('session', {}).get('entries', [])
-        if any(e.get('role') == 'AI' for e in entries):
-            success = True
-            print(f"AI response found at second {i}")
-            break
-        time.sleep(1)
-
-    assert success, "AI failed to respond within 60 seconds"
-
-    # 5. Switch Discussion
-    client.set_value("disc_new_name_input", "AutoDisc")
-    client.click("btn_disc_create")
-    time.sleep(0.5)
-    
-    client.select_list_item("disc_listbox", "AutoDisc")
-    time.sleep(0.5)
-    
-    # Verify session is empty in new discussion
-    session = client.get_session()
-    assert len(session.get('session', {}).get('entries', [])) == 0
@@ -1,22 +0,0 @@
-import pytest
-import sys
-import os
-
-# Ensure project root is in path for imports
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-
-from simulation.user_agent import UserSimAgent
-
-def test_user_agent_instantiation():
-    agent = UserSimAgent(hook_client=None)
-    assert agent is not None
-
-def test_perform_action_with_delay():
-    agent = UserSimAgent(hook_client=None)
-    called = False
-    def action():
-        nonlocal called
-        called = True
-    
-    agent.perform_action_with_delay(action)
-    assert called is True
@@ -1,47 +0,0 @@
-import pytest
-import sys
-import os
-from unittest.mock import MagicMock, patch
-
-# Ensure project root is in path
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-
-from simulation.workflow_sim import WorkflowSimulator
-
-def test_simulator_instantiation():
-    client = MagicMock()
-    sim = WorkflowSimulator(client)
-    assert sim is not None
-
-def test_setup_new_project():
-    client = MagicMock()
-    sim = WorkflowSimulator(client)
-    
-    # Mock responses for wait_for_server
-    client.wait_for_server.return_value = True
-    
-    sim.setup_new_project("TestProject", "/tmp/test_git")
-    
-    # Verify hook calls
-    client.click.assert_any_call("btn_project_new")
-    client.set_value.assert_any_call("project_git_dir", "/tmp/test_git")
-    client.click.assert_any_call("btn_project_save")
-
-def test_discussion_switching():
-    client = MagicMock()
-    sim = WorkflowSimulator(client)
-    
-    sim.create_discussion("NewDisc")
-    client.set_value.assert_called_with("disc_new_name_input", "NewDisc")
-    client.click.assert_called_with("btn_disc_create")
-    
-    sim.switch_discussion("NewDisc")
-    client.select_list_item.assert_called_with("disc_listbox", "NewDisc")
-
-def test_history_truncation():
-    client = MagicMock()
-    sim = WorkflowSimulator(client)
-    
-    sim.truncate_history(3)
-    client.set_value.assert_called_with("disc_truncate_pairs", 3)
-    client.click.assert_called_with("btn_disc_truncate")