feat(gui): implement Phases 2-5 of Comprehensive GUI UX track

- Add cost tracking with new cost_tracker.py module
- Enhance Track Proposal modal with editable titles and goals
- Add Conductor Setup summary and New Track creation form to MMA Dashboard
- Implement Task DAG editing (add/delete tickets) and track-scoped discussion
- Add visual polish: color-coded statuses, tinted progress bars, and node indicators
- Support live worker streaming from AI providers to GUI panels
- Fix numerous integration test regressions and stabilize headless service
2026-03-01 20:17:31 -05:00
parent 2ce7a87069
commit d1ce0eaaeb
27 changed files with 1763 additions and 254 deletions
--- a/ai_client.py
+++ b/ai_client.py
@@ -506,7 +506,7 @@ def _truncate_tool_output(output: str) -> str:
 def _reread_file_items(file_items: list[dict]) -> tuple[list[dict], list[dict]]:
 """
    Re-read file_items from disk, but only files whose mtime has changed.
-    Returns (all_items, changed_items) — all_items is the full refreshed list,
+    Returns (all_items, changed_items) — all_items is the full refreshed list,
    changed_items contains only the files that were actually modified since
    the last read (used to build a minimal [FILES UPDATED] block).
    """
@@ -523,7 +523,7 @@ def _reread_file_items(file_items: list[dict]) -> tuple[list[dict], list[dict]]:
   current_mtime = p.stat().st_mtime
   prev_mtime = item.get("mtime", 0.0)
   if current_mtime == prev_mtime:
-    refreshed.append(item)  # unchanged — skip re-read
+    refreshed.append(item)  # unchanged — skip re-read
    continue
   content = p.read_text(encoding="utf-8")
   new_item = {**item, "old_content": item.get("content", ""), "content": content, "error": False, "mtime": current_mtime}
@@ -622,7 +622,8 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
 discussion_history: str = "",
 pre_tool_callback: Optional[Callable[[str], bool]] = None,
 qa_callback: Optional[Callable[[str], str]] = None,
- enable_tools: bool = True) -> str:
+ enable_tools: bool = True,
+ stream_callback: Optional[Callable[[str], None]] = None) -> str:
 global _gemini_chat, _gemini_cache, _gemini_cache_md_hash, _gemini_cache_created_at
 try:
  _ensure_gemini_client(); mcp_client.configure(file_items or [], [base_dir])
@@ -729,14 +730,27 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
         r["output"] = val
  for r_idx in range(MAX_TOOL_ROUNDS + 2):
   events.emit("request_start", payload={"provider": "gemini", "model": _model, "round": r_idx})
-   resp = _gemini_chat.send_message(payload)
-   txt = "\n".join(p.text for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "text") and p.text)
+   if stream_callback:
+    resp = _gemini_chat.send_message_stream(payload)
+    txt_chunks = []
+    for chunk in resp:
+     c_txt = chunk.text
+     if c_txt:
+      txt_chunks.append(c_txt)
+      stream_callback(c_txt)
+    txt = "".join(txt_chunks)
+    calls = [p.function_call for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "function_call") and p.function_call]
+    usage = {"input_tokens": getattr(resp.usage_metadata, "prompt_token_count", 0), "output_tokens": getattr(resp.usage_metadata, "candidates_token_count", 0)}
+    cached_tokens = getattr(resp.usage_metadata, "cached_content_token_count", None)
+    if cached_tokens: usage["cache_read_input_tokens"] = cached_tokens
+   else:
+    resp = _gemini_chat.send_message(payload)
+    txt = "\n".join(p.text for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "text") and p.text)
+    calls = [p.function_call for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "function_call") and p.function_call]
+    usage = {"input_tokens": getattr(resp.usage_metadata, "prompt_token_count", 0), "output_tokens": getattr(resp.usage_metadata, "candidates_token_count", 0)}
+    cached_tokens = getattr(resp.usage_metadata, "cached_content_token_count", None)
+    if cached_tokens: usage["cache_read_input_tokens"] = cached_tokens
   if txt: all_text.append(txt)
-   calls = [p.function_call for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "function_call") and p.function_call]
-   usage = {"input_tokens": getattr(resp.usage_metadata, "prompt_token_count", 0), "output_tokens": getattr(resp.usage_metadata, "candidates_token_count", 0)}
-   cached_tokens = getattr(resp.usage_metadata, "cached_content_token_count", None)
-   if cached_tokens:
-    usage["cache_read_input_tokens"] = cached_tokens
   events.emit("response_received", payload={"provider": "gemini", "model": _model, "usage": usage, "round": r_idx})
   reason = resp.candidates[0].finish_reason.name if resp.candidates and hasattr(resp.candidates[0], "finish_reason") else "STOP"
   _append_comms("IN", "response", {"round": r_idx, "stop_reason": reason, "text": txt, "tool_calls": [{"name": c.name, "args": dict(c.args)} for c in calls], "usage": usage})
@@ -811,7 +825,8 @@ def _send_gemini_cli(md_content: str, user_message: str, base_dir: str,
 file_items: list[dict[str, Any]] | None = None,
 discussion_history: str = "",
 pre_tool_callback: Optional[Callable[[str], bool]] = None,
- qa_callback: Optional[Callable[[str], str]] = None) -> str:
+ qa_callback: Optional[Callable[[str], str]] = None,
+ stream_callback: Optional[Callable[[str], None]] = None) -> str:
 global _gemini_cli_adapter
 try:
  if _gemini_cli_adapter is None:
@@ -833,7 +848,13 @@ def _send_gemini_cli(md_content: str, user_message: str, base_dir: str,
    break
   events.emit("request_start", payload={"provider": "gemini_cli", "model": _model, "round": r_idx})
   _append_comms("OUT", "request", {"message": f"[CLI] [round {r_idx}] [msg {len(payload)}]"})
-   resp_data = adapter.send(payload, safety_settings=safety_settings, system_instruction=sys_instr, model=_model)
+   
+   # If payload is tool results (list), serialize to JSON string for the CLI
+   send_payload = payload
+   if isinstance(payload, list):
+    send_payload = json.dumps(payload)
+    
+   resp_data = adapter.send(send_payload, safety_settings=safety_settings, system_instruction=sys_instr, model=_model)
   # Log any stderr from the CLI for transparency
   cli_stderr = resp_data.get("stderr", "")
   if cli_stderr:
@@ -989,7 +1010,7 @@ def _estimate_prompt_tokens(system_blocks: list[dict], history: list[dict]) -> i
 for block in system_blocks:
  text = block.get("text", "")
  total += max(1, int(len(text) / _CHARS_PER_TOKEN))
-  # Tool definitions (rough fixed estimate — they're ~2k tokens for our set)
+  # Tool definitions (rough fixed estimate — they're ~2k tokens for our set)
 total += 2500
 # History messages (uses cached estimates for unchanged messages)
 for msg in history:
@@ -1004,7 +1025,7 @@ def _strip_stale_file_refreshes(history: list[dict[str, Any]]) -> None:
    """
 if len(history) < 2:
  return
-  # Find the index of the last user message — we keep its file refresh intact
+  # Find the index of the last user message — we keep its file refresh intact
 last_user_idx = -1
 for i in range(len(history) - 1, -1, -1):
  if history[i].get("role") == "user":
@@ -1120,7 +1141,7 @@ def _add_history_cache_breakpoint(history: list[dict[str, Any]]) -> None:
    """
 user_indices = [i for i, m in enumerate(history) if m.get("role") == "user"]
 if len(user_indices) < 2:
-  return  # Only one user message (the current turn) — nothing stable to cache
+  return  # Only one user message (the current turn) — nothing stable to cache
 target_idx = user_indices[-2]
 content = history[target_idx].get("content")
 if isinstance(content, list) and content:
@@ -1163,12 +1184,12 @@ def _repair_anthropic_history(history: list[dict[str, Any]]) -> None:
   ],
  })

-def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict[str, Any]] | None = None, discussion_history: str = "", pre_tool_callback: Optional[Callable[[str], bool]] = None, qa_callback: Optional[Callable[[str], str]] = None) -> str:
+def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict[str, Any]] | None = None, discussion_history: str = "", pre_tool_callback: Optional[Callable[[str], bool]] = None, qa_callback: Optional[Callable[[str], str]] = None, stream_callback: Optional[Callable[[str], None]] = None) -> str:
 try:
  _ensure_anthropic_client()
  mcp_client.configure(file_items or [], [base_dir])
  # Split system into two cache breakpoints:
-  # 1. Stable system prompt (never changes — always a cache hit)
+  # 1. Stable system prompt (never changes — always a cache hit)
  # 2. Dynamic file context (invalidated only when files change)
  stable_prompt = _get_combined_system_prompt()
  stable_blocks = [{"type": "text", "text": stable_prompt, "cache_control": {"type": "ephemeral"}}]
@@ -1223,14 +1244,28 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
    def _strip_private_keys(history: list[dict[str, Any]]) -> list[dict[str, Any]]:
     return [{k: v for k, v in m.items() if not k.startswith("_")} for m in history]
   events.emit("request_start", payload={"provider": "anthropic", "model": _model, "round": round_idx})
-   response = _anthropic_client.messages.create(
-    model=_model,
-    max_tokens=_max_tokens,
-    temperature=_temperature,
-    system=system_blocks,
-    tools=_get_anthropic_tools(),
-    messages=_strip_private_keys(_anthropic_history),
-   )
+   if stream_callback:
+    with _anthropic_client.messages.stream(
+     model=_model,
+     max_tokens=_max_tokens,
+     temperature=_temperature,
+     system=system_blocks,
+     tools=_get_anthropic_tools(),
+     messages=_strip_private_keys(_anthropic_history),
+    ) as stream:
+     for event in stream:
+      if event.type == "content_block_delta" and event.delta.type == "text_delta":
+       stream_callback(event.delta.text)
+     response = stream.get_final_message()
+   else:
+    response = _anthropic_client.messages.create(
+     model=_model,
+     max_tokens=_max_tokens,
+     temperature=_temperature,
+     system=system_blocks,
+     tools=_get_anthropic_tools(),
+     messages=_strip_private_keys(_anthropic_history),
+    )
   # Convert SDK content block objects to plain dicts before storing in history
   serialised_content = [_content_block_to_dict(b) for b in response.content]
   _anthropic_history.append({
@@ -1327,7 +1362,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
      "text": f"SYSTEM WARNING: Cumulative tool output exceeded {_MAX_TOOL_OUTPUT_BYTES // 1000}KB budget. Provide your final answer now."
     })
    _append_comms("OUT", "request", {"message": f"[TOOL OUTPUT BUDGET EXCEEDED: {_cumulative_tool_bytes} bytes]"})
-    # Refresh file context after tool calls — only inject CHANGED files
+    # Refresh file context after tool calls — only inject CHANGED files
   if file_items:
    file_items, changed = _reread_file_items(file_items)
    refreshed_ctx = _build_file_diff_text(changed)
@@ -1335,7 +1370,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
     tool_results.append({
       "type": "text",
       "text": (
-        "[FILES UPDATED — current contents below. "
+        "[FILES UPDATED â€” current contents below. "
        "Do NOT re-read these files with PowerShell.]\n\n"
        + refreshed_ctx
       ),
@@ -1377,7 +1412,8 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
 discussion_history: str = "",
 stream: bool = False,
 pre_tool_callback: Optional[Callable[[str], bool]] = None,
- qa_callback: Optional[Callable[[str], str]] = None) -> str:
+ qa_callback: Optional[Callable[[str], str]] = None,
+ stream_callback: Optional[Callable[[str], None]] = None) -> str:
 """
    Sends a message to the DeepSeek API, handling tool calls and history.
    Supports streaming responses.
@@ -1444,7 +1480,10 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
       chunk = json.loads(chunk_str)
       delta = chunk.get("choices", [{}])[0].get("delta", {})
       if delta.get("content"):
-        aggregated_content += delta["content"]
+        content_chunk = delta["content"]
+        aggregated_content += content_chunk
+        if stream_callback:
+         stream_callback(content_chunk)
       if delta.get("reasoning_content"):
        aggregated_reasoning += delta["reasoning_content"]
       if delta.get("tool_calls"):
@@ -1615,10 +1654,7 @@ def run_tier4_analysis(stderr: str) -> str:
  # ------------------------------------------------------------------ unified send

 import json
-from typing import Any, Callable, Optional, List

-# Assuming _model, _system_prompt, _provider, _send_lock are module-level variables
-# and the _send_xxx functions are also defined at module level.

 def send(
 md_content: str,
@@ -1630,32 +1666,36 @@ def send(
 pre_tool_callback: Optional[Callable[[str], bool]] = None,
 qa_callback: Optional[Callable[[str], str]] = None,
 enable_tools: bool = True,
+ stream_callback: Optional[Callable[[str], None]] = None,
 ) -> str:
 """
-    Send a message to the active provider.
-
-    md_content         : aggregated markdown string (for Gemini: stable content only,
-                         for Anthropic: full content including history)
-    user_message       : the user question / instruction
-    base_dir           : project base directory (for PowerShell tool calls)
-    file_items         : list of file dicts from aggregate.build_file_items() for
-                         dynamic context refresh after tool calls
-    discussion_history : discussion history text (used by Gemini to inject as
-                         conversation message instead of caching it)
-    stream             : Whether to use streaming (supported by DeepSeek)
-    pre_tool_callback  : Optional callback (payload: str) -> bool called before tool execution
-    qa_callback        : Optional callback (stderr: str) -> str called for Tier 4 error analysis
+    Sends a prompt with the full markdown context to the current AI provider.
+    Returns the final text response.
    """
 with _send_lock:
  if _provider == "gemini":
-   return _send_gemini(md_content, user_message, base_dir, file_items, discussion_history, pre_tool_callback, qa_callback, enable_tools=enable_tools)
+   return _send_gemini(
+    md_content, user_message, base_dir, file_items, discussion_history,
+    pre_tool_callback, qa_callback, enable_tools, stream_callback
+   )
  elif _provider == "gemini_cli":
-   return _send_gemini_cli(md_content, user_message, base_dir, file_items, discussion_history, pre_tool_callback, qa_callback)
+   return _send_gemini_cli(
+    md_content, user_message, base_dir, file_items, discussion_history,
+    pre_tool_callback, qa_callback, stream_callback
+   )
  elif _provider == "anthropic":
-   return _send_anthropic(md_content, user_message, base_dir, file_items, discussion_history, pre_tool_callback, qa_callback)
+   return _send_anthropic(
+    md_content, user_message, base_dir, file_items, discussion_history,
+    pre_tool_callback, qa_callback, stream_callback=stream_callback
+   )
  elif _provider == "deepseek":
-   return _send_deepseek(md_content, user_message, base_dir, file_items, discussion_history, stream=stream, pre_tool_callback=pre_tool_callback, qa_callback=qa_callback)
-  raise ValueError(f"unknown provider: {_provider}")
+   return _send_deepseek(
+    md_content, user_message, base_dir, file_items, discussion_history,
+    stream, pre_tool_callback, qa_callback, stream_callback
+   )
+  else:
+   raise ValueError(f"Unknown provider: {_provider}")
+
 def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
 """
    Calculates how close the current conversation history is to the token limit.
@@ -1692,21 +1732,18 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
    # Prepend context as a user part for counting
     history.insert(0, types.Content(role="user", parts=[types.Part.from_text(text=md_content)]))
    if not history:
-     print("[DEBUG] Gemini count_tokens skipped: no history or md_content")
     return {
      "provider": "gemini",
      "limit": effective_limit,
      "current": 0,
      "percentage": 0,
     }
-    print(f"[DEBUG] Gemini count_tokens on {len(history)} messages using model {_model}")
    resp = _gemini_client.models.count_tokens(
     model=_model,
     contents=history
    )
    current_tokens = resp.total_tokens
    percentage = (current_tokens / effective_limit) * 100 if effective_limit > 0 else 0
-    print(f"[DEBUG] Gemini current_tokens={current_tokens}, percentage={percentage:.4f}%")
    return {
     "provider": "gemini",
     "limit": effective_limit,
@@ -1714,19 +1751,16 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
     "percentage": percentage,
    }
   except Exception as e:
-    print(f"[DEBUG] Gemini count_tokens error: {e}")
    pass
  elif md_content:
   try:
    _ensure_gemini_client()
-    print(f"[DEBUG] Gemini count_tokens (MD ONLY) using model {_model}")
    resp = _gemini_client.models.count_tokens(
     model=_model,
     contents=[types.Content(role="user", parts=[types.Part.from_text(text=md_content)])]
    )
    current_tokens = resp.total_tokens
    percentage = (current_tokens / effective_limit) * 100 if effective_limit > 0 else 0
-    print(f"[DEBUG] Gemini (MD ONLY) current_tokens={current_tokens}, percentage={percentage:.4f}%")
    return {
     "provider": "gemini",
     "limit": effective_limit,
@@ -1734,7 +1768,6 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
     "percentage": percentage,
    }
   except Exception as e:
-    print(f"[DEBUG] Gemini count_tokens (MD ONLY) error: {e}")
    pass
  return {
   "provider": "gemini",
@@ -1744,12 +1777,9 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
  }
 elif _provider == "gemini_cli":
  effective_limit = _history_trunc_limit if _history_trunc_limit > 0 else _GEMINI_MAX_INPUT_TOKENS
-  # For Gemini CLI, we don't have direct count_tokens access without making a call,
-  # so we report the limit and current usage from the last run if available.
  limit_tokens = effective_limit
  current_tokens = 0
  if _gemini_cli_adapter and _gemini_cli_adapter.last_usage:
-  # Stats from CLI use 'input_tokens' or 'input'
   u = _gemini_cli_adapter.last_usage
   current_tokens = u.get("input_tokens") or u.get("input", 0)
  percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
@@ -1777,10 +1807,7 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
       if isinstance(inp, dict):
        import json as _json
        current_tokens += len(_json.dumps(inp, ensure_ascii=False))
-  if md_content:
-   current_tokens += len(md_content)
-  if user_message:
-   current_tokens += len(user_message)
+  if md_content: current_tokens += len(md_content)
  current_tokens = max(1, int(current_tokens / _CHARS_PER_TOKEN))
  percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
  return {
@@ -1789,7 +1816,6 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
   "current": current_tokens,
   "percentage": percentage,
  }
-  # Default empty state
 return {
  "provider": _provider,
  "limit": 0,