feat(gui): implement Phases 2-5 of Comprehensive GUI UX track

- Add cost tracking with new cost_tracker.py module
- Enhance Track Proposal modal with editable titles and goals
- Add Conductor Setup summary and New Track creation form to MMA Dashboard
- Implement Task DAG editing (add/delete tickets) and track-scoped discussion
- Add visual polish: color-coded statuses, tinted progress bars, and node indicators
- Support live worker streaming from AI providers to GUI panels
- Fix numerous integration test regressions and stabilize headless service
This commit is contained in:
2026-03-01 20:17:31 -05:00
parent 2ce7a87069
commit d1ce0eaaeb
27 changed files with 1763 additions and 254 deletions

View File

@@ -506,7 +506,7 @@ def _truncate_tool_output(output: str) -> str:
def _reread_file_items(file_items: list[dict]) -> tuple[list[dict], list[dict]]:
"""
Re-read file_items from disk, but only files whose mtime has changed.
Returns (all_items, changed_items) all_items is the full refreshed list,
Returns (all_items, changed_items) — all_items is the full refreshed list,
changed_items contains only the files that were actually modified since
the last read (used to build a minimal [FILES UPDATED] block).
"""
@@ -523,7 +523,7 @@ def _reread_file_items(file_items: list[dict]) -> tuple[list[dict], list[dict]]:
current_mtime = p.stat().st_mtime
prev_mtime = item.get("mtime", 0.0)
if current_mtime == prev_mtime:
refreshed.append(item) # unchanged skip re-read
refreshed.append(item) # unchanged — skip re-read
continue
content = p.read_text(encoding="utf-8")
new_item = {**item, "old_content": item.get("content", ""), "content": content, "error": False, "mtime": current_mtime}
@@ -622,7 +622,8 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
discussion_history: str = "",
pre_tool_callback: Optional[Callable[[str], bool]] = None,
qa_callback: Optional[Callable[[str], str]] = None,
enable_tools: bool = True) -> str:
enable_tools: bool = True,
stream_callback: Optional[Callable[[str], None]] = None) -> str:
global _gemini_chat, _gemini_cache, _gemini_cache_md_hash, _gemini_cache_created_at
try:
_ensure_gemini_client(); mcp_client.configure(file_items or [], [base_dir])
@@ -729,14 +730,27 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
r["output"] = val
for r_idx in range(MAX_TOOL_ROUNDS + 2):
events.emit("request_start", payload={"provider": "gemini", "model": _model, "round": r_idx})
resp = _gemini_chat.send_message(payload)
txt = "\n".join(p.text for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "text") and p.text)
if stream_callback:
resp = _gemini_chat.send_message_stream(payload)
txt_chunks = []
for chunk in resp:
c_txt = chunk.text
if c_txt:
txt_chunks.append(c_txt)
stream_callback(c_txt)
txt = "".join(txt_chunks)
calls = [p.function_call for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "function_call") and p.function_call]
usage = {"input_tokens": getattr(resp.usage_metadata, "prompt_token_count", 0), "output_tokens": getattr(resp.usage_metadata, "candidates_token_count", 0)}
cached_tokens = getattr(resp.usage_metadata, "cached_content_token_count", None)
if cached_tokens: usage["cache_read_input_tokens"] = cached_tokens
else:
resp = _gemini_chat.send_message(payload)
txt = "\n".join(p.text for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "text") and p.text)
calls = [p.function_call for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "function_call") and p.function_call]
usage = {"input_tokens": getattr(resp.usage_metadata, "prompt_token_count", 0), "output_tokens": getattr(resp.usage_metadata, "candidates_token_count", 0)}
cached_tokens = getattr(resp.usage_metadata, "cached_content_token_count", None)
if cached_tokens: usage["cache_read_input_tokens"] = cached_tokens
if txt: all_text.append(txt)
calls = [p.function_call for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "function_call") and p.function_call]
usage = {"input_tokens": getattr(resp.usage_metadata, "prompt_token_count", 0), "output_tokens": getattr(resp.usage_metadata, "candidates_token_count", 0)}
cached_tokens = getattr(resp.usage_metadata, "cached_content_token_count", None)
if cached_tokens:
usage["cache_read_input_tokens"] = cached_tokens
events.emit("response_received", payload={"provider": "gemini", "model": _model, "usage": usage, "round": r_idx})
reason = resp.candidates[0].finish_reason.name if resp.candidates and hasattr(resp.candidates[0], "finish_reason") else "STOP"
_append_comms("IN", "response", {"round": r_idx, "stop_reason": reason, "text": txt, "tool_calls": [{"name": c.name, "args": dict(c.args)} for c in calls], "usage": usage})
@@ -811,7 +825,8 @@ def _send_gemini_cli(md_content: str, user_message: str, base_dir: str,
file_items: list[dict[str, Any]] | None = None,
discussion_history: str = "",
pre_tool_callback: Optional[Callable[[str], bool]] = None,
qa_callback: Optional[Callable[[str], str]] = None) -> str:
qa_callback: Optional[Callable[[str], str]] = None,
stream_callback: Optional[Callable[[str], None]] = None) -> str:
global _gemini_cli_adapter
try:
if _gemini_cli_adapter is None:
@@ -833,7 +848,13 @@ def _send_gemini_cli(md_content: str, user_message: str, base_dir: str,
break
events.emit("request_start", payload={"provider": "gemini_cli", "model": _model, "round": r_idx})
_append_comms("OUT", "request", {"message": f"[CLI] [round {r_idx}] [msg {len(payload)}]"})
resp_data = adapter.send(payload, safety_settings=safety_settings, system_instruction=sys_instr, model=_model)
# If payload is tool results (list), serialize to JSON string for the CLI
send_payload = payload
if isinstance(payload, list):
send_payload = json.dumps(payload)
resp_data = adapter.send(send_payload, safety_settings=safety_settings, system_instruction=sys_instr, model=_model)
# Log any stderr from the CLI for transparency
cli_stderr = resp_data.get("stderr", "")
if cli_stderr:
@@ -989,7 +1010,7 @@ def _estimate_prompt_tokens(system_blocks: list[dict], history: list[dict]) -> i
for block in system_blocks:
text = block.get("text", "")
total += max(1, int(len(text) / _CHARS_PER_TOKEN))
# Tool definitions (rough fixed estimate they're ~2k tokens for our set)
# Tool definitions (rough fixed estimate — they're ~2k tokens for our set)
total += 2500
# History messages (uses cached estimates for unchanged messages)
for msg in history:
@@ -1004,7 +1025,7 @@ def _strip_stale_file_refreshes(history: list[dict[str, Any]]) -> None:
"""
if len(history) < 2:
return
# Find the index of the last user message we keep its file refresh intact
# Find the index of the last user message — we keep its file refresh intact
last_user_idx = -1
for i in range(len(history) - 1, -1, -1):
if history[i].get("role") == "user":
@@ -1120,7 +1141,7 @@ def _add_history_cache_breakpoint(history: list[dict[str, Any]]) -> None:
"""
user_indices = [i for i, m in enumerate(history) if m.get("role") == "user"]
if len(user_indices) < 2:
return # Only one user message (the current turn) nothing stable to cache
return # Only one user message (the current turn) — nothing stable to cache
target_idx = user_indices[-2]
content = history[target_idx].get("content")
if isinstance(content, list) and content:
@@ -1163,12 +1184,12 @@ def _repair_anthropic_history(history: list[dict[str, Any]]) -> None:
],
})
def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict[str, Any]] | None = None, discussion_history: str = "", pre_tool_callback: Optional[Callable[[str], bool]] = None, qa_callback: Optional[Callable[[str], str]] = None) -> str:
def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict[str, Any]] | None = None, discussion_history: str = "", pre_tool_callback: Optional[Callable[[str], bool]] = None, qa_callback: Optional[Callable[[str], str]] = None, stream_callback: Optional[Callable[[str], None]] = None) -> str:
try:
_ensure_anthropic_client()
mcp_client.configure(file_items or [], [base_dir])
# Split system into two cache breakpoints:
# 1. Stable system prompt (never changes always a cache hit)
# 1. Stable system prompt (never changes — always a cache hit)
# 2. Dynamic file context (invalidated only when files change)
stable_prompt = _get_combined_system_prompt()
stable_blocks = [{"type": "text", "text": stable_prompt, "cache_control": {"type": "ephemeral"}}]
@@ -1223,14 +1244,28 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
def _strip_private_keys(history: list[dict[str, Any]]) -> list[dict[str, Any]]:
return [{k: v for k, v in m.items() if not k.startswith("_")} for m in history]
events.emit("request_start", payload={"provider": "anthropic", "model": _model, "round": round_idx})
response = _anthropic_client.messages.create(
model=_model,
max_tokens=_max_tokens,
temperature=_temperature,
system=system_blocks,
tools=_get_anthropic_tools(),
messages=_strip_private_keys(_anthropic_history),
)
if stream_callback:
with _anthropic_client.messages.stream(
model=_model,
max_tokens=_max_tokens,
temperature=_temperature,
system=system_blocks,
tools=_get_anthropic_tools(),
messages=_strip_private_keys(_anthropic_history),
) as stream:
for event in stream:
if event.type == "content_block_delta" and event.delta.type == "text_delta":
stream_callback(event.delta.text)
response = stream.get_final_message()
else:
response = _anthropic_client.messages.create(
model=_model,
max_tokens=_max_tokens,
temperature=_temperature,
system=system_blocks,
tools=_get_anthropic_tools(),
messages=_strip_private_keys(_anthropic_history),
)
# Convert SDK content block objects to plain dicts before storing in history
serialised_content = [_content_block_to_dict(b) for b in response.content]
_anthropic_history.append({
@@ -1327,7 +1362,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
"text": f"SYSTEM WARNING: Cumulative tool output exceeded {_MAX_TOOL_OUTPUT_BYTES // 1000}KB budget. Provide your final answer now."
})
_append_comms("OUT", "request", {"message": f"[TOOL OUTPUT BUDGET EXCEEDED: {_cumulative_tool_bytes} bytes]"})
# Refresh file context after tool calls only inject CHANGED files
# Refresh file context after tool calls — only inject CHANGED files
if file_items:
file_items, changed = _reread_file_items(file_items)
refreshed_ctx = _build_file_diff_text(changed)
@@ -1335,7 +1370,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
tool_results.append({
"type": "text",
"text": (
"[FILES UPDATED current contents below. "
"[FILES UPDATED — current contents below. "
"Do NOT re-read these files with PowerShell.]\n\n"
+ refreshed_ctx
),
@@ -1377,7 +1412,8 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
discussion_history: str = "",
stream: bool = False,
pre_tool_callback: Optional[Callable[[str], bool]] = None,
qa_callback: Optional[Callable[[str], str]] = None) -> str:
qa_callback: Optional[Callable[[str], str]] = None,
stream_callback: Optional[Callable[[str], None]] = None) -> str:
"""
Sends a message to the DeepSeek API, handling tool calls and history.
Supports streaming responses.
@@ -1444,7 +1480,10 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
chunk = json.loads(chunk_str)
delta = chunk.get("choices", [{}])[0].get("delta", {})
if delta.get("content"):
aggregated_content += delta["content"]
content_chunk = delta["content"]
aggregated_content += content_chunk
if stream_callback:
stream_callback(content_chunk)
if delta.get("reasoning_content"):
aggregated_reasoning += delta["reasoning_content"]
if delta.get("tool_calls"):
@@ -1615,10 +1654,7 @@ def run_tier4_analysis(stderr: str) -> str:
# ------------------------------------------------------------------ unified send
import json
from typing import Any, Callable, Optional, List
# Assuming _model, _system_prompt, _provider, _send_lock are module-level variables
# and the _send_xxx functions are also defined at module level.
def send(
md_content: str,
@@ -1630,32 +1666,36 @@ def send(
pre_tool_callback: Optional[Callable[[str], bool]] = None,
qa_callback: Optional[Callable[[str], str]] = None,
enable_tools: bool = True,
stream_callback: Optional[Callable[[str], None]] = None,
) -> str:
"""
Send a message to the active provider.
md_content : aggregated markdown string (for Gemini: stable content only,
for Anthropic: full content including history)
user_message : the user question / instruction
base_dir : project base directory (for PowerShell tool calls)
file_items : list of file dicts from aggregate.build_file_items() for
dynamic context refresh after tool calls
discussion_history : discussion history text (used by Gemini to inject as
conversation message instead of caching it)
stream : Whether to use streaming (supported by DeepSeek)
pre_tool_callback : Optional callback (payload: str) -> bool called before tool execution
qa_callback : Optional callback (stderr: str) -> str called for Tier 4 error analysis
Sends a prompt with the full markdown context to the current AI provider.
Returns the final text response.
"""
with _send_lock:
if _provider == "gemini":
return _send_gemini(md_content, user_message, base_dir, file_items, discussion_history, pre_tool_callback, qa_callback, enable_tools=enable_tools)
return _send_gemini(
md_content, user_message, base_dir, file_items, discussion_history,
pre_tool_callback, qa_callback, enable_tools, stream_callback
)
elif _provider == "gemini_cli":
return _send_gemini_cli(md_content, user_message, base_dir, file_items, discussion_history, pre_tool_callback, qa_callback)
return _send_gemini_cli(
md_content, user_message, base_dir, file_items, discussion_history,
pre_tool_callback, qa_callback, stream_callback
)
elif _provider == "anthropic":
return _send_anthropic(md_content, user_message, base_dir, file_items, discussion_history, pre_tool_callback, qa_callback)
return _send_anthropic(
md_content, user_message, base_dir, file_items, discussion_history,
pre_tool_callback, qa_callback, stream_callback=stream_callback
)
elif _provider == "deepseek":
return _send_deepseek(md_content, user_message, base_dir, file_items, discussion_history, stream=stream, pre_tool_callback=pre_tool_callback, qa_callback=qa_callback)
raise ValueError(f"unknown provider: {_provider}")
return _send_deepseek(
md_content, user_message, base_dir, file_items, discussion_history,
stream, pre_tool_callback, qa_callback, stream_callback
)
else:
raise ValueError(f"Unknown provider: {_provider}")
def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
"""
Calculates how close the current conversation history is to the token limit.
@@ -1692,21 +1732,18 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
# Prepend context as a user part for counting
history.insert(0, types.Content(role="user", parts=[types.Part.from_text(text=md_content)]))
if not history:
print("[DEBUG] Gemini count_tokens skipped: no history or md_content")
return {
"provider": "gemini",
"limit": effective_limit,
"current": 0,
"percentage": 0,
}
print(f"[DEBUG] Gemini count_tokens on {len(history)} messages using model {_model}")
resp = _gemini_client.models.count_tokens(
model=_model,
contents=history
)
current_tokens = resp.total_tokens
percentage = (current_tokens / effective_limit) * 100 if effective_limit > 0 else 0
print(f"[DEBUG] Gemini current_tokens={current_tokens}, percentage={percentage:.4f}%")
return {
"provider": "gemini",
"limit": effective_limit,
@@ -1714,19 +1751,16 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
"percentage": percentage,
}
except Exception as e:
print(f"[DEBUG] Gemini count_tokens error: {e}")
pass
elif md_content:
try:
_ensure_gemini_client()
print(f"[DEBUG] Gemini count_tokens (MD ONLY) using model {_model}")
resp = _gemini_client.models.count_tokens(
model=_model,
contents=[types.Content(role="user", parts=[types.Part.from_text(text=md_content)])]
)
current_tokens = resp.total_tokens
percentage = (current_tokens / effective_limit) * 100 if effective_limit > 0 else 0
print(f"[DEBUG] Gemini (MD ONLY) current_tokens={current_tokens}, percentage={percentage:.4f}%")
return {
"provider": "gemini",
"limit": effective_limit,
@@ -1734,7 +1768,6 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
"percentage": percentage,
}
except Exception as e:
print(f"[DEBUG] Gemini count_tokens (MD ONLY) error: {e}")
pass
return {
"provider": "gemini",
@@ -1744,12 +1777,9 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
}
elif _provider == "gemini_cli":
effective_limit = _history_trunc_limit if _history_trunc_limit > 0 else _GEMINI_MAX_INPUT_TOKENS
# For Gemini CLI, we don't have direct count_tokens access without making a call,
# so we report the limit and current usage from the last run if available.
limit_tokens = effective_limit
current_tokens = 0
if _gemini_cli_adapter and _gemini_cli_adapter.last_usage:
# Stats from CLI use 'input_tokens' or 'input'
u = _gemini_cli_adapter.last_usage
current_tokens = u.get("input_tokens") or u.get("input", 0)
percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
@@ -1777,10 +1807,7 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
if isinstance(inp, dict):
import json as _json
current_tokens += len(_json.dumps(inp, ensure_ascii=False))
if md_content:
current_tokens += len(md_content)
if user_message:
current_tokens += len(user_message)
if md_content: current_tokens += len(md_content)
current_tokens = max(1, int(current_tokens / _CHARS_PER_TOKEN))
percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
return {
@@ -1789,7 +1816,6 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
"current": current_tokens,
"percentage": percentage,
}
# Default empty state
return {
"provider": _provider,
"limit": 0,