feat(gui): implement Phases 2-5 of Comprehensive GUI UX track
- Add cost tracking with new cost_tracker.py module
- Enhance Track Proposal modal with editable titles and goals
- Add Conductor Setup summary and New Track creation form to MMA Dashboard
- Implement Task DAG editing (add/delete tickets) and track-scoped discussion
- Add visual polish: color-coded statuses, tinted progress bars, and node indicators
- Support live worker streaming from AI providers to GUI panels
- Fix numerous integration test regressions and stabilize headless service
This commit is contained in:
156
ai_client.py
156
ai_client.py
@@ -506,7 +506,7 @@ def _truncate_tool_output(output: str) -> str:
|
||||
def _reread_file_items(file_items: list[dict]) -> tuple[list[dict], list[dict]]:
|
||||
"""
|
||||
Re-read file_items from disk, but only files whose mtime has changed.
|
||||
Returns (all_items, changed_items) — all_items is the full refreshed list,
|
||||
Returns (all_items, changed_items) — all_items is the full refreshed list,
|
||||
changed_items contains only the files that were actually modified since
|
||||
the last read (used to build a minimal [FILES UPDATED] block).
|
||||
"""
|
||||
@@ -523,7 +523,7 @@ def _reread_file_items(file_items: list[dict]) -> tuple[list[dict], list[dict]]:
|
||||
current_mtime = p.stat().st_mtime
|
||||
prev_mtime = item.get("mtime", 0.0)
|
||||
if current_mtime == prev_mtime:
|
||||
refreshed.append(item) # unchanged — skip re-read
|
||||
refreshed.append(item) # unchanged — skip re-read
|
||||
continue
|
||||
content = p.read_text(encoding="utf-8")
|
||||
new_item = {**item, "old_content": item.get("content", ""), "content": content, "error": False, "mtime": current_mtime}
|
||||
@@ -622,7 +622,8 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
|
||||
discussion_history: str = "",
|
||||
pre_tool_callback: Optional[Callable[[str], bool]] = None,
|
||||
qa_callback: Optional[Callable[[str], str]] = None,
|
||||
enable_tools: bool = True) -> str:
|
||||
enable_tools: bool = True,
|
||||
stream_callback: Optional[Callable[[str], None]] = None) -> str:
|
||||
global _gemini_chat, _gemini_cache, _gemini_cache_md_hash, _gemini_cache_created_at
|
||||
try:
|
||||
_ensure_gemini_client(); mcp_client.configure(file_items or [], [base_dir])
|
||||
@@ -729,14 +730,27 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
|
||||
r["output"] = val
|
||||
for r_idx in range(MAX_TOOL_ROUNDS + 2):
|
||||
events.emit("request_start", payload={"provider": "gemini", "model": _model, "round": r_idx})
|
||||
resp = _gemini_chat.send_message(payload)
|
||||
txt = "\n".join(p.text for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "text") and p.text)
|
||||
if stream_callback:
|
||||
resp = _gemini_chat.send_message_stream(payload)
|
||||
txt_chunks = []
|
||||
for chunk in resp:
|
||||
c_txt = chunk.text
|
||||
if c_txt:
|
||||
txt_chunks.append(c_txt)
|
||||
stream_callback(c_txt)
|
||||
txt = "".join(txt_chunks)
|
||||
calls = [p.function_call for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "function_call") and p.function_call]
|
||||
usage = {"input_tokens": getattr(resp.usage_metadata, "prompt_token_count", 0), "output_tokens": getattr(resp.usage_metadata, "candidates_token_count", 0)}
|
||||
cached_tokens = getattr(resp.usage_metadata, "cached_content_token_count", None)
|
||||
if cached_tokens: usage["cache_read_input_tokens"] = cached_tokens
|
||||
else:
|
||||
resp = _gemini_chat.send_message(payload)
|
||||
txt = "\n".join(p.text for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "text") and p.text)
|
||||
calls = [p.function_call for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "function_call") and p.function_call]
|
||||
usage = {"input_tokens": getattr(resp.usage_metadata, "prompt_token_count", 0), "output_tokens": getattr(resp.usage_metadata, "candidates_token_count", 0)}
|
||||
cached_tokens = getattr(resp.usage_metadata, "cached_content_token_count", None)
|
||||
if cached_tokens: usage["cache_read_input_tokens"] = cached_tokens
|
||||
if txt: all_text.append(txt)
|
||||
calls = [p.function_call for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "function_call") and p.function_call]
|
||||
usage = {"input_tokens": getattr(resp.usage_metadata, "prompt_token_count", 0), "output_tokens": getattr(resp.usage_metadata, "candidates_token_count", 0)}
|
||||
cached_tokens = getattr(resp.usage_metadata, "cached_content_token_count", None)
|
||||
if cached_tokens:
|
||||
usage["cache_read_input_tokens"] = cached_tokens
|
||||
events.emit("response_received", payload={"provider": "gemini", "model": _model, "usage": usage, "round": r_idx})
|
||||
reason = resp.candidates[0].finish_reason.name if resp.candidates and hasattr(resp.candidates[0], "finish_reason") else "STOP"
|
||||
_append_comms("IN", "response", {"round": r_idx, "stop_reason": reason, "text": txt, "tool_calls": [{"name": c.name, "args": dict(c.args)} for c in calls], "usage": usage})
|
||||
@@ -811,7 +825,8 @@ def _send_gemini_cli(md_content: str, user_message: str, base_dir: str,
|
||||
file_items: list[dict[str, Any]] | None = None,
|
||||
discussion_history: str = "",
|
||||
pre_tool_callback: Optional[Callable[[str], bool]] = None,
|
||||
qa_callback: Optional[Callable[[str], str]] = None) -> str:
|
||||
qa_callback: Optional[Callable[[str], str]] = None,
|
||||
stream_callback: Optional[Callable[[str], None]] = None) -> str:
|
||||
global _gemini_cli_adapter
|
||||
try:
|
||||
if _gemini_cli_adapter is None:
|
||||
@@ -833,7 +848,13 @@ def _send_gemini_cli(md_content: str, user_message: str, base_dir: str,
|
||||
break
|
||||
events.emit("request_start", payload={"provider": "gemini_cli", "model": _model, "round": r_idx})
|
||||
_append_comms("OUT", "request", {"message": f"[CLI] [round {r_idx}] [msg {len(payload)}]"})
|
||||
resp_data = adapter.send(payload, safety_settings=safety_settings, system_instruction=sys_instr, model=_model)
|
||||
|
||||
# If payload is tool results (list), serialize to JSON string for the CLI
|
||||
send_payload = payload
|
||||
if isinstance(payload, list):
|
||||
send_payload = json.dumps(payload)
|
||||
|
||||
resp_data = adapter.send(send_payload, safety_settings=safety_settings, system_instruction=sys_instr, model=_model)
|
||||
# Log any stderr from the CLI for transparency
|
||||
cli_stderr = resp_data.get("stderr", "")
|
||||
if cli_stderr:
|
||||
@@ -989,7 +1010,7 @@ def _estimate_prompt_tokens(system_blocks: list[dict], history: list[dict]) -> i
|
||||
for block in system_blocks:
|
||||
text = block.get("text", "")
|
||||
total += max(1, int(len(text) / _CHARS_PER_TOKEN))
|
||||
# Tool definitions (rough fixed estimate — they're ~2k tokens for our set)
|
||||
# Tool definitions (rough fixed estimate — they're ~2k tokens for our set)
|
||||
total += 2500
|
||||
# History messages (uses cached estimates for unchanged messages)
|
||||
for msg in history:
|
||||
@@ -1004,7 +1025,7 @@ def _strip_stale_file_refreshes(history: list[dict[str, Any]]) -> None:
|
||||
"""
|
||||
if len(history) < 2:
|
||||
return
|
||||
# Find the index of the last user message — we keep its file refresh intact
|
||||
# Find the index of the last user message — we keep its file refresh intact
|
||||
last_user_idx = -1
|
||||
for i in range(len(history) - 1, -1, -1):
|
||||
if history[i].get("role") == "user":
|
||||
@@ -1120,7 +1141,7 @@ def _add_history_cache_breakpoint(history: list[dict[str, Any]]) -> None:
|
||||
"""
|
||||
user_indices = [i for i, m in enumerate(history) if m.get("role") == "user"]
|
||||
if len(user_indices) < 2:
|
||||
return # Only one user message (the current turn) — nothing stable to cache
|
||||
return # Only one user message (the current turn) — nothing stable to cache
|
||||
target_idx = user_indices[-2]
|
||||
content = history[target_idx].get("content")
|
||||
if isinstance(content, list) and content:
|
||||
@@ -1163,12 +1184,12 @@ def _repair_anthropic_history(history: list[dict[str, Any]]) -> None:
|
||||
],
|
||||
})
|
||||
|
||||
def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict[str, Any]] | None = None, discussion_history: str = "", pre_tool_callback: Optional[Callable[[str], bool]] = None, qa_callback: Optional[Callable[[str], str]] = None) -> str:
|
||||
def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict[str, Any]] | None = None, discussion_history: str = "", pre_tool_callback: Optional[Callable[[str], bool]] = None, qa_callback: Optional[Callable[[str], str]] = None, stream_callback: Optional[Callable[[str], None]] = None) -> str:
|
||||
try:
|
||||
_ensure_anthropic_client()
|
||||
mcp_client.configure(file_items or [], [base_dir])
|
||||
# Split system into two cache breakpoints:
|
||||
# 1. Stable system prompt (never changes — always a cache hit)
|
||||
# 1. Stable system prompt (never changes — always a cache hit)
|
||||
# 2. Dynamic file context (invalidated only when files change)
|
||||
stable_prompt = _get_combined_system_prompt()
|
||||
stable_blocks = [{"type": "text", "text": stable_prompt, "cache_control": {"type": "ephemeral"}}]
|
||||
@@ -1223,14 +1244,28 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
|
||||
def _strip_private_keys(history: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
return [{k: v for k, v in m.items() if not k.startswith("_")} for m in history]
|
||||
events.emit("request_start", payload={"provider": "anthropic", "model": _model, "round": round_idx})
|
||||
response = _anthropic_client.messages.create(
|
||||
model=_model,
|
||||
max_tokens=_max_tokens,
|
||||
temperature=_temperature,
|
||||
system=system_blocks,
|
||||
tools=_get_anthropic_tools(),
|
||||
messages=_strip_private_keys(_anthropic_history),
|
||||
)
|
||||
if stream_callback:
|
||||
with _anthropic_client.messages.stream(
|
||||
model=_model,
|
||||
max_tokens=_max_tokens,
|
||||
temperature=_temperature,
|
||||
system=system_blocks,
|
||||
tools=_get_anthropic_tools(),
|
||||
messages=_strip_private_keys(_anthropic_history),
|
||||
) as stream:
|
||||
for event in stream:
|
||||
if event.type == "content_block_delta" and event.delta.type == "text_delta":
|
||||
stream_callback(event.delta.text)
|
||||
response = stream.get_final_message()
|
||||
else:
|
||||
response = _anthropic_client.messages.create(
|
||||
model=_model,
|
||||
max_tokens=_max_tokens,
|
||||
temperature=_temperature,
|
||||
system=system_blocks,
|
||||
tools=_get_anthropic_tools(),
|
||||
messages=_strip_private_keys(_anthropic_history),
|
||||
)
|
||||
# Convert SDK content block objects to plain dicts before storing in history
|
||||
serialised_content = [_content_block_to_dict(b) for b in response.content]
|
||||
_anthropic_history.append({
|
||||
@@ -1327,7 +1362,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
|
||||
"text": f"SYSTEM WARNING: Cumulative tool output exceeded {_MAX_TOOL_OUTPUT_BYTES // 1000}KB budget. Provide your final answer now."
|
||||
})
|
||||
_append_comms("OUT", "request", {"message": f"[TOOL OUTPUT BUDGET EXCEEDED: {_cumulative_tool_bytes} bytes]"})
|
||||
# Refresh file context after tool calls — only inject CHANGED files
|
||||
# Refresh file context after tool calls — only inject CHANGED files
|
||||
if file_items:
|
||||
file_items, changed = _reread_file_items(file_items)
|
||||
refreshed_ctx = _build_file_diff_text(changed)
|
||||
@@ -1335,7 +1370,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
|
||||
tool_results.append({
|
||||
"type": "text",
|
||||
"text": (
|
||||
"[FILES UPDATED — current contents below. "
|
||||
"[FILES UPDATED — current contents below. "
|
||||
"Do NOT re-read these files with PowerShell.]\n\n"
|
||||
+ refreshed_ctx
|
||||
),
|
||||
@@ -1377,7 +1412,8 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
|
||||
discussion_history: str = "",
|
||||
stream: bool = False,
|
||||
pre_tool_callback: Optional[Callable[[str], bool]] = None,
|
||||
qa_callback: Optional[Callable[[str], str]] = None) -> str:
|
||||
qa_callback: Optional[Callable[[str], str]] = None,
|
||||
stream_callback: Optional[Callable[[str], None]] = None) -> str:
|
||||
"""
|
||||
Sends a message to the DeepSeek API, handling tool calls and history.
|
||||
Supports streaming responses.
|
||||
@@ -1444,7 +1480,10 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
|
||||
chunk = json.loads(chunk_str)
|
||||
delta = chunk.get("choices", [{}])[0].get("delta", {})
|
||||
if delta.get("content"):
|
||||
aggregated_content += delta["content"]
|
||||
content_chunk = delta["content"]
|
||||
aggregated_content += content_chunk
|
||||
if stream_callback:
|
||||
stream_callback(content_chunk)
|
||||
if delta.get("reasoning_content"):
|
||||
aggregated_reasoning += delta["reasoning_content"]
|
||||
if delta.get("tool_calls"):
|
||||
@@ -1615,10 +1654,7 @@ def run_tier4_analysis(stderr: str) -> str:
|
||||
# ------------------------------------------------------------------ unified send
|
||||
|
||||
import json
|
||||
from typing import Any, Callable, Optional, List
|
||||
|
||||
# Assuming _model, _system_prompt, _provider, _send_lock are module-level variables
|
||||
# and the _send_xxx functions are also defined at module level.
|
||||
|
||||
def send(
|
||||
md_content: str,
|
||||
@@ -1630,32 +1666,36 @@ def send(
|
||||
pre_tool_callback: Optional[Callable[[str], bool]] = None,
|
||||
qa_callback: Optional[Callable[[str], str]] = None,
|
||||
enable_tools: bool = True,
|
||||
stream_callback: Optional[Callable[[str], None]] = None,
|
||||
) -> str:
|
||||
"""
|
||||
Send a message to the active provider.
|
||||
|
||||
md_content : aggregated markdown string (for Gemini: stable content only,
|
||||
for Anthropic: full content including history)
|
||||
user_message : the user question / instruction
|
||||
base_dir : project base directory (for PowerShell tool calls)
|
||||
file_items : list of file dicts from aggregate.build_file_items() for
|
||||
dynamic context refresh after tool calls
|
||||
discussion_history : discussion history text (used by Gemini to inject as
|
||||
conversation message instead of caching it)
|
||||
stream : Whether to use streaming (supported by DeepSeek)
|
||||
pre_tool_callback : Optional callback (payload: str) -> bool called before tool execution
|
||||
qa_callback : Optional callback (stderr: str) -> str called for Tier 4 error analysis
|
||||
Sends a prompt with the full markdown context to the current AI provider.
|
||||
Returns the final text response.
|
||||
"""
|
||||
with _send_lock:
|
||||
if _provider == "gemini":
|
||||
return _send_gemini(md_content, user_message, base_dir, file_items, discussion_history, pre_tool_callback, qa_callback, enable_tools=enable_tools)
|
||||
return _send_gemini(
|
||||
md_content, user_message, base_dir, file_items, discussion_history,
|
||||
pre_tool_callback, qa_callback, enable_tools, stream_callback
|
||||
)
|
||||
elif _provider == "gemini_cli":
|
||||
return _send_gemini_cli(md_content, user_message, base_dir, file_items, discussion_history, pre_tool_callback, qa_callback)
|
||||
return _send_gemini_cli(
|
||||
md_content, user_message, base_dir, file_items, discussion_history,
|
||||
pre_tool_callback, qa_callback, stream_callback
|
||||
)
|
||||
elif _provider == "anthropic":
|
||||
return _send_anthropic(md_content, user_message, base_dir, file_items, discussion_history, pre_tool_callback, qa_callback)
|
||||
return _send_anthropic(
|
||||
md_content, user_message, base_dir, file_items, discussion_history,
|
||||
pre_tool_callback, qa_callback, stream_callback=stream_callback
|
||||
)
|
||||
elif _provider == "deepseek":
|
||||
return _send_deepseek(md_content, user_message, base_dir, file_items, discussion_history, stream=stream, pre_tool_callback=pre_tool_callback, qa_callback=qa_callback)
|
||||
raise ValueError(f"unknown provider: {_provider}")
|
||||
return _send_deepseek(
|
||||
md_content, user_message, base_dir, file_items, discussion_history,
|
||||
stream, pre_tool_callback, qa_callback, stream_callback
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unknown provider: {_provider}")
|
||||
|
||||
def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
|
||||
"""
|
||||
Calculates how close the current conversation history is to the token limit.
|
||||
@@ -1692,21 +1732,18 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
|
||||
# Prepend context as a user part for counting
|
||||
history.insert(0, types.Content(role="user", parts=[types.Part.from_text(text=md_content)]))
|
||||
if not history:
|
||||
print("[DEBUG] Gemini count_tokens skipped: no history or md_content")
|
||||
return {
|
||||
"provider": "gemini",
|
||||
"limit": effective_limit,
|
||||
"current": 0,
|
||||
"percentage": 0,
|
||||
}
|
||||
print(f"[DEBUG] Gemini count_tokens on {len(history)} messages using model {_model}")
|
||||
resp = _gemini_client.models.count_tokens(
|
||||
model=_model,
|
||||
contents=history
|
||||
)
|
||||
current_tokens = resp.total_tokens
|
||||
percentage = (current_tokens / effective_limit) * 100 if effective_limit > 0 else 0
|
||||
print(f"[DEBUG] Gemini current_tokens={current_tokens}, percentage={percentage:.4f}%")
|
||||
return {
|
||||
"provider": "gemini",
|
||||
"limit": effective_limit,
|
||||
@@ -1714,19 +1751,16 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
|
||||
"percentage": percentage,
|
||||
}
|
||||
except Exception as e:
|
||||
print(f"[DEBUG] Gemini count_tokens error: {e}")
|
||||
pass
|
||||
elif md_content:
|
||||
try:
|
||||
_ensure_gemini_client()
|
||||
print(f"[DEBUG] Gemini count_tokens (MD ONLY) using model {_model}")
|
||||
resp = _gemini_client.models.count_tokens(
|
||||
model=_model,
|
||||
contents=[types.Content(role="user", parts=[types.Part.from_text(text=md_content)])]
|
||||
)
|
||||
current_tokens = resp.total_tokens
|
||||
percentage = (current_tokens / effective_limit) * 100 if effective_limit > 0 else 0
|
||||
print(f"[DEBUG] Gemini (MD ONLY) current_tokens={current_tokens}, percentage={percentage:.4f}%")
|
||||
return {
|
||||
"provider": "gemini",
|
||||
"limit": effective_limit,
|
||||
@@ -1734,7 +1768,6 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
|
||||
"percentage": percentage,
|
||||
}
|
||||
except Exception as e:
|
||||
print(f"[DEBUG] Gemini count_tokens (MD ONLY) error: {e}")
|
||||
pass
|
||||
return {
|
||||
"provider": "gemini",
|
||||
@@ -1744,12 +1777,9 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
|
||||
}
|
||||
elif _provider == "gemini_cli":
|
||||
effective_limit = _history_trunc_limit if _history_trunc_limit > 0 else _GEMINI_MAX_INPUT_TOKENS
|
||||
# For Gemini CLI, we don't have direct count_tokens access without making a call,
|
||||
# so we report the limit and current usage from the last run if available.
|
||||
limit_tokens = effective_limit
|
||||
current_tokens = 0
|
||||
if _gemini_cli_adapter and _gemini_cli_adapter.last_usage:
|
||||
# Stats from CLI use 'input_tokens' or 'input'
|
||||
u = _gemini_cli_adapter.last_usage
|
||||
current_tokens = u.get("input_tokens") or u.get("input", 0)
|
||||
percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
|
||||
@@ -1777,10 +1807,7 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
|
||||
if isinstance(inp, dict):
|
||||
import json as _json
|
||||
current_tokens += len(_json.dumps(inp, ensure_ascii=False))
|
||||
if md_content:
|
||||
current_tokens += len(md_content)
|
||||
if user_message:
|
||||
current_tokens += len(user_message)
|
||||
if md_content: current_tokens += len(md_content)
|
||||
current_tokens = max(1, int(current_tokens / _CHARS_PER_TOKEN))
|
||||
percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
|
||||
return {
|
||||
@@ -1789,7 +1816,6 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
|
||||
"current": current_tokens,
|
||||
"percentage": percentage,
|
||||
}
|
||||
# Default empty state
|
||||
return {
|
||||
"provider": _provider,
|
||||
"limit": 0,
|
||||
|
||||
Reference in New Issue
Block a user