For Gemini: injects the initial context directly into system_instruction
during chat creation to avoid massive history bloat.
"""

# ai_client.py

import tomllib
import asyncio
import json

events: EventEmitter = EventEmitter()

def set_model_params(temp: float, max_tok: int, trunc_limit: int = 8000, top_p: float = 1.0) -> None:
    """
    Sets global generation parameters like temperature and max tokens.

    [C: src/app_controller.py:AppController._handle_request_event, src/app_controller.py:AppController.generate]
    """
    global _temperature, _max_tokens, _history_trunc_limit, _top_p
    _temperature = temp
    _max_tokens = max_tok
    _history_trunc_limit = trunc_limit
    _top_p = top_p
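
# Usage sketch (values illustrative, not project defaults):
#     set_model_params(temp=0.2, max_tok=4096)  # trunc_limit/top_p keep their defaults
#     set_model_params(temp=0.7, max_tok=8192, trunc_limit=16000, top_p=0.9)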

def get_history_trunc_limit() -> int:
    return _history_trunc_limit

def set_history_trunc_limit(val: int) -> None:
    global _history_trunc_limit
    _history_trunc_limit = val
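
# Lazily initialized google-genai state (see _ensure_gemini_client); these remain
# None until the Gemini provider is first used.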
_gemini_client: Optional[genai.Client] = None
_gemini_chat: Any = None
_gemini_cache: Any = None

def set_provider(provider: str, model: str) -> None:
    """
    Updates the active LLM provider and model name.

    [C: src/app_controller.py:AppController._handle_reset_session, src/app_controller.py:AppController._init_ai_and_hooks, src/app_controller.py:AppController.current_model, src/app_controller.py:AppController.current_provider, src/app_controller.py:AppController.do_fetch, src/multi_agent_conductor.py:run_worker_lifecycle, src/orchestrator_pm.py:generate_tracks, tests/conftest.py:reset_ai_client, tests/test_ai_cache_tracking.py:test_gemini_cache_tracking, tests/test_ai_client_cli.py:test_ai_client_send_gemini_cli, tests/test_api_events.py:test_send_emits_events_proper, tests/test_api_events.py:test_send_emits_tool_events, tests/test_deepseek_provider.py:test_deepseek_completion_logic, tests/test_deepseek_provider.py:test_deepseek_model_selection, tests/test_deepseek_provider.py:test_deepseek_payload_verification, tests/test_deepseek_provider.py:test_deepseek_reasoner_payload_verification, tests/test_deepseek_provider.py:test_deepseek_reasoning_logic, tests/test_deepseek_provider.py:test_deepseek_streaming, tests/test_deepseek_provider.py:test_deepseek_tool_calling, tests/test_gemini_cli_edge_cases.py:test_gemini_cli_loop_termination, tests/test_gemini_cli_integration.py:test_gemini_cli_full_integration, tests/test_gemini_cli_integration.py:test_gemini_cli_rejection_and_history, tests/test_gemini_cli_parity_regression.py:test_send_invokes_adapter_send, tests/test_gui2_mcp.py:test_mcp_tool_call_is_dispatched, tests/test_minimax_provider.py:test_minimax_default_model, tests/test_minimax_provider.py:test_minimax_model_selection, tests/test_mma_agent_focus_phase1.py:test_append_comms_has_source_tier_key, tests/test_rag_integration.py:test_rag_integration, tests/test_tier4_interceptor.py:test_ai_client_passes_qa_callback, tests/test_tier4_interceptor.py:test_gemini_provider_passes_qa_callback_to_run_script, tests/test_token_usage.py:test_token_usage_tracking]
    """
    global _provider, _model
    _provider = provider
    _model = model
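
# Usage sketch (model names illustrative, not validated here):
#     set_provider("gemini", "gemini-1.5-pro")
#     set_provider("deepseek", "deepseek-chat")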

if os.environ.get("SLOP_TOOL_PRESET"):
    try:
        set_tool_preset(os.environ["SLOP_TOOL_PRESET"])
    except Exception:
        pass

def get_history_bleed_stats(md_content: Optional[str] = None) -> dict[str, Any]:
    """
    Estimates current prompt-token usage against the active provider's context limit.

    [C: tests/test_gemini_cli_parity_regression.py:test_get_history_bleed_stats, tests/test_history_management.py:test_get_history_bleed_stats_basic, tests/test_minimax_provider.py:test_minimax_history_bleed_stats, tests/test_token_viz.py:test_get_history_bleed_stats_returns_all_keys_unknown_provider]
    """
    if _provider == "anthropic":
        with _anthropic_history_lock:
            history_snapshot = list(_anthropic_history)
        sys_tok = max(1, int(len(md_content) / _CHARS_PER_TOKEN)) if md_content else 0
        current_tokens = _estimate_prompt_tokens([], history_snapshot)
        if md_content:
            current_tokens += max(1, int(len(md_content) / _CHARS_PER_TOKEN))
        limit_tokens = _ANTHROPIC_MAX_PROMPT_TOKENS
        percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
        return _add_bleed_derived({
            "provider": "anthropic",
            "limit": limit_tokens,
            "current": current_tokens,
            "percentage": percentage,
        }, sys_tok=sys_tok, tool_tok=2500)
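
    # Gemini: prefer the API tokenizer (models.count_tokens) over the char-count
    # heuristic, falling back to zeros if the client or the count call fails.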
    elif _provider == "gemini":
        effective_limit = _history_trunc_limit if _history_trunc_limit > 0 else _GEMINI_MAX_INPUT_TOKENS
        if _gemini_chat:
            try:
                _ensure_gemini_client()
                if _gemini_client:
                    raw_history = list(_get_gemini_history_list(_gemini_chat))
                    history: list[types.Content] = []
                    for c in raw_history:
                        role = "model" if c.role in ["assistant", "model"] else "user"
                        history.append(types.Content(role=role, parts=c.parts))
                    if md_content:
                        history.insert(0, types.Content(role="user", parts=[types.Part(text=md_content)]))
                    if not history:
                        return _add_bleed_derived({
                            "provider": "gemini",
                            "limit": effective_limit,
                            "current": 0,
                            "percentage": 0,
                        })
                    resp = _gemini_client.models.count_tokens(
                        model=_model,
                        contents=history
                    )
                    current_tokens = cast(int, resp.total_tokens)
                    percentage = (current_tokens / effective_limit) * 100 if effective_limit > 0 else 0
                    return _add_bleed_derived({
                        "provider": "gemini",
                        "limit": effective_limit,
                        "current": current_tokens,
                        "percentage": percentage,
                    }, sys_tok=0, tool_tok=0)
            except Exception:
                pass
        elif md_content:
            try:
                _ensure_gemini_client()
                if _gemini_client:
                    resp = _gemini_client.models.count_tokens(
                        model=_model,
                        contents=[types.Content(role="user", parts=[types.Part(text=md_content)])]
                    )
                    current_tokens = cast(int, resp.total_tokens)
                    percentage = (current_tokens / effective_limit) * 100 if effective_limit > 0 else 0
                    return _add_bleed_derived({
                        "provider": "gemini",
                        "limit": effective_limit,
                        "current": current_tokens,
                        "percentage": percentage,
                    })
            except Exception:
                pass
        return _add_bleed_derived({
            "provider": "gemini",
            "limit": effective_limit,
            "current": 0,
            "percentage": 0,
        })
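
    # gemini_cli: nothing to recount locally; reuse the adapter's last reported
    # usage, accepting either an "input_tokens" or "input" key.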
    elif _provider == "gemini_cli":
        effective_limit = _history_trunc_limit if _history_trunc_limit > 0 else _GEMINI_MAX_INPUT_TOKENS
        limit_tokens = effective_limit
        current_tokens = 0
        if _gemini_cli_adapter and _gemini_cli_adapter.last_usage:
            u = _gemini_cli_adapter.last_usage
            current_tokens = cast(int, u.get("input_tokens") or u.get("input", 0))
        percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
        return _add_bleed_derived({
            "provider": "gemini_cli",
            "limit": limit_tokens,
            "current": current_tokens,
            "percentage": percentage,
        })
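
    # DeepSeek: estimated from raw character counts (text blocks plus JSON-encoded
    # tool inputs) divided by _CHARS_PER_TOKEN; no tokenizer call is made.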
    elif _provider == "deepseek":
        limit_tokens = 64000
        current_tokens = 0
        with _deepseek_history_lock:
            for msg in _deepseek_history:
                content = msg.get("content", "")
                if isinstance(content, str):
                    current_tokens += len(content)
                elif isinstance(content, list):
                    for block in content:
                        if isinstance(block, dict):
                            text = block.get("text", "")
                            if isinstance(text, str):
                                current_tokens += len(text)
                            inp = block.get("input")
                            if isinstance(inp, dict):
                                import json as _json
                                current_tokens += len(_json.dumps(inp, ensure_ascii=False))
        if md_content:
            current_tokens += len(md_content)
        current_tokens = max(1, int(current_tokens / _CHARS_PER_TOKEN))
        percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
        return _add_bleed_derived({
            "provider": "deepseek",
            "limit": limit_tokens,
            "current": current_tokens,
            "percentage": percentage,
        })
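
    # MiniMax: same char-count estimate as the deepseek branch, with a larger
    # 204800-token window.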
    elif _provider == "minimax":
        limit_tokens = 204800
        current_tokens = 0
        with _minimax_history_lock:
            for msg in _minimax_history:
                content = msg.get("content", "")
                if isinstance(content, str):
                    current_tokens += len(content)
                elif isinstance(content, list):
                    for block in content:
                        if isinstance(block, dict):
                            text = block.get("text", "")
                            if isinstance(text, str):
                                current_tokens += len(text)
                            inp = block.get("input")
                            if isinstance(inp, dict):
                                import json as _json
                                current_tokens += len(_json.dumps(inp, ensure_ascii=False))
        if md_content:
            current_tokens += len(md_content)
        current_tokens = max(1, int(current_tokens / _CHARS_PER_TOKEN))
        percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
        return _add_bleed_derived({
            "provider": "minimax",
            "limit": limit_tokens,
            "current": current_tokens,
            "percentage": percentage,
        })
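
    # Unknown provider: report zeros but keep the key set stable for callers.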
    return _add_bleed_derived({
        "provider": _provider,
        "limit": 0,
        "current": 0,
        "percentage": 0,
    })
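
# Reading the stats (sketch; the base keys are set in every branch above, and
# _add_bleed_derived may attach more):
#     stats = get_history_bleed_stats()
#     print(f"{stats['provider']}: {stats['current']}/{stats['limit']} tokens "
#           f"({stats['percentage']:.1f}%)")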

def run_subagent_summarization(file_path: str, content: str, is_code: bool, outline: str) -> str:
    """
    Performs a stateless summarization request using a sub-agent prompt.

    [C: src/summarize.py:summarise_file, tests/test_subagent_summarization.py:test_run_subagent_summarization_anthropic, tests/test_subagent_summarization.py:test_run_subagent_summarization_gemini]
    """
    prompt_tmpl = mma_prompts.TIER4_SUMMARIZE_CODE_PROMPT if is_code else mma_prompts.TIER4_SUMMARIZE_TEXT_PROMPT
    prompt = prompt_tmpl.format(file_path=file_path, outline=outline, content=content)