diff --git a/src/ai_client.py b/src/ai_client.py index b5b86e36..47fcec94 100644 --- a/src/ai_client.py +++ b/src/ai_client.py @@ -2230,224 +2230,43 @@ def _send_minimax(md_content: str, user_message: str, base_dir: str, qa_callback: Optional[Callable[[str], str]] = None, stream_callback: Optional[Callable[[str], None]] = None, patch_callback: Optional[Callable[[str, str], Optional[str]]] = None) -> str: - """ - [C: src/ai_server.py:_handle_send] - """ - openai = _require_warmed("openai") - requests = _require_warmed("requests") - try: - mcp_client.configure(file_items or [], [base_dir]) - creds = _load_credentials() - api_key = creds.get("minimax", {}).get("api_key") - if not api_key: - raise ValueError("MiniMax API key not found in credentials.toml") - - client = OpenAI(api_key=api_key, base_url="https://api.minimax.io/v1") - - with _minimax_history_lock: - _repair_minimax_history(_minimax_history) - if discussion_history and not _minimax_history: - user_content = f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}" - else: - user_content = user_message - _minimax_history.append({"role": "user", "content": user_content}) - - all_text_parts: list[str] = [] - _cumulative_tool_bytes = 0 - - for round_idx in range(MAX_TOOL_ROUNDS + 2): - current_api_messages: list[dict[str, Any]] = [] - - sys_msg = {"role": "system", "content": f"{_get_combined_system_prompt()}\n\n\n{md_content}\n"} - current_api_messages.append(sys_msg) - - with _minimax_history_lock: - dropped = _trim_minimax_history([sys_msg], _minimax_history) - if dropped > 0: - _append_comms("OUT", "request", {"message": f"[MINIMAX HISTORY TRIMMED: dropped {dropped} old messages]"}) - - for i, msg in enumerate(_minimax_history): - role = msg.get("role") - api_msg = {"role": role} - - content = msg.get("content") - if role == "assistant": - if msg.get("tool_calls"): - api_msg["content"] = content or None - api_msg["tool_calls"] = msg["tool_calls"] - else: - api_msg["content"] = content or "" - elif role == "tool": - api_msg["content"] = content or "" - api_msg["tool_call_id"] = msg.get("tool_call_id") - else: - api_msg["content"] = content or "" - - current_api_messages.append(api_msg) - - request_payload: dict[str, Any] = { - "model": _model, - "messages": current_api_messages, - "stream": stream, - "extra_body": {"reasoning_split": True}, - } - - if stream: - request_payload["stream_options"] = {"include_usage": True} - - request_payload["temperature"] = 1.0 - request_payload["top_p"] = _top_p - request_payload["max_tokens"] = min(_max_tokens, 8192) - - tools = _get_deepseek_tools() - if tools: - request_payload["tools"] = tools - - events.emit("request_start", payload={"provider": "minimax", "model": _model, "round": round_idx, "streaming": stream}) - - try: - response = client.chat.completions.create(**request_payload, timeout=120) - except Exception as e: - raise _classify_minimax_error(e) from e - - assistant_text = "" - tool_calls_raw = [] - reasoning_content = "" - finish_reason = "stop" - usage = {} - - if stream: - aggregated_content = "" - aggregated_tool_calls: list[dict[str, Any]] = [] - aggregated_reasoning = "" - current_usage: dict[str, Any] = {} - final_finish_reason = "stop" - - for chunk in response: - if not chunk.choices: - if chunk.usage: - current_usage = chunk.usage.model_dump() - continue - - delta = chunk.choices[0].delta - if delta.content: - content_chunk = delta.content - aggregated_content += content_chunk - if stream_callback: - stream_callback(content_chunk) - - if hasattr(delta, "reasoning_details") and delta.reasoning_details: - for detail in delta.reasoning_details: - if "text" in detail: - aggregated_reasoning += detail["text"] - - if delta.tool_calls: - for tc_delta in delta.tool_calls: - idx = tc_delta.index - while len(aggregated_tool_calls) <= idx: - aggregated_tool_calls.append({"id": "", "type": "function", "function": {"name": "", "arguments": ""}}) - target = aggregated_tool_calls[idx] - if tc_delta.id: - target["id"] = tc_delta.id - if tc_delta.function and tc_delta.function.name: - target["function"]["name"] += tc_delta.function.name - if tc_delta.function and tc_delta.function.arguments: - target["function"]["arguments"] += tc_delta.function.arguments - - if chunk.choices[0].finish_reason: - final_finish_reason = chunk.choices[0].finish_reason - if chunk.usage: - current_usage = chunk.usage.model_dump() - - assistant_text = aggregated_content - tool_calls_raw = aggregated_tool_calls - reasoning_content = aggregated_reasoning - finish_reason = final_finish_reason - usage = current_usage - else: - choice = response.choices[0] - message = choice.message - assistant_text = message.content or "" - tool_calls_raw = message.tool_calls or [] - if hasattr(message, "reasoning_details") and message.reasoning_details: - reasoning_content = message.reasoning_details[0].get("text", "") if message.reasoning_details else "" - finish_reason = choice.finish_reason or "stop" - usage = response.usage.model_dump() if response.usage else {} - - thinking_tags = "" - if reasoning_content: - thinking_tags = f"\n{reasoning_content}\n\n" - full_assistant_text = thinking_tags + assistant_text - - with _minimax_history_lock: - msg_to_store: dict[str, Any] = {"role": "assistant", "content": assistant_text or None} - if reasoning_content: - msg_to_store["reasoning_content"] = reasoning_content - if tool_calls_raw: - msg_to_store["tool_calls"] = tool_calls_raw - _minimax_history.append(msg_to_store) - - if full_assistant_text: - all_text_parts.append(full_assistant_text) - - _append_comms("IN", "response", { - "round": round_idx, - "stop_reason": finish_reason, - "text": full_assistant_text, - "tool_calls": tool_calls_raw, - "usage": usage, - "streaming": stream - }) - - if finish_reason != "tool_calls" and not tool_calls_raw: - break - if round_idx > MAX_TOOL_ROUNDS: - break - - try: - loop = asyncio.get_running_loop() - results = asyncio.run_coroutine_threadsafe( - _execute_tool_calls_concurrently(tool_calls_raw, base_dir, pre_tool_callback, qa_callback, round_idx, "minimax", patch_callback), - loop - ).result() - except RuntimeError: - results = asyncio.run(_execute_tool_calls_concurrently(tool_calls_raw, base_dir, pre_tool_callback, qa_callback, round_idx, "minimax", patch_callback)) - - tool_results_for_history: list[dict[str, Any]] = [] - for i, (name, call_id, out, _) in enumerate(results): - if i == len(results) - 1: - if file_items: - file_items, changed = _reread_file_items(file_items) - ctx = _build_file_diff_text(changed) - if ctx: - out += f"\n\n{_get_context_marker()}\n\n{ctx}" - if round_idx == MAX_TOOL_ROUNDS: - out += "\n\n[SYSTEM: MAX ROUNDS. PROVIDE FINAL ANSWER.]" - - truncated = _truncate_tool_output(out) - _cumulative_tool_bytes += len(truncated) - tool_results_for_history.append({ - "role": "tool", - "tool_call_id": call_id, - "content": truncated, - }) - _append_comms("IN", "tool_result", {"name": name, "id": call_id, "output": out}) - events.emit("tool_execution", payload={"status": "completed", "tool": name, "result": out, "round": round_idx}) - - if _cumulative_tool_bytes > _MAX_TOOL_OUTPUT_BYTES: - tool_results_for_history.append({ - "role": "user", - "content": f"SYSTEM WARNING: Cumulative tool output exceeded {_MAX_TOOL_OUTPUT_BYTES // 1000}KB budget. Provide your final answer now." - }) - _append_comms("OUT", "request", {"message": f"[TOOL OUTPUT BUDGET EXCEEDED: {_cumulative_tool_bytes} bytes]"}) - - with _minimax_history_lock: - for tr in tool_results_for_history: - _minimax_history.append(tr) - - return "\n\n".join(all_text_parts) if all_text_parts else "(No text returned)" - except Exception as e: - raise _classify_minimax_error(e) from e + _ensure_minimax_client() + from src.openai_compatible import OpenAICompatibleRequest, send_openai_compatible + from src.vendor_capabilities import get_capabilities + with _minimax_history_lock: + _repair_minimax_history(_minimax_history) + if discussion_history and not _minimax_history: + _minimax_history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"}) + else: + _minimax_history.append({"role": "user", "content": user_message}) + messages = [{"role": "system", "content": f"{_get_combined_system_prompt()}\n\n\n{md_content}\n"}] + messages.extend(_minimax_history) + request = OpenAICompatibleRequest( + messages=messages, + model=_model, + temperature=_temperature, + top_p=_top_p, + max_tokens=min(_max_tokens, 8192), + stream=stream, + stream_callback=stream_callback, + ) + caps = get_capabilities("minimax", _model) + response = send_openai_compatible(_minimax_client, request, capabilities=caps) + reasoning_content = "" + if response.raw_response and hasattr(response.raw_response, "choices"): + choice = response.raw_response.choices[0] + if hasattr(choice.message, "reasoning_details") and choice.message.reasoning_details: + reasoning_content = choice.message.reasoning_details[0].get("text", "") if choice.message.reasoning_details else "" + thinking_tags = "" + if reasoning_content: + thinking_tags = f"\n{reasoning_content}\n\n" + full_text = thinking_tags + response.text + with _minimax_history_lock: + msg_to_store: dict[str, Any] = {"role": "assistant", "content": response.text or None} + if reasoning_content: + msg_to_store["reasoning_content"] = reasoning_content + _minimax_history.append(msg_to_store) + return full_text #endregion: MiniMax Provider