diff --git a/src/ai_client.py b/src/ai_client.py
index d560ea67..473a8f03 100644
--- a/src/ai_client.py
+++ b/src/ai_client.py
@@ -42,7 +42,6 @@ from src import mcp_client
 from src import mma_prompts
 from src import performance_monitor
 from src import project_manager
-from src.openai_compatible import send_openai_compatible, OpenAICompatibleRequest, NormalizedResponse
 from src.vendor_capabilities import VendorCapabilities, get_capabilities
 
 # TODO(Ed): Eliminate these?
@@ -822,8 +821,9 @@ def run_with_tool_loop(
  on_pre_dispatch: Optional[Callable[[int, list[dict[str, Any]]], list[dict[str, Any]]]] = None,
 ) -> str:
  def _default_send(_round_idx: int) -> NormalizedResponse:
+  from src.openai_compatible import send_openai_compatible as _send_oc
   assert capabilities is not None, "capabilities required when send_func is not provided"
-  return send_openai_compatible(client, request_builder(_round_idx), capabilities=capabilities)
+  return _send_oc(client, request_builder(_round_idx), capabilities=capabilities)
  request_builder: Callable[[int], OpenAICompatibleRequest] = (request if callable(request) else (lambda _i: request))
  dispatch_send: Callable[[int], NormalizedResponse] = send_func or _default_send
  response_text: str = ""
@@ -1760,8 +1760,10 @@ def _send_gemini_cli(md_content: str, user_message: str, base_dir: str,
  qa_callback: Optional[Callable[[str], str]] = None,
  stream_callback: Optional[Callable[[str], None]] = None,
  patch_callback: Optional[Callable[[str, str], Optional[str]]] = None) -> str:
+ from src.openai_compatible import OpenAICompatibleRequest, NormalizedResponse
  """
  [C: src/ai_server.py:_handle_send]
+ [C: src/ai_server.py:_handle_send]
  """
  global _gemini_cli_adapter
  try:
@@ -2248,6 +2250,8 @@ def _send_grok(md_content: str, user_message: str, base_dir: str,
  qa_callback: Optional[Callable[[str], str]] = None,
  stream_callback: Optional[Callable[[str], None]] = None,
  patch_callback: Optional[Callable[[str, str], Optional[str]]] = None) -> str:
+ from src.openai_compatible import OpenAICompatibleRequest
+ client = _ensure_grok_client()
  client = _ensure_grok_client()
  tools: list[dict[str, Any]] | None = _get_deepseek_tools() or None
  with _grok_history_lock:
@@ -2260,6 +2264,7 @@ def _send_grok(md_content: str, user_message: str, base_dir: str,
    _grok_history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"})
   else:
    _grok_history.append({"role": "user", "content": user_content})
+  _grok_history.append({"role": "user", "content": user_content})
  def _build_grok_request(_round_idx: int) -> OpenAICompatibleRequest:
   with _grok_history_lock:
    messages: list[dict[str, Any]] = [{"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}]
@@ -2287,16 +2292,14 @@ def _send_minimax(md_content: str, user_message: str, base_dir: str,
  stream: bool = False,
  pre_tool_callback: Optional[Callable[[str, str, Optional[Callable[[str], str]]], Optional[str]]] = None,
  qa_callback: Optional[Callable[[str], str]] = None,
- stream_callback: Optional[Callable[[str], None]] = None,
  patch_callback: Optional[Callable[[str, str], Optional[str]]] = None) -> str:
+ from src.openai_compatible import OpenAICompatibleRequest
  _ensure_minimax_client()
- tools: list[dict[str, Any]] | None = _get_deepseek_tools() or None
- with _minimax_history_lock:
-  _repair_minimax_history(_minimax_history)
-  if discussion_history and not _minimax_history:
-   _minimax_history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"})
-  else:
-   _minimax_history.append({"role": "user", "content": user_message})
+ _repair_minimax_history(_minimax_history)
+ if discussion_history and not _minimax_history:
+  _minimax_history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"})
+ else:
+  _minimax_history.append({"role": "user", "content": user_message})
  def _build_minimax_request(_round_idx: int) -> OpenAICompatibleRequest:
   with _minimax_history_lock:
    messages: list[dict[str, Any]] = [{"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}]
@@ -2454,6 +2457,7 @@ def _send_llama(md_content: str, user_message: str, base_dir: str,
  qa_callback: Optional[Callable[[str], str]] = None,
  stream_callback: Optional[Callable[[str], None]] = None,
  patch_callback: Optional[Callable[[str, str], Optional[str]]] = None) -> str:
+ from src.openai_compatible import OpenAICompatibleRequest
  client = _ensure_llama_client()
  tools: list[dict[str, Any]] | None = _get_deepseek_tools() or None
  with _llama_history_lock:
@@ -2466,6 +2470,8 @@ def _send_llama(md_content: str, user_message: str, base_dir: str,
    _llama_history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"})
   else:
    _llama_history.append({"role": "user", "content": user_content})
+ def _build_llama_request(_round_idx: int) -> OpenAICompatibleRequest:
+   _llama_history.append({"role": "user", "content": user_content})
  def _build_llama_request(_round_idx: int) -> OpenAICompatibleRequest:
   with _llama_history_lock:
    messages: list[dict[str, Any]] = [{"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}]
diff --git a/tests/test_ai_client_tool_loop.py b/tests/test_ai_client_tool_loop.py
index ee1b49c1..c777862f 100644
--- a/tests/test_ai_client_tool_loop.py
+++ b/tests/test_ai_client_tool_loop.py
@@ -34,7 +34,7 @@ def _make_normalized_response(text: str = "ok", tool_calls: list[dict[str, Any]]
 
 def test_run_with_tool_loop_no_tool_calls_returns_immediately(caps: VendorCapabilities) -> None:
  client = MagicMock()
- with patch("src.ai_client.send_openai_compatible", return_value=_make_normalized_response("hello")) as call:
+ with patch("src.openai_compatible.send_openai_compatible", return_value=_make_normalized_response("hello")) as call:
   result = run_with_tool_loop(
    client, OpenAICompatibleRequest(messages=[{"role": "user", "content": "x"}], model="m"),
    capabilities=caps,
@@ -50,7 +50,7 @@ def test_run_with_tool_loop_dispatches_tool_calls(caps: VendorCapabilities) -> N
   "first response", tool_calls=[{"id": "c1", "type": "function", "function": {"name": "read_file", "arguments": "{}"}}]
  )
  final_response = _make_normalized_response("after tool")
- with patch("src.ai_client.send_openai_compatible", side_effect=[tool_response, final_response]) as call, \
+ with patch("src.openai_compatible.send_openai_compatible", side_effect=[tool_response, final_response]) as call, \
       patch("src.ai_client._execute_tool_calls_concurrently", return_value=[("read_file", "c1", "result", "")]) as dispatch:
   result = run_with_tool_loop(
    client, OpenAICompatibleRequest(messages=[{"role": "user", "content": "x"}], model="m"),
@@ -67,7 +67,7 @@ def test_run_with_tool_loop_respects_max_rounds(caps: VendorCapabilities) -> Non
  infinite_tool_response = _make_normalized_response(
   "loop", tool_calls=[{"id": "c1", "type": "function", "function": {"name": "noop", "arguments": "{}"}}]
  )
- with patch("src.ai_client.send_openai_compatible", return_value=infinite_tool_response), \
+ with patch("src.openai_compatible.send_openai_compatible", return_value=infinite_tool_response), \
       patch("src.ai_client._execute_tool_calls_concurrently", return_value=[("noop", "c1", "result", "")]):
   result = run_with_tool_loop(
    client, OpenAICompatibleRequest(messages=[{"role": "user", "content": "x"}], model="m"),
@@ -83,7 +83,7 @@ def test_run_with_tool_loop_appends_to_history(caps: VendorCapabilities) -> None
  history_lock = MagicMock()
  history_lock.__enter__ = MagicMock(return_value=history_lock)
  history_lock.__exit__ = MagicMock(return_value=False)
- with patch("src.ai_client.send_openai_compatible", return_value=_make_normalized_response("hi")):
+ with patch("src.openai_compatible.send_openai_compatible", return_value=_make_normalized_response("hi")):
   run_with_tool_loop(
    client, OpenAICompatibleRequest(messages=[{"role": "user", "content": "x"}], model="m"),
    capabilities=caps,
@@ -98,7 +98,7 @@ def test_run_with_tool_loop_does_not_crash_on_tool_error(caps: VendorCapabilitie
   "err", tool_calls=[{"id": "c1", "type": "function", "function": {"name": "fail", "arguments": "{}"}}]
  )
  final_response = _make_normalized_response("recovered")
- with patch("src.ai_client.send_openai_compatible", side_effect=[tool_response, final_response]), \
+ with patch("src.openai_compatible.send_openai_compatible", side_effect=[tool_response, final_response]), \
       patch("src.ai_client._execute_tool_calls_concurrently", return_value=[("fail", "c1", "", "ToolExecutionError")]):
   result = run_with_tool_loop(
    client, OpenAICompatibleRequest(messages=[{"role": "user", "content": "x"}], model="m"),
diff --git a/tests/test_ai_client_tool_loop_builder.py b/tests/test_ai_client_tool_loop_builder.py
index d53630bb..be30b880 100644
--- a/tests/test_ai_client_tool_loop_builder.py
+++ b/tests/test_ai_client_tool_loop_builder.py
@@ -30,7 +30,7 @@ def test_run_with_tool_loop_calls_request_builder_each_round() -> None:
  def builder(round_idx: int) -> OpenAICompatibleRequest:
   builder_calls.append(round_idx)
   return OpenAICompatibleRequest(messages=[{"role": "user", "content": f"round={round_idx}"}], model="m")
- with patch("src.ai_client.send_openai_compatible", side_effect=[tool_response, final]), \
+ with patch("src.openai_compatible.send_openai_compatible", side_effect=[tool_response, final]), \
       patch("src.ai_client._execute_tool_calls_concurrently", return_value=[("noop", "c1", "r", "")]):
   result = run_with_tool_loop(
    client, builder, capabilities=caps,