feat(ai_client): add native Ollama adapter; route localhost to it

When _llama_base_url is localhost/127.0.0.1, _send_llama now calls
_send_llama_native (the native /api/chat adapter) instead of the
OpenAI-compat path. The native adapter supports Ollama's vendor-specific
fields: think, images, thinking.

Functions added (in src/ai_client.py, per the naming convention HARD RULE
on no new src/*.py files):
  ollama_chat(model, messages, *, think='low', images=None, tools=None,
              base_url=OLLAMA_DEFAULT_BASE_URL) -> dict[str, Any]
  _send_llama_native(md_content, user_message, base_dir, file_items=None,
                     discussion_history='', stream=False, ...callbacks) -> str
  OLLAMA_DEFAULT_BASE_URL: str = 'http://localhost:11434'

Implementation notes:
- requests loaded via _require_warmed('requests') (local scope; preserves
  startup_speedup_20260606 invariant that heavy SDKs are warmed on _io_pool,
  not imported at module level)
- _send_llama dispatches based on 'localhost' in _llama_base_url (same check
  already used by _get_llama_cost_tracking at line 2500)
- Removed orphan def stub at the old _send_llama body (the dead
  'def _build_llama_request' that was overwritten by the real one — a known
  session issue with stale set_file_slice edits)
- Native adapter appends the 'thinking' field to history so subsequent rounds
  preserve the reasoning chain

Tests:
- 7 new tests in tests/test_llama_ollama_native.py:
  * ollama_chat hits /api/chat (not /v1/chat/completions)
  * ollama_chat includes 'think' param in payload
  * ollama_chat includes 'images' in payload
  * _send_llama_native wraps ollama_chat
  * _send_llama_native preserves 'thinking' field
  * _send_llama routes localhost to native (no openai client)
  * _send_llama keeps openai path for non-local (no POST)
- Updated test_send_llama_ollama_backend in test_llama_provider.py to mock the
  native path (was: mocked openai-compat; now: mocked requests.post)
- 103/103 vendor+tool+provider+import-isolation tests pass (no regressions;
  +7 new tests this commit)
- 4 audit scripts pass
2026-06-11 20:45:08 -04:00
parent 0a9e277564
commit 25baa6fe25
3 changed files with 193 additions and 9 deletions
@@ -2460,6 +2460,8 @@ def _send_llama(md_content: str, user_message: str, base_dir: str,
 qa_callback: Optional[Callable[[str], str]] = None,
 stream_callback: Optional[Callable[[str], None]] = None,
 patch_callback: Optional[Callable[[str, str], Optional[str]]] = None) -> str:
+ if "localhost" in _llama_base_url or "127.0.0.1" in _llama_base_url:
+  return _send_llama_native(md_content, user_message, base_dir, file_items, discussion_history, stream, pre_tool_callback, qa_callback, stream_callback, patch_callback)
 from src.openai_compatible import OpenAICompatibleRequest
 client = _ensure_llama_client()
 tools: list[dict[str, Any]] | None = _get_deepseek_tools() or None
@@ -2473,8 +2475,6 @@ def _send_llama(md_content: str, user_message: str, base_dir: str,
   _llama_history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"})
  else:
   _llama_history.append({"role": "user", "content": user_content})
- def _build_llama_request(_round_idx: int) -> OpenAICompatibleRequest:
-   _llama_history.append({"role": "user", "content": user_content})
 def _build_llama_request(_round_idx: int) -> OpenAICompatibleRequest:
  with _llama_history_lock:
   messages: list[dict[str, Any]] = [{"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}]
@@ -2492,6 +2492,58 @@ def _send_llama(md_content: str, user_message: str, base_dir: str,
  history_lock=_llama_history_lock, history=_llama_history,
 )

+OLLAMA_DEFAULT_BASE_URL: str = "http://localhost:11434"
+
+def ollama_chat(
+ model: str,
+ messages: list[dict[str, Any]],
+ *,
+ think: str = "low",
+ images: list[str] | None = None,
+ tools: list[dict[str, Any]] | None = None,
+ base_url: str = OLLAMA_DEFAULT_BASE_URL,
+ ) -> dict[str, Any]:
+ requests = _require_warmed("requests")
+ payload: dict[str, Any] = {"model": model, "messages": messages, "stream": False}
+ if think:
+  payload["think"] = think
+ if images:
+  payload["images"] = images
+ if tools:
+  payload["tools"] = tools
+ resp = requests.post(f"{base_url}/api/chat", json=payload, timeout=120)
+ return resp.json()
+
+def _send_llama_native(md_content: str, user_message: str, base_dir: str,
+ file_items: list[dict[str, Any]] | None = None,
+ discussion_history: str = "",
+ stream: bool = False,
+ pre_tool_callback: Optional[Callable[[str, str, Optional[Callable[[str], str]]], Optional[str]]] = None,
+ qa_callback: Optional[Callable[[str], str]] = None,
+ stream_callback: Optional[Callable[[str], None]] = None,
+ patch_callback: Optional[Callable[[str, str], Optional[str]]] = None) -> str:
+ base_url = _llama_base_url.replace("/v1", "")
+ with _llama_history_lock:
+  if discussion_history and not _llama_history:
+   _llama_history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"})
+  else:
+   _llama_history.append({"role": "user", "content": user_message})
+  messages: list[dict[str, Any]] = [{"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}]
+  messages.extend(_llama_history)
+  images: list[str] = []
+  if file_items:
+   for fi in file_items:
+    if fi.get("is_image") and fi.get("base64_data"):
+     images.append(fi["base64_data"])
+ response = ollama_chat(_model, messages, images=images, base_url=base_url)
+ text = response.get("message", {}).get("content", "")
+ thinking = response.get("message", {}).get("thinking", "")
+ with _llama_history_lock:
+  msg: dict[str, Any] = {"role": "assistant", "content": text or None}
+  if thinking:
+   msg["thinking"] = thinking
+  _llama_history.append(msg)
+ return (f"<thinking>\n{thinking}\n</thinking>\n" if thinking else "") + text
 def _list_llama_models() -> list[str]:
 from src.vendor_capabilities import list_models_for_vendor
 return list_models_for_vendor("llama")