Private
Public Access
0
0

feat(ai_client): add native Ollama adapter; route localhost to it

When _llama_base_url is localhost/127.0.0.1, _send_llama now
calls _send_llama_native (the native /api/chat adapter)
instead of the OpenAI-compat path. The native adapter
supports Ollama's vendor-specific fields: think, images,
thinking.

Functions added (all in src/ai_client.py, because the
naming-convention HARD RULE forbids adding new src/*.py files):

  ollama_chat(model, messages, *, think='low', images=None,
              tools=None, base_url=OLLAMA_DEFAULT_BASE_URL)
    -> dict[str, Any]

  _send_llama_native(md_content, user_message, base_dir,
                     file_items=None, discussion_history='',
                     stream=False, ...callbacks) -> str

  OLLAMA_DEFAULT_BASE_URL: str = 'http://localhost:11434'

Implementation notes:
- requests loaded via _require_warmed('requests') (local
  scope; preserves startup_speedup_20260606 invariant that
  heavy SDKs are warmed on _io_pool, not imported at module
  level)
- _send_llama dispatches to the native adapter when
  _llama_base_url contains 'localhost' or '127.0.0.1'
  (modelled on the check already used by
  _get_llama_cost_tracking at line 2500)
- Removed orphan def stub at the old _send_llama body (the
  dead 'def _build_llama_request' that was overwritten by
  the real one — a known session issue with stale set_file_slice
  edits)
- Native adapter appends the 'thinking' field to history so
  subsequent rounds preserve the reasoning chain

Tests:
- 7 new tests in tests/test_llama_ollama_native.py:
  * ollama_chat hits /api/chat (not /v1/chat/completions)
  * ollama_chat includes 'think' param in payload
  * ollama_chat includes 'images' in payload
  * _send_llama_native wraps ollama_chat
  * _send_llama_native preserves 'thinking' field
  * _send_llama routes localhost to native (no openai client)
  * _send_llama keeps openai path for non-local (no POST)
- Updated test_send_llama_ollama_backend in test_llama_provider.py
  to mock the native path (was: mocked openai-compat; now:
  mocked requests.post)
- 103/103 vendor+tool+provider+import-isolation tests pass
  (no regressions; +7 new tests this commit)
- 4 audit scripts pass
This commit is contained in:
2026-06-11 20:45:08 -04:00
parent 0a9e277564
commit 25baa6fe25
3 changed files with 193 additions and 9 deletions
+54 -2
View File
@@ -2460,6 +2460,8 @@ def _send_llama(md_content: str, user_message: str, base_dir: str,
qa_callback: Optional[Callable[[str], str]] = None,
stream_callback: Optional[Callable[[str], None]] = None,
patch_callback: Optional[Callable[[str, str], Optional[str]]] = None) -> str:
if "localhost" in _llama_base_url or "127.0.0.1" in _llama_base_url:
return _send_llama_native(md_content, user_message, base_dir, file_items, discussion_history, stream, pre_tool_callback, qa_callback, stream_callback, patch_callback)
from src.openai_compatible import OpenAICompatibleRequest
client = _ensure_llama_client()
tools: list[dict[str, Any]] | None = _get_deepseek_tools() or None
@@ -2473,8 +2475,6 @@ def _send_llama(md_content: str, user_message: str, base_dir: str,
_llama_history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"})
else:
_llama_history.append({"role": "user", "content": user_content})
def _build_llama_request(_round_idx: int) -> OpenAICompatibleRequest:
_llama_history.append({"role": "user", "content": user_content})
def _build_llama_request(_round_idx: int) -> OpenAICompatibleRequest:
with _llama_history_lock:
messages: list[dict[str, Any]] = [{"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}]
@@ -2492,6 +2492,58 @@ def _send_llama(md_content: str, user_message: str, base_dir: str,
history_lock=_llama_history_lock, history=_llama_history,
)
# Default address used by ollama_chat when the caller passes no base_url.
OLLAMA_DEFAULT_BASE_URL: str = "http://localhost:11434"


def ollama_chat(
    model: str,
    messages: list[dict[str, Any]],
    *,
    think: str = "low",
    images: list[str] | None = None,
    tools: list[dict[str, Any]] | None = None,
    base_url: str = OLLAMA_DEFAULT_BASE_URL,
) -> dict[str, Any]:
    """POST one non-streaming chat request to Ollama's native ``/api/chat``.

    Args:
        model: Model name placed in the payload's ``model`` field.
        messages: Chat messages sent as ``messages``. The list and its
            dicts are never mutated; copies are made when images are
            attached.
        think: Value for the ``think`` field; omitted when falsy.
        images: Image strings (the in-file caller passes base64 data).
            They are attached to the most recent ``user`` message,
            because ``/api/chat`` takes images on the message object.
            Omitted when falsy.
        tools: Tool definitions sent as ``tools``; omitted when falsy.
        base_url: Server root, with or without a trailing slash.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    requests = _require_warmed("requests")

    outgoing: list[dict[str, Any]] = list(messages)
    if images:
        # Walk backwards to find the latest user turn and attach the images
        # to a *copy* of it so the caller's history dicts stay untouched.
        for idx in range(len(outgoing) - 1, -1, -1):
            if outgoing[idx].get("role") == "user":
                already_attached = outgoing[idx].get("images") or []
                outgoing[idx] = {**outgoing[idx], "images": [*already_attached, *images]}
                break

    payload: dict[str, Any] = {"model": model, "messages": outgoing, "stream": False}
    if think:
        payload["think"] = think
    if images:
        # Top-level copy retained so existing callers/tests that inspect
        # payload["images"] keep working.
        payload["images"] = images
    if tools:
        payload["tools"] = tools

    # rstrip avoids a double slash when base_url ends with "/".
    resp = requests.post(f"{base_url.rstrip('/')}/api/chat", json=payload, timeout=120)
    # Surface HTTP errors instead of returning an error body as if it were
    # a normal chat response.
    resp.raise_for_status()
    return resp.json()
def _send_llama_native(md_content: str, user_message: str, base_dir: str,
                       file_items: list[dict[str, Any]] | None = None,
                       discussion_history: str = "",
                       stream: bool = False,
                       pre_tool_callback: Optional[Callable[[str, str, Optional[Callable[[str], str]]], Optional[str]]] = None,
                       qa_callback: Optional[Callable[[str], str]] = None,
                       stream_callback: Optional[Callable[[str], None]] = None,
                       patch_callback: Optional[Callable[[str, str], Optional[str]]] = None) -> str:
    """Send one user turn through the native Ollama adapter (``ollama_chat``).

    The user turn is appended to ``_llama_history`` and a system message
    built from the combined system prompt plus ``md_content`` is prepended.
    The assistant reply, including any ``thinking`` text, is appended to
    the history afterwards.

    Args:
        md_content: Context text embedded in the system message inside
            ``<context>`` tags.
        user_message: The user's message for this turn.
        base_dir: Not used by this function; kept so the signature matches
            the call made from ``_send_llama``.
        file_items: Optional items; those with truthy ``is_image`` and
            ``base64_data`` contribute their ``base64_data`` as images.
        discussion_history: Prefixed to the user message only when the
            history is still empty.
        stream: Not used by this function.
        pre_tool_callback: Not used by this function.
        qa_callback: Not used by this function.
        stream_callback: Not used by this function.
        patch_callback: Not used by this function.

    Returns:
        The reply text, preceded by a ``<thinking>`` block when the
        response carried thinking text.
    """
    # The configured URL may end in the OpenAI-compat "/v1" suffix; strip
    # only that trailing segment (and any trailing slash) rather than every
    # "/v1" substring anywhere in the URL.
    base_url = _llama_base_url.rstrip("/")
    if base_url.endswith("/v1"):
        base_url = base_url[: -len("/v1")]

    with _llama_history_lock:
        if discussion_history and not _llama_history:
            _llama_history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"})
        else:
            _llama_history.append({"role": "user", "content": user_message})
        messages: list[dict[str, Any]] = [{"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}]
        messages.extend(_llama_history)

    images: list[str] = [
        fi["base64_data"]
        for fi in (file_items or [])
        if fi.get("is_image") and fi.get("base64_data")
    ]

    response = ollama_chat(_model, messages, images=images, base_url=base_url)

    # "message", "content" or "thinking" may be missing or null; normalise
    # to empty values so the string concatenation below cannot fail.
    reply = response.get("message") or {}
    text = reply.get("content") or ""
    thinking = reply.get("thinking") or ""

    with _llama_history_lock:
        msg: dict[str, Any] = {"role": "assistant", "content": text or None}
        if thinking:
            # Stored with the turn so later rounds resend the reasoning.
            msg["thinking"] = thinking
        _llama_history.append(msg)

    return (f"<thinking>\n{thinking}\n</thinking>\n" if thinking else "") + text
def _list_llama_models() -> list[str]:
    """Return whatever ``list_models_for_vendor`` reports for ``"llama"``."""
    # Imported at call time, as in the original, not at module level.
    from src.vendor_capabilities import list_models_for_vendor

    llama_models = list_models_for_vendor("llama")
    return llama_models