feat(ai_client): add native Ollama adapter; route localhost to it
When _llama_base_url is localhost/127.0.0.1, _send_llama now
calls _send_llama_native (the native /api/chat adapter)
instead of the OpenAI-compat path. The native adapter
supports Ollama's vendor-specific fields: think, images,
thinking.
Functions added (in src/ai_client.py, per the naming
convention HARD RULE on no new src/*.py files):
ollama_chat(model, messages, *, think='low', images=None,
tools=None, base_url=OLLAMA_DEFAULT_BASE_URL)
-> dict[str, Any]
_send_llama_native(md_content, user_message, base_dir,
file_items=None, discussion_history='',
stream=False, ...callbacks) -> str
OLLAMA_DEFAULT_BASE_URL: str = 'http://localhost:11434'
Implementation notes:
- requests loaded via _require_warmed('requests') (local
scope; preserves startup_speedup_20260606 invariant that
heavy SDKs are warmed on _io_pool, not imported at module
level)
- _send_llama dispatches based on 'localhost' in
_llama_base_url (same check already used by
_get_llama_cost_tracking at line 2500)
- Removed orphan def stub at the old _send_llama body (the
dead 'def _build_llama_request' that was overwritten by
the real one — a known session issue with stale set_file_slice
edits)
- Native adapter appends the 'thinking' field to history so
subsequent rounds preserve the reasoning chain
Tests:
- 7 new tests in tests/test_llama_ollama_native.py:
* ollama_chat hits /api/chat (not /v1/chat/completions)
* ollama_chat includes 'think' param in payload
* ollama_chat includes 'images' in payload
* _send_llama_native wraps ollama_chat
* _send_llama_native preserves 'thinking' field
* _send_llama routes localhost to native (no openai client)
* _send_llama keeps openai path for non-local (no POST)
- Updated test_send_llama_ollama_backend in test_llama_provider.py
to mock the native path (was: mocked openai-compat; now:
mocked requests.post)
- 103/103 vendor+tool+provider+import-isolation tests pass
(no regressions; +7 new tests this commit)
- 4 audit scripts pass
This commit is contained in:
+54
-2
@@ -2460,6 +2460,8 @@ def _send_llama(md_content: str, user_message: str, base_dir: str,
|
||||
qa_callback: Optional[Callable[[str], str]] = None,
|
||||
stream_callback: Optional[Callable[[str], None]] = None,
|
||||
patch_callback: Optional[Callable[[str, str], Optional[str]]] = None) -> str:
|
||||
if "localhost" in _llama_base_url or "127.0.0.1" in _llama_base_url:
|
||||
return _send_llama_native(md_content, user_message, base_dir, file_items, discussion_history, stream, pre_tool_callback, qa_callback, stream_callback, patch_callback)
|
||||
from src.openai_compatible import OpenAICompatibleRequest
|
||||
client = _ensure_llama_client()
|
||||
tools: list[dict[str, Any]] | None = _get_deepseek_tools() or None
|
||||
@@ -2473,8 +2475,6 @@ def _send_llama(md_content: str, user_message: str, base_dir: str,
|
||||
_llama_history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"})
|
||||
else:
|
||||
_llama_history.append({"role": "user", "content": user_content})
|
||||
def _build_llama_request(_round_idx: int) -> OpenAICompatibleRequest:
|
||||
_llama_history.append({"role": "user", "content": user_content})
|
||||
def _build_llama_request(_round_idx: int) -> OpenAICompatibleRequest:
|
||||
with _llama_history_lock:
|
||||
messages: list[dict[str, Any]] = [{"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}]
|
||||
@@ -2492,6 +2492,58 @@ def _send_llama(md_content: str, user_message: str, base_dir: str,
|
||||
history_lock=_llama_history_lock, history=_llama_history,
|
||||
)
|
||||
|
||||
OLLAMA_DEFAULT_BASE_URL: str = "http://localhost:11434"
|
||||
|
||||
def ollama_chat(
|
||||
model: str,
|
||||
messages: list[dict[str, Any]],
|
||||
*,
|
||||
think: str = "low",
|
||||
images: list[str] | None = None,
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
base_url: str = OLLAMA_DEFAULT_BASE_URL,
|
||||
) -> dict[str, Any]:
|
||||
requests = _require_warmed("requests")
|
||||
payload: dict[str, Any] = {"model": model, "messages": messages, "stream": False}
|
||||
if think:
|
||||
payload["think"] = think
|
||||
if images:
|
||||
payload["images"] = images
|
||||
if tools:
|
||||
payload["tools"] = tools
|
||||
resp = requests.post(f"{base_url}/api/chat", json=payload, timeout=120)
|
||||
return resp.json()
|
||||
|
||||
def _send_llama_native(md_content: str, user_message: str, base_dir: str,
|
||||
file_items: list[dict[str, Any]] | None = None,
|
||||
discussion_history: str = "",
|
||||
stream: bool = False,
|
||||
pre_tool_callback: Optional[Callable[[str, str, Optional[Callable[[str], str]]], Optional[str]]] = None,
|
||||
qa_callback: Optional[Callable[[str], str]] = None,
|
||||
stream_callback: Optional[Callable[[str], None]] = None,
|
||||
patch_callback: Optional[Callable[[str, str], Optional[str]]] = None) -> str:
|
||||
base_url = _llama_base_url.replace("/v1", "")
|
||||
with _llama_history_lock:
|
||||
if discussion_history and not _llama_history:
|
||||
_llama_history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"})
|
||||
else:
|
||||
_llama_history.append({"role": "user", "content": user_message})
|
||||
messages: list[dict[str, Any]] = [{"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}]
|
||||
messages.extend(_llama_history)
|
||||
images: list[str] = []
|
||||
if file_items:
|
||||
for fi in file_items:
|
||||
if fi.get("is_image") and fi.get("base64_data"):
|
||||
images.append(fi["base64_data"])
|
||||
response = ollama_chat(_model, messages, images=images, base_url=base_url)
|
||||
text = response.get("message", {}).get("content", "")
|
||||
thinking = response.get("message", {}).get("thinking", "")
|
||||
with _llama_history_lock:
|
||||
msg: dict[str, Any] = {"role": "assistant", "content": text or None}
|
||||
if thinking:
|
||||
msg["thinking"] = thinking
|
||||
_llama_history.append(msg)
|
||||
return (f"<thinking>\n{thinking}\n</thinking>\n" if thinking else "") + text
|
||||
def _list_llama_models() -> list[str]:
|
||||
from src.vendor_capabilities import list_models_for_vendor
|
||||
return list_models_for_vendor("llama")
|
||||
|
||||
Reference in New Issue
Block a user