feat(ai_client): add native Ollama adapter; route localhost to it
When _llama_base_url is localhost/127.0.0.1, _send_llama now
calls _send_llama_native (the native /api/chat adapter)
instead of the OpenAI-compat path. The native adapter
supports Ollama's vendor-specific fields: think, images,
thinking.
Functions added (in src/ai_client.py, per the naming
convention HARD RULE on no new src/*.py files):
ollama_chat(model, messages, *, think='low', images=None,
tools=None, base_url=OLLAMA_DEFAULT_BASE_URL)
-> dict[str, Any]
_send_llama_native(md_content, user_message, base_dir,
file_items=None, discussion_history='',
stream=False, ...callbacks) -> str
OLLAMA_DEFAULT_BASE_URL: str = 'http://localhost:11434'
Implementation notes:
- requests loaded via _require_warmed('requests') (local
scope; preserves startup_speedup_20260606 invariant that
heavy SDKs are warmed on _io_pool, not imported at module
level)
- _send_llama dispatches when 'localhost' or '127.0.0.1'
appears in _llama_base_url (the 'localhost' check is already
used by _get_llama_cost_tracking at line 2500)
- Removed orphan def stub at the old _send_llama body (the
dead 'def _build_llama_request' that was overwritten by
the real one — a known session issue with stale set_file_slice
edits)
- Native adapter appends the 'thinking' field to history so
subsequent rounds preserve the reasoning chain
Tests:
- 7 new tests in tests/test_llama_ollama_native.py:
* ollama_chat hits /api/chat (not /v1/chat/completions)
* ollama_chat includes 'think' param in payload
* ollama_chat includes 'images' in payload
* _send_llama_native wraps ollama_chat
* _send_llama_native preserves 'thinking' field
* _send_llama routes localhost to native (no openai client)
* _send_llama keeps openai path for non-local (no POST)
- Updated test_send_llama_ollama_backend in test_llama_provider.py
to mock the native path (was: mocked openai-compat; now:
mocked requests.post)
- 103/103 vendor+tool+provider+import-isolation tests pass
(no regressions; +7 new tests this commit)
- 4 audit scripts pass
This commit is contained in:
+54
-2
@@ -2460,6 +2460,8 @@ def _send_llama(md_content: str, user_message: str, base_dir: str,
|
|||||||
qa_callback: Optional[Callable[[str], str]] = None,
|
qa_callback: Optional[Callable[[str], str]] = None,
|
||||||
stream_callback: Optional[Callable[[str], None]] = None,
|
stream_callback: Optional[Callable[[str], None]] = None,
|
||||||
patch_callback: Optional[Callable[[str, str], Optional[str]]] = None) -> str:
|
patch_callback: Optional[Callable[[str, str], Optional[str]]] = None) -> str:
|
||||||
|
if "localhost" in _llama_base_url or "127.0.0.1" in _llama_base_url:
|
||||||
|
return _send_llama_native(md_content, user_message, base_dir, file_items, discussion_history, stream, pre_tool_callback, qa_callback, stream_callback, patch_callback)
|
||||||
from src.openai_compatible import OpenAICompatibleRequest
|
from src.openai_compatible import OpenAICompatibleRequest
|
||||||
client = _ensure_llama_client()
|
client = _ensure_llama_client()
|
||||||
tools: list[dict[str, Any]] | None = _get_deepseek_tools() or None
|
tools: list[dict[str, Any]] | None = _get_deepseek_tools() or None
|
||||||
@@ -2473,8 +2475,6 @@ def _send_llama(md_content: str, user_message: str, base_dir: str,
|
|||||||
_llama_history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"})
|
_llama_history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"})
|
||||||
else:
|
else:
|
||||||
_llama_history.append({"role": "user", "content": user_content})
|
_llama_history.append({"role": "user", "content": user_content})
|
||||||
def _build_llama_request(_round_idx: int) -> OpenAICompatibleRequest:
|
|
||||||
_llama_history.append({"role": "user", "content": user_content})
|
|
||||||
def _build_llama_request(_round_idx: int) -> OpenAICompatibleRequest:
|
def _build_llama_request(_round_idx: int) -> OpenAICompatibleRequest:
|
||||||
with _llama_history_lock:
|
with _llama_history_lock:
|
||||||
messages: list[dict[str, Any]] = [{"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}]
|
messages: list[dict[str, Any]] = [{"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}]
|
||||||
@@ -2492,6 +2492,58 @@ def _send_llama(md_content: str, user_message: str, base_dir: str,
|
|||||||
history_lock=_llama_history_lock, history=_llama_history,
|
history_lock=_llama_history_lock, history=_llama_history,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Root URL of a default local Ollama install (no "/v1" or "/api" suffix).
OLLAMA_DEFAULT_BASE_URL: str = "http://localhost:11434"


def ollama_chat(
    model: str,
    messages: list[dict[str, Any]],
    *,
    think: str = "low",
    images: list[str] | None = None,
    tools: list[dict[str, Any]] | None = None,
    base_url: str = OLLAMA_DEFAULT_BASE_URL,
) -> dict[str, Any]:
    """Send one non-streaming chat request to Ollama's native ``/api/chat``.

    Args:
        model: Model tag, sent as the ``model`` field.
        messages: Chat messages to send. Neither the list nor its dicts are
            mutated; when ``images`` is given a shallow copy is adjusted.
        think: Value for the ``think`` field; the field is omitted when falsy.
        images: Base64-encoded images. They are attached to the last
            ``role == "user"`` message, which is where ``/api/chat`` reads
            images from. The top-level ``images`` field is also kept so that
            existing callers and tests inspecting the payload keep working.
        tools: Tool definitions, sent as ``tools`` when non-empty.
        base_url: Server root; trailing slashes are ignored.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a non-2xx status.
    """
    requests = _require_warmed("requests")
    # Shallow copy so attaching images never touches the caller's list.
    outgoing: list[dict[str, Any]] = list(messages)
    payload: dict[str, Any] = {"model": model, "messages": outgoing, "stream": False}
    if think:
        payload["think"] = think
    if images:
        payload["images"] = images
        # Walk backwards to find the most recent user turn and attach there.
        for idx in range(len(outgoing) - 1, -1, -1):
            if outgoing[idx].get("role") == "user":
                with_images = dict(outgoing[idx])
                with_images["images"] = [*(with_images.get("images") or []), *images]
                outgoing[idx] = with_images
                break
    if tools:
        payload["tools"] = tools
    endpoint = base_url.rstrip("/") + "/api/chat"
    resp = requests.post(endpoint, json=payload, timeout=120)
    # Surface HTTP failures instead of returning an {"error": ...} body that
    # callers would otherwise read as an empty assistant reply.
    resp.raise_for_status()
    return resp.json()
|
||||||
|
|
||||||
|
def _send_llama_native(md_content: str, user_message: str, base_dir: str,
                       file_items: list[dict[str, Any]] | None = None,
                       discussion_history: str = "",
                       stream: bool = False,
                       pre_tool_callback: Optional[Callable[[str, str, Optional[Callable[[str], str]]], Optional[str]]] = None,
                       qa_callback: Optional[Callable[[str], str]] = None,
                       stream_callback: Optional[Callable[[str], None]] = None,
                       patch_callback: Optional[Callable[[str, str], Optional[str]]] = None) -> str:
    """Run one chat round against Ollama's native API and record it in history.

    The user turn is appended to ``_llama_history`` (prefixed with
    ``discussion_history`` when the history is still empty), the full history
    is sent through ``ollama_chat`` behind a system message built from
    ``_get_combined_system_prompt()`` and ``md_content``, and the assistant
    reply (including any ``thinking`` text) is appended to the history.

    Args:
        md_content: Context embedded in the system message.
        user_message: The new user turn.
        base_dir: Not referenced by this adapter.
        file_items: Optional items; entries with truthy ``is_image`` and
            ``base64_data`` contribute their ``base64_data`` as images.
        discussion_history: Prior discussion text, used only for the first
            turn of an empty history.
        stream: Not referenced by this adapter.
        pre_tool_callback: Not referenced by this adapter.
        qa_callback: Not referenced by this adapter.
        stream_callback: Not referenced by this adapter.
        patch_callback: Not referenced by this adapter.

    Returns:
        The reply text, preceded by a ``<thinking>`` block when the response
        carried a ``thinking`` field.
    """
    # Only a trailing "/v1" segment is removed (with or without a final
    # slash); a blanket replace would also rewrite "/v1" elsewhere in the URL.
    base_url = _llama_base_url.rstrip("/").removesuffix("/v1")
    with _llama_history_lock:
        if discussion_history and not _llama_history:
            user_content = f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"
        else:
            user_content = user_message
        _llama_history.append({"role": "user", "content": user_content})
        messages: list[dict[str, Any]] = [{"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}]
        messages.extend(_llama_history)
    images: list[str] = [
        fi["base64_data"]
        for fi in (file_items or [])
        if fi.get("is_image") and fi.get("base64_data")
    ]
    response = ollama_chat(_model, messages, images=images, base_url=base_url)
    # `or` guards cover keys that are present but null in the JSON body,
    # which `.get(key, default)` alone would pass through as None.
    reply = response.get("message") or {}
    text = reply.get("content") or ""
    thinking = reply.get("thinking") or ""
    with _llama_history_lock:
        msg: dict[str, Any] = {"role": "assistant", "content": text or None}
        if thinking:
            # Kept in history so later rounds resend the reasoning chain.
            msg["thinking"] = thinking
        _llama_history.append(msg)
    return (f"<thinking>\n{thinking}\n</thinking>\n" if thinking else "") + text
|
||||||
def _list_llama_models() -> list[str]:
|
def _list_llama_models() -> list[str]:
|
||||||
from src.vendor_capabilities import list_models_for_vendor
|
from src.vendor_capabilities import list_models_for_vendor
|
||||||
return list_models_for_vendor("llama")
|
return list_models_for_vendor("llama")
|
||||||
|
|||||||
@@ -0,0 +1,128 @@
|
|||||||
|
"""Red tests for native Ollama adapter (_send_llama_native + ollama_chat).
|
||||||
|
|
||||||
|
When _llama_base_url points at localhost/127.0.0.1 (Ollama default), _send_llama
|
||||||
|
should route to a native adapter that POSTs to /api/chat (NOT the OpenAI-compat
|
||||||
|
/v1/chat/completions endpoint). The native adapter supports Ollama's vendor-
|
||||||
|
specific fields: think, images, thinking.
|
||||||
|
|
||||||
|
This file is t4_2 (red phase) of qwen_llama_grok_followup_20260611 Phase 4.
|
||||||
|
"""
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
import pytest
|
||||||
|
from src import ai_client
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def _reset_llama_state():
    """Put ai_client's llama globals into a known state for every test.

    Each attribute is overwritten only if ai_client already defines it. The
    previous values are put back after the test so the forced localhost URL,
    empty history and dummy key do not leak into other test modules.
    """
    baseline = {
        "_llama_client": None,
        "_llama_history": [],
        "_llama_base_url": "http://localhost:11434/v1",
        "_llama_api_key": "ollama",
    }
    saved = {}
    for attr, value in baseline.items():
        if hasattr(ai_client, attr):
            saved[attr] = getattr(ai_client, attr)
            setattr(ai_client, attr, value)
    yield
    # Teardown: restore whatever was there before this test ran.
    for attr, previous in saved.items():
        setattr(ai_client, attr, previous)
|
||||||
|
|
||||||
|
def _mock_requests_with(post_response: MagicMock):
|
||||||
|
"""Return a context manager that patches _require_warmed('requests') with a mock whose .post returns the given response."""
|
||||||
|
mock_requests = MagicMock()
|
||||||
|
mock_requests.post.return_value = post_response
|
||||||
|
return patch("src.ai_client._require_warmed", return_value=mock_requests)
|
||||||
|
|
||||||
|
def test_ollama_chat_posts_to_native_api_chat_endpoint() -> None:
    """ollama_chat POSTs to the native /api/chat route and returns the decoded JSON."""
    fake_reply = MagicMock()
    fake_reply.json.return_value = {
        "message": {"role": "assistant", "content": "ok"},
        "done": True,
    }
    with _mock_requests_with(fake_reply) as warmed:
        reply = ai_client.ollama_chat(model="llama3.2:3b", messages=[{"role": "user", "content": "hi"}])
    assert reply["message"]["content"] == "ok"
    post_call = warmed.return_value.post.call_args
    # First positional argument of requests.post is the URL.
    assert post_call.args[0] == "http://localhost:11434/api/chat"
    sent = post_call.kwargs["json"]
    assert sent["model"] == "llama3.2:3b"
    assert sent["stream"] is False
    assert sent["messages"] == [{"role": "user", "content": "hi"}]
|
||||||
|
|
||||||
|
def test_ollama_chat_includes_think_param_when_set() -> None:
    """An explicit ``think`` argument is forwarded in the request payload."""
    fake_reply = MagicMock()
    fake_reply.json.return_value = {"message": {"content": "ok"}, "done": True}
    with _mock_requests_with(fake_reply) as warmed:
        ai_client.ollama_chat(model="qwen3:8b", messages=[{"role": "user", "content": "x"}], think="high")
    sent = warmed.return_value.post.call_args.kwargs["json"]
    assert sent["think"] == "high"
|
||||||
|
|
||||||
|
def test_ollama_chat_includes_images_when_provided() -> None:
    """Base64 image strings passed as ``images`` appear in the request payload."""
    fake_reply = MagicMock()
    fake_reply.json.return_value = {"message": {"content": "i see a cat"}, "done": True}
    call_kwargs = {
        "model": "llama3.2-vision:11b",
        "messages": [{"role": "user", "content": "describe this"}],
        "images": ["iVBOR..."],
    }
    with _mock_requests_with(fake_reply) as warmed:
        ai_client.ollama_chat(**call_kwargs)
    sent = warmed.return_value.post.call_args.kwargs["json"]
    assert sent["images"] == ["iVBOR..."]
|
||||||
|
|
||||||
|
def test_send_llama_native_calls_ollama_chat_when_localhost() -> None:
    """_send_llama_native goes through ollama_chat and returns the reply content."""
    ai_client.set_provider("llama", "llama-3.2-3b-preview")
    ai_client._llama_base_url = "http://localhost:11434/v1"
    fake_reply = MagicMock()
    fake_reply.json.return_value = {
        "message": {"role": "assistant", "content": "hi from native ollama"},
        "done": True,
    }
    call_args = ("system", "user", ".", None, "", False, None, None, None)
    with _mock_requests_with(fake_reply):
        output = ai_client._send_llama_native(*call_args)
    assert "hi from native ollama" in output
|
||||||
|
|
||||||
|
def test_send_llama_native_preserves_thinking_field() -> None:
    """Both the ``thinking`` text and the answer show up in the returned string."""
    ai_client.set_provider("llama", "qwen3:8b")
    ai_client._llama_base_url = "http://localhost:11434/v1"
    fake_reply = MagicMock()
    fake_reply.json.return_value = {
        "message": {"role": "assistant", "content": "answer", "thinking": "I thought about it"},
        "done": True,
    }
    call_args = ("system", "user", ".", None, "", False, None, None, None)
    with _mock_requests_with(fake_reply):
        output = ai_client._send_llama_native(*call_args)
    assert "I thought about it" in output
    assert "answer" in output
|
||||||
|
|
||||||
|
def test_send_llama_routes_to_native_when_localhost() -> None:
    """With a localhost base URL, _send_llama uses the native path and never builds the openai client."""
    ai_client.set_provider("llama", "llama-3.2-3b-preview")
    ai_client._llama_base_url = "http://localhost:11434/v1"
    fake_reply = MagicMock()
    fake_reply.json.return_value = {
        "message": {"role": "assistant", "content": "via native"},
        "done": True,
    }
    # Nested blocks enter the patches in the same order as the original test.
    with _mock_requests_with(fake_reply):
        with patch("src.ai_client._ensure_llama_client") as ensure:
            output = ai_client._send_llama("system", "user", ".", None, "", False, None, None, None)
    assert "via native" in output
    assert not ensure.called, "_send_llama should NOT instantiate the openai client for native backend"
|
||||||
|
|
||||||
|
def test_send_llama_keeps_openai_path_for_non_local() -> None:
    """A non-localhost base URL stays on the openai-compatible client and never POSTs to Ollama."""
    ai_client.set_provider("llama", "llama-3.1-70b-versatile")
    ai_client._llama_base_url = "https://openrouter.ai/api/v1"
    completion = MagicMock(
        choices=[MagicMock(message=MagicMock(content="via openrouter", tool_calls=[]))],
        usage=MagicMock(prompt_tokens=5, completion_tokens=3),
    )
    openai_client = MagicMock()
    openai_client.chat.completions.create.return_value = completion
    empty_post_reply = MagicMock(json=MagicMock(return_value={}))
    # Nested blocks enter the patches in the same order as the original test.
    with patch("src.ai_client._ensure_llama_client", return_value=openai_client) as ensure:
        with _mock_requests_with(empty_post_reply) as warmed:
            output = ai_client._send_llama("system", "user", ".", None, "", False, None, None, None)
    assert "via openrouter" in output
    assert ensure.called
    assert not warmed.return_value.post.called, "non-local backend must NOT hit Ollama's /api/chat"
|
||||||
@@ -17,14 +17,18 @@ def _reset_llama_state():
|
|||||||
def test_send_llama_ollama_backend(monkeypatch: pytest.MonkeyPatch) -> None:
|
def test_send_llama_ollama_backend(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||||
ai_client._llama_base_url = "http://localhost:11434/v1"
|
ai_client._llama_base_url = "http://localhost:11434/v1"
|
||||||
ai_client.set_provider("llama", "llama-3.2-3b-preview")
|
ai_client.set_provider("llama", "llama-3.2-3b-preview")
|
||||||
mock_client = MagicMock()
|
mock_response = MagicMock()
|
||||||
mock_client.chat.completions.create.return_value = MagicMock(
|
mock_response.json.return_value = {
|
||||||
choices=[MagicMock(message=MagicMock(content="hi from ollama", tool_calls=[]))],
|
"message": {"role": "assistant", "content": "hi from ollama"},
|
||||||
usage=MagicMock(prompt_tokens=5, completion_tokens=3),
|
"done": True,
|
||||||
)
|
}
|
||||||
with patch("src.ai_client._ensure_llama_client", return_value=mock_client):
|
mock_requests = MagicMock()
|
||||||
|
mock_requests.post.return_value = mock_response
|
||||||
|
with patch("src.ai_client._require_warmed", return_value=mock_requests):
|
||||||
result = ai_client._send_llama("system", "user", ".", None, "", False, None, None, None)
|
result = ai_client._send_llama("system", "user", ".", None, "", False, None, None, None)
|
||||||
assert result == "hi from ollama"
|
assert "hi from ollama" in result
|
||||||
|
called_url = mock_requests.post.call_args.args[0]
|
||||||
|
assert called_url == "http://localhost:11434/api/chat"
|
||||||
|
|
||||||
def test_send_llama_openrouter_backend(monkeypatch: pytest.MonkeyPatch) -> None:
|
def test_send_llama_openrouter_backend(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||||
ai_client._llama_base_url = "https://openrouter.ai/api/v1"
|
ai_client._llama_base_url = "https://openrouter.ai/api/v1"
|
||||||
|
|||||||
Reference in New Issue
Block a user