feat(ai_client): add native Ollama adapter; route localhost to it

When _llama_base_url is localhost/127.0.0.1, _send_llama now calls _send_llama_native (the native /api/chat adapter) instead of the OpenAI-compat path. The native adapter supports Ollama's vendor-specific fields: think, images, thinking.

Functions added (in src/ai_client.py, per the naming convention HARD RULE on no new src/*.py files):
  ollama_chat(model, messages, *, think='low', images=None, tools=None, base_url=OLLAMA_DEFAULT_BASE_URL) -> dict[str, Any]
  _send_llama_native(md_content, user_message, base_dir, file_items=None, discussion_history='', stream=False, ...callbacks) -> str
  OLLAMA_DEFAULT_BASE_URL: str = 'http://localhost:11434'

Implementation notes:
- requests loaded via _require_warmed('requests') (local scope; preserves startup_speedup_20260606 invariant that heavy SDKs are warmed on _io_pool, not imported at module level)
- _send_llama dispatches based on 'localhost' in _llama_base_url (same check already used by _get_llama_cost_tracking at line 2500)
- Removed orphan def stub at the old _send_llama body (the dead 'def _build_llama_request' that was overwritten by the real one — a known session issue with stale set_file_slice edits)
- Native adapter appends the 'thinking' field to history so subsequent rounds preserve the reasoning chain

Tests:
- 7 new tests in tests/test_llama_ollama_native.py:
  * ollama_chat hits /api/chat (not /v1/chat/completions)
  * ollama_chat includes 'think' param in payload
  * ollama_chat includes 'images' in payload
  * _send_llama_native wraps ollama_chat
  * _send_llama_native preserves 'thinking' field
  * _send_llama routes localhost to native (no openai client)
  * _send_llama keeps openai path for non-local (no POST)
- Updated test_send_llama_ollama_backend in test_llama_provider.py to mock the native path (was: mocked openai-compat; now: mocked requests.post)
- 103/103 vendor+tool+provider+import-isolation tests pass (no regressions; +7 new tests this commit)
- 4 audit scripts pass
2026-06-11 20:45:08 -04:00
parent 0a9e277564
commit 25baa6fe25
3 changed files with 193 additions and 9 deletions
@@ -0,0 +1,128 @@
+"""Red tests for native Ollama adapter (_send_llama_native + ollama_chat).
+
+When _llama_base_url points at localhost/127.0.0.1 (Ollama default), _send_llama
+should route to a native adapter that POSTs to /api/chat (NOT the OpenAI-compat
+/v1/chat/completions endpoint). The native adapter supports Ollama's vendor-
+specific fields: think, images, thinking.
+
+This file is t4_2 (red phase) of qwen_llama_grok_followup_20260611 Phase 4.
+"""
+from unittest.mock import MagicMock, patch
+import pytest
+from src import ai_client
+
+@pytest.fixture(autouse=True)
+def _reset_llama_state():
+ if hasattr(ai_client, '_llama_client'):
+  ai_client._llama_client = None
+ if hasattr(ai_client, '_llama_history'):
+  ai_client._llama_history = []
+ if hasattr(ai_client, '_llama_base_url'):
+  ai_client._llama_base_url = "http://localhost:11434/v1"
+ if hasattr(ai_client, '_llama_api_key'):
+  ai_client._llama_api_key = "ollama"
+ yield
+
+def _mock_requests_with(post_response: MagicMock):
+ """Return a context manager that patches _require_warmed('requests') with a mock whose .post returns the given response."""
+ mock_requests = MagicMock()
+ mock_requests.post.return_value = post_response
+ return patch("src.ai_client._require_warmed", return_value=mock_requests)
+
+def test_ollama_chat_posts_to_native_api_chat_endpoint() -> None:
+ """ollama_chat hits /api/chat (not /v1/chat/completions) and returns parsed JSON."""
+ mock_response = MagicMock()
+ mock_response.json.return_value = {
+  "message": {"role": "assistant", "content": "ok"},
+  "done": True,
+ }
+ with _mock_requests_with(mock_response) as warm:
+  result = ai_client.ollama_chat(model="llama3.2:3b", messages=[{"role": "user", "content": "hi"}])
+  assert result["message"]["content"] == "ok"
+  post = warm.return_value.post
+  called_url = post.call_args.args[0]
+  assert called_url == "http://localhost:11434/api/chat"
+  payload = post.call_args.kwargs["json"]
+  assert payload["model"] == "llama3.2:3b"
+  assert payload["stream"] is False
+  assert payload["messages"] == [{"role": "user", "content": "hi"}]
+
+def test_ollama_chat_includes_think_param_when_set() -> None:
+ """Ollama native adapter should set the 'think' field in the payload."""
+ mock_response = MagicMock()
+ mock_response.json.return_value = {"message": {"content": "ok"}, "done": True}
+ with _mock_requests_with(mock_response) as warm:
+  ai_client.ollama_chat(model="qwen3:8b", messages=[{"role": "user", "content": "x"}], think="high")
+  payload = warm.return_value.post.call_args.kwargs["json"]
+  assert payload["think"] == "high"
+
+def test_ollama_chat_includes_images_when_provided() -> None:
+ """Ollama native adapter should include images in the payload (base64 strings)."""
+ mock_response = MagicMock()
+ mock_response.json.return_value = {"message": {"content": "i see a cat"}, "done": True}
+ with _mock_requests_with(mock_response) as warm:
+  ai_client.ollama_chat(
+   model="llama3.2-vision:11b",
+   messages=[{"role": "user", "content": "describe this"}],
+   images=["iVBOR..."],
+  )
+  payload = warm.return_value.post.call_args.kwargs["json"]
+  assert payload["images"] == ["iVBOR..."]
+
+def test_send_llama_native_calls_ollama_chat_when_localhost() -> None:
+ """_send_llama_native wraps ollama_chat and returns the message content."""
+ ai_client.set_provider("llama", "llama-3.2-3b-preview")
+ ai_client._llama_base_url = "http://localhost:11434/v1"
+ mock_response = MagicMock()
+ mock_response.json.return_value = {
+  "message": {"role": "assistant", "content": "hi from native ollama"},
+  "done": True,
+ }
+ with _mock_requests_with(mock_response):
+  result = ai_client._send_llama_native("system", "user", ".", None, "", False, None, None, None)
+  assert "hi from native ollama" in result
+
+def test_send_llama_native_preserves_thinking_field() -> None:
+ """Ollama's 'thinking' field should be captured and rendered in the output."""
+ ai_client.set_provider("llama", "qwen3:8b")
+ ai_client._llama_base_url = "http://localhost:11434/v1"
+ mock_response = MagicMock()
+ mock_response.json.return_value = {
+  "message": {"role": "assistant", "content": "answer", "thinking": "I thought about it"},
+  "done": True,
+ }
+ with _mock_requests_with(mock_response):
+  result = ai_client._send_llama_native("system", "user", ".", None, "", False, None, None, None)
+  assert "I thought about it" in result
+  assert "answer" in result
+
+def test_send_llama_routes_to_native_when_localhost() -> None:
+ """The dispatcher in _send_llama must route localhost/127.0.0.1 to _send_llama_native."""
+ ai_client.set_provider("llama", "llama-3.2-3b-preview")
+ ai_client._llama_base_url = "http://localhost:11434/v1"
+ mock_response = MagicMock()
+ mock_response.json.return_value = {
+  "message": {"role": "assistant", "content": "via native"},
+  "done": True,
+ }
+ with _mock_requests_with(mock_response), \
+      patch("src.ai_client._ensure_llama_client") as ensure:
+  result = ai_client._send_llama("system", "user", ".", None, "", False, None, None, None)
+  assert "via native" in result
+  assert not ensure.called, "_send_llama should NOT instantiate the openai client for native backend"
+
+def test_send_llama_keeps_openai_path_for_non_local() -> None:
+ """_send_llama must NOT route to native for non-localhost URLs (custom server, OpenRouter)."""
+ ai_client.set_provider("llama", "llama-3.1-70b-versatile")
+ ai_client._llama_base_url = "https://openrouter.ai/api/v1"
+ mock_client = MagicMock()
+ mock_client.chat.completions.create.return_value = MagicMock(
+  choices=[MagicMock(message=MagicMock(content="via openrouter", tool_calls=[]))],
+  usage=MagicMock(prompt_tokens=5, completion_tokens=3),
+ )
+ with patch("src.ai_client._ensure_llama_client", return_value=mock_client) as ensure, \
+      _mock_requests_with(MagicMock(json=MagicMock(return_value={}))) as warm:
+  result = ai_client._send_llama("system", "user", ".", None, "", False, None, None, None)
+  assert "via openrouter" in result
+  assert ensure.called
+  assert not warm.return_value.post.called, "non-local backend must NOT hit Ollama's /api/chat"
@@ -17,14 +17,18 @@ def _reset_llama_state():
 def test_send_llama_ollama_backend(monkeypatch: pytest.MonkeyPatch) -> None:
 ai_client._llama_base_url = "http://localhost:11434/v1"
 ai_client.set_provider("llama", "llama-3.2-3b-preview")
- mock_client = MagicMock()
- mock_client.chat.completions.create.return_value = MagicMock(
-  choices=[MagicMock(message=MagicMock(content="hi from ollama", tool_calls=[]))],
-  usage=MagicMock(prompt_tokens=5, completion_tokens=3),
- )
- with patch("src.ai_client._ensure_llama_client", return_value=mock_client):
+ mock_response = MagicMock()
+ mock_response.json.return_value = {
+  "message": {"role": "assistant", "content": "hi from ollama"},
+  "done": True,
+ }
+ mock_requests = MagicMock()
+ mock_requests.post.return_value = mock_response
+ with patch("src.ai_client._require_warmed", return_value=mock_requests):
  result = ai_client._send_llama("system", "user", ".", None, "", False, None, None, None)
-  assert result == "hi from ollama"
+  assert "hi from ollama" in result
+  called_url = mock_requests.post.call_args.args[0]
+  assert called_url == "http://localhost:11434/api/chat"

 def test_send_llama_openrouter_backend(monkeypatch: pytest.MonkeyPatch) -> None:
 ai_client._llama_base_url = "https://openrouter.ai/api/v1"