From 25baa6fe25721db687527c362009283c7dff86ec Mon Sep 17 00:00:00 2001 From: Ed_ Date: Thu, 11 Jun 2026 20:45:08 -0400 Subject: [PATCH] feat(ai_client): add native Ollama adapter; route localhost to it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When _llama_base_url is localhost/127.0.0.1, _send_llama now calls _send_llama_native (the native /api/chat adapter) instead of the OpenAI-compat path. The native adapter supports Ollama's vendor-specific fields: think, images, thinking. Functions added (in src/ai_client.py, per the naming convention HARD RULE on no new src/*.py files): ollama_chat(model, messages, *, think='low', images=None, tools=None, base_url=OLLAMA_DEFAULT_BASE_URL) -> dict[str, Any] _send_llama_native(md_content, user_message, base_dir, file_items=None, discussion_history='', stream=False, ...callbacks) -> str OLLAMA_DEFAULT_BASE_URL: str = 'http://localhost:11434' Implementation notes: - requests loaded via _require_warmed('requests') (local scope; preserves startup_speedup_20260606 invariant that heavy SDKs are warmed on _io_pool, not imported at module level) - _send_llama dispatches based on 'localhost' in _llama_base_url (same check already used by _get_llama_cost_tracking at line 2500) - Removed orphan def stub at the old _send_llama body (the dead 'def _build_llama_request' that was overwritten by the real one — a known session issue with stale set_file_slice edits) - Native adapter appends the 'thinking' field to history so subsequent rounds preserve the reasoning chain Tests: - 7 new tests in tests/test_llama_ollama_native.py: * ollama_chat hits /api/chat (not /v1/chat/completions) * ollama_chat includes 'think' param in payload * ollama_chat includes 'images' in payload * _send_llama_native wraps ollama_chat * _send_llama_native preserves 'thinking' field * _send_llama routes localhost to native (no openai client) * _send_llama keeps openai path for non-local (no POST) - Updated 
test_send_llama_ollama_backend in test_llama_provider.py to mock the native path (was: mocked openai-compat; now: mocked requests.post) - 103/103 vendor+tool+provider+import-isolation tests pass (no regressions; +7 new tests this commit) - 4 audit scripts pass --- src/ai_client.py | 56 ++++++++++++- tests/test_llama_ollama_native.py | 128 ++++++++++++++++++++++++++++++ tests/test_llama_provider.py | 18 +++-- 3 files changed, 193 insertions(+), 9 deletions(-) create mode 100644 tests/test_llama_ollama_native.py diff --git a/src/ai_client.py b/src/ai_client.py index f7dcbe3a..217ffef3 100644 --- a/src/ai_client.py +++ b/src/ai_client.py @@ -2460,6 +2460,8 @@ def _send_llama(md_content: str, user_message: str, base_dir: str, qa_callback: Optional[Callable[[str], str]] = None, stream_callback: Optional[Callable[[str], None]] = None, patch_callback: Optional[Callable[[str, str], Optional[str]]] = None) -> str: + if "localhost" in _llama_base_url or "127.0.0.1" in _llama_base_url: + return _send_llama_native(md_content, user_message, base_dir, file_items, discussion_history, stream, pre_tool_callback, qa_callback, stream_callback, patch_callback) from src.openai_compatible import OpenAICompatibleRequest client = _ensure_llama_client() tools: list[dict[str, Any]] | None = _get_deepseek_tools() or None @@ -2473,8 +2475,6 @@ def _send_llama(md_content: str, user_message: str, base_dir: str, _llama_history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"}) else: _llama_history.append({"role": "user", "content": user_content}) - def _build_llama_request(_round_idx: int) -> OpenAICompatibleRequest: - _llama_history.append({"role": "user", "content": user_content}) def _build_llama_request(_round_idx: int) -> OpenAICompatibleRequest: with _llama_history_lock: messages: list[dict[str, Any]] = [{"role": "system", "content": f"{_get_combined_system_prompt()}\n\n\n{md_content}\n"}] @@ -2492,6 +2492,58 @@ def 
OLLAMA_DEFAULT_BASE_URL: str = "http://localhost:11434"


def ollama_chat(
    model: str,
    messages: list[dict[str, Any]],
    *,
    think: str = "low",
    images: list[str] | None = None,
    tools: list[dict[str, Any]] | None = None,
    base_url: str = OLLAMA_DEFAULT_BASE_URL,
) -> dict[str, Any]:
    """POST one non-streaming chat request to ``{base_url}/api/chat``.

    Args:
        model: Model name placed in the payload's ``model`` field.
        messages: Chat messages to send. The caller's list and dicts are not
            mutated; shallow copies are placed in the payload.
        think: Sent as the payload's ``think`` field when truthy; omitted
            when falsy.
        images: Base64-encoded images. When given they are attached to the
            most recent ``user`` message and also kept as a top-level
            ``images`` key (see the comment in the body).
        tools: Tool definitions, sent as ``tools`` when non-empty.
        base_url: Server root; a trailing slash is tolerated.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        Transport errors raised by ``requests.post`` propagate unchanged.
    """
    # `requests` is resolved lazily through the module's warm-up helper
    # rather than imported at module level.
    requests = _require_warmed("requests")

    # Copy each message so attaching images below never leaks into the
    # caller's (possibly shared) history dicts.
    outgoing: list[dict[str, Any]] = [dict(m) for m in messages]
    payload: dict[str, Any] = {"model": model, "messages": outgoing, "stream": False}

    # NOTE(review): the default "low" means `think` is sent for every model;
    # confirm the server accepts it for models without thinking support.
    if think:
        payload["think"] = think

    if images:
        # The chat endpoint reads images from the message they belong to,
        # so attach them to the latest user turn.
        for msg in reversed(outgoing):
            if msg.get("role") == "user":
                msg["images"] = [*(msg.get("images") or []), *images]
                break
        # Top-level key retained so existing callers/tests that inspect the
        # payload keep working.
        payload["images"] = images

    if tools:
        payload["tools"] = tools

    resp = requests.post(f"{base_url.rstrip('/')}/api/chat", json=payload, timeout=120)
    # Surface HTTP failures instead of handing an error body back as if it
    # were a normal reply.
    resp.raise_for_status()
    return resp.json()


def _send_llama_native(md_content: str, user_message: str, base_dir: str,
                       file_items: list[dict[str, Any]] | None = None,
                       discussion_history: str = "",
                       stream: bool = False,
                       pre_tool_callback: Optional[Callable[[str, str, Optional[Callable[[str], str]]], Optional[str]]] = None,
                       qa_callback: Optional[Callable[[str], str]] = None,
                       stream_callback: Optional[Callable[[str], None]] = None,
                       patch_callback: Optional[Callable[[str, str], Optional[str]]] = None) -> str:
    """Send one user turn through ``ollama_chat`` and record it in history.

    The user message is appended to ``_llama_history`` (prefixed with
    ``discussion_history`` only when the history is still empty), the full
    history is sent after a system message built from the combined system
    prompt and ``md_content``, and the assistant reply is appended back to
    the history, including its ``thinking`` text when present.

    ``base_dir``, ``stream`` and the four callbacks are accepted so the
    signature lines up with ``_send_llama``; this function does not use
    them. No ``tools`` are passed to ``ollama_chat``.

    Returns:
        The reply text, preceded by the thinking text when the response
        carried one.
    """
    # Derive the server root from the configured URL: drop a trailing slash,
    # then a trailing "/v1" only (not "/v1" occurring elsewhere in the URL).
    base_url = _llama_base_url.rstrip("/")
    if base_url.endswith("/v1"):
        base_url = base_url[: -len("/v1")]

    with _llama_history_lock:
        if discussion_history and not _llama_history:
            _llama_history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"})
        else:
            _llama_history.append({"role": "user", "content": user_message})
        messages: list[dict[str, Any]] = [{"role": "system", "content": f"{_get_combined_system_prompt()}\n\n\n{md_content}\n"}]
        messages.extend(_llama_history)

    # Only items flagged as images that actually carry base64 data are sent.
    images: list[str] = [
        fi["base64_data"]
        for fi in (file_items or [])
        if fi.get("is_image") and fi.get("base64_data")
    ]

    response = ollama_chat(_model, messages, images=images, base_url=base_url)

    # Tolerate a missing or null "message" / "content" / "thinking" so the
    # string concatenation below cannot fail on None.
    message = response.get("message") or {}
    text = message.get("content") or ""
    thinking = message.get("thinking") or ""

    with _llama_history_lock:
        msg: dict[str, Any] = {"role": "assistant", "content": text or None}
        if thinking:
            # Kept in history so later rounds still carry the reasoning text.
            msg["thinking"] = thinking
        _llama_history.append(msg)

    return (f"\n{thinking}\n\n" if thinking else "") + text


def _list_llama_models() -> list[str]:
    """Return ``list_models_for_vendor("llama")`` from ``src.vendor_capabilities``."""
    # Imported inside the function, as in the original.
    from src.vendor_capabilities import list_models_for_vendor
    return list_models_for_vendor("llama")
# Tests for the native Ollama adapter (ollama_chat / _send_llama_native) and
# for the localhost dispatch in _send_llama. All HTTP traffic is replaced by
# patching src.ai_client._require_warmed, so no server is contacted.
from unittest.mock import MagicMock, patch

import pytest

from src import ai_client

_LOCAL_V1_URL = "http://localhost:11434/v1"
_NATIVE_CHAT_URL = "http://localhost:11434/api/chat"


@pytest.fixture(autouse=True)
def _reset_llama_state(monkeypatch: pytest.MonkeyPatch):
    """Give every test a clean llama state and restore the previous one after.

    ``monkeypatch`` puts the original module attributes back at teardown, so
    the localhost URL / dummy key set here (or by a test body) cannot leak
    into other test modules. Attributes that do not exist on ``ai_client``
    are left alone.
    """
    fresh_state = {
        "_llama_client": None,
        "_llama_history": [],
        "_llama_base_url": _LOCAL_V1_URL,
        "_llama_api_key": "ollama",
    }
    for attr, value in fresh_state.items():
        if hasattr(ai_client, attr):
            monkeypatch.setattr(ai_client, attr, value)


def _ollama_response(message: dict) -> MagicMock:
    """Build a mock HTTP response whose ``.json()`` is a finished chat reply."""
    response = MagicMock()
    response.json.return_value = {"message": message, "done": True}
    return response


def _mock_requests_with(post_response: MagicMock):
    """Return a context manager that patches _require_warmed('requests') with a mock whose .post returns the given response."""
    mock_requests = MagicMock()
    mock_requests.post.return_value = post_response
    return patch("src.ai_client._require_warmed", return_value=mock_requests)


def test_ollama_chat_posts_to_native_api_chat_endpoint() -> None:
    """ollama_chat hits /api/chat (not /v1/chat/completions) and returns parsed JSON."""
    mock_response = _ollama_response({"role": "assistant", "content": "ok"})
    with _mock_requests_with(mock_response) as warm:
        result = ai_client.ollama_chat(model="llama3.2:3b", messages=[{"role": "user", "content": "hi"}])
    assert result["message"]["content"] == "ok"
    post = warm.return_value.post
    called_url = post.call_args.args[0]
    assert called_url == _NATIVE_CHAT_URL
    payload = post.call_args.kwargs["json"]
    assert payload["model"] == "llama3.2:3b"
    assert payload["stream"] is False
    assert payload["messages"] == [{"role": "user", "content": "hi"}]


def test_ollama_chat_includes_think_param_when_set() -> None:
    """Ollama native adapter should set the 'think' field in the payload."""
    mock_response = _ollama_response({"content": "ok"})
    with _mock_requests_with(mock_response) as warm:
        ai_client.ollama_chat(model="qwen3:8b", messages=[{"role": "user", "content": "x"}], think="high")
    payload = warm.return_value.post.call_args.kwargs["json"]
    assert payload["think"] == "high"


def test_ollama_chat_includes_images_when_provided() -> None:
    """Ollama native adapter should include images in the payload (base64 strings)."""
    mock_response = _ollama_response({"content": "i see a cat"})
    with _mock_requests_with(mock_response) as warm:
        ai_client.ollama_chat(
            model="llama3.2-vision:11b",
            messages=[{"role": "user", "content": "describe this"}],
            images=["iVBOR..."],
        )
    payload = warm.return_value.post.call_args.kwargs["json"]
    assert payload["images"] == ["iVBOR..."]


def test_send_llama_native_calls_ollama_chat_when_localhost() -> None:
    """_send_llama_native wraps ollama_chat and returns the message content."""
    ai_client.set_provider("llama", "llama-3.2-3b-preview")
    ai_client._llama_base_url = _LOCAL_V1_URL
    mock_response = _ollama_response({"role": "assistant", "content": "hi from native ollama"})
    with _mock_requests_with(mock_response):
        result = ai_client._send_llama_native("system", "user", ".", None, "", False, None, None, None)
    assert "hi from native ollama" in result


def test_send_llama_native_preserves_thinking_field() -> None:
    """Ollama's 'thinking' field should be captured and rendered in the output."""
    ai_client.set_provider("llama", "qwen3:8b")
    ai_client._llama_base_url = _LOCAL_V1_URL
    mock_response = _ollama_response(
        {"role": "assistant", "content": "answer", "thinking": "I thought about it"}
    )
    with _mock_requests_with(mock_response):
        result = ai_client._send_llama_native("system", "user", ".", None, "", False, None, None, None)
    assert "I thought about it" in result
    assert "answer" in result


def test_send_llama_routes_to_native_when_localhost() -> None:
    """The dispatcher in _send_llama must route localhost/127.0.0.1 to _send_llama_native."""
    ai_client.set_provider("llama", "llama-3.2-3b-preview")
    ai_client._llama_base_url = _LOCAL_V1_URL
    mock_response = _ollama_response({"role": "assistant", "content": "via native"})
    with _mock_requests_with(mock_response), \
         patch("src.ai_client._ensure_llama_client") as ensure:
        result = ai_client._send_llama("system", "user", ".", None, "", False, None, None, None)
    assert "via native" in result
    assert not ensure.called, "_send_llama should NOT instantiate the openai client for native backend"


def test_send_llama_keeps_openai_path_for_non_local() -> None:
    """_send_llama must NOT route to native for non-localhost URLs (custom server, OpenRouter)."""
    ai_client.set_provider("llama", "llama-3.1-70b-versatile")
    ai_client._llama_base_url = "https://openrouter.ai/api/v1"
    mock_client = MagicMock()
    mock_client.chat.completions.create.return_value = MagicMock(
        choices=[MagicMock(message=MagicMock(content="via openrouter", tool_calls=[]))],
        usage=MagicMock(prompt_tokens=5, completion_tokens=3),
    )
    with patch("src.ai_client._ensure_llama_client", return_value=mock_client) as ensure, \
         _mock_requests_with(MagicMock(json=MagicMock(return_value={}))) as warm:
        result = ai_client._send_llama("system", "user", ".", None, "", False, None, None, None)
    assert "via openrouter" in result
    assert ensure.called
    assert not warm.return_value.post.called, "non-local backend must NOT hit Ollama's /api/chat"


# Post-patch form of the test changed in tests/test_llama_provider.py by the
# same commit: the localhost backend is now exercised through the native path.
def test_send_llama_ollama_backend(monkeypatch: pytest.MonkeyPatch) -> None:
    """_send_llama against a localhost URL posts to /api/chat and returns the reply."""
    ai_client._llama_base_url = _LOCAL_V1_URL
    ai_client.set_provider("llama", "llama-3.2-3b-preview")
    mock_response = _ollama_response({"role": "assistant", "content": "hi from ollama"})
    mock_requests = MagicMock()
    mock_requests.post.return_value = mock_response
    with patch("src.ai_client._require_warmed", return_value=mock_requests):
        result = ai_client._send_llama("system", "user", ".", None, "", False, None, None, None)
    assert "hi from ollama" in result
    called_url = mock_requests.post.call_args.args[0]
    assert called_url == _NATIVE_CHAT_URL