"""Red tests for native Ollama adapter (_send_llama_native + ollama_chat). When _llama_base_url points at localhost/127.0.0.1 (Ollama default), _send_llama should route to a native adapter that POSTs to /api/chat (NOT the OpenAI-compat /v1/chat/completions endpoint). The native adapter supports Ollama's vendor- specific fields: think, images, thinking. This file is t4_2 (red phase) of qwen_llama_grok_followup_20260611 Phase 4. """ from unittest.mock import MagicMock, patch import pytest from src import ai_client @pytest.fixture(autouse=True) def _reset_llama_state(): if hasattr(ai_client, '_llama_client'): ai_client._llama_client = None if hasattr(ai_client, '_llama_history'): ai_client._llama_history = [] if hasattr(ai_client, '_llama_base_url'): ai_client._llama_base_url = "http://localhost:11434/v1" if hasattr(ai_client, '_llama_api_key'): ai_client._llama_api_key = "ollama" yield def _mock_requests_with(post_response: MagicMock): """Return a context manager that patches _require_warmed('requests') with a mock whose .post returns the given response.""" mock_requests = MagicMock() mock_requests.post.return_value = post_response return patch("src.ai_client._require_warmed", return_value=mock_requests) def test_ollama_chat_posts_to_native_api_chat_endpoint() -> None: """ollama_chat hits /api/chat (not /v1/chat/completions) and returns parsed JSON.""" mock_response = MagicMock() mock_response.json.return_value = { "message": {"role": "assistant", "content": "ok"}, "done": True, } with _mock_requests_with(mock_response) as warm: result = ai_client.ollama_chat(model="llama3.2:3b", messages=[{"role": "user", "content": "hi"}]) assert result["message"]["content"] == "ok" post = warm.return_value.post called_url = post.call_args.args[0] assert called_url == "http://localhost:11434/api/chat" payload = post.call_args.kwargs["json"] assert payload["model"] == "llama3.2:3b" assert payload["stream"] is False assert payload["messages"] == [{"role": "user", "content": "hi"}] 
# Positional arguments passed to every _send_llama / _send_llama_native call below.
_SEND_ARGS = ("system", "user", ".", None, "", False, None, None, None)


def _native_reply(message: dict) -> MagicMock:
    """Build a mock HTTP response whose .json() yields a finished reply wrapping ``message``."""
    reply = MagicMock()
    reply.json.return_value = {"message": message, "done": True}
    return reply


def test_ollama_chat_includes_think_param_when_set() -> None:
    """The 'think' keyword given to ollama_chat must appear in the POSTed JSON payload."""
    reply = _native_reply({"content": "ok"})
    with _mock_requests_with(reply) as warmed:
        ai_client.ollama_chat(
            model="qwen3:8b",
            messages=[{"role": "user", "content": "x"}],
            think="high",
        )
    sent = warmed.return_value.post.call_args.kwargs["json"]
    assert sent["think"] == "high"


def test_ollama_chat_includes_images_when_provided() -> None:
    """The 'images' list (base64 strings) given to ollama_chat must appear in the payload."""
    reply = _native_reply({"content": "i see a cat"})
    with _mock_requests_with(reply) as warmed:
        ai_client.ollama_chat(
            model="llama3.2-vision:11b",
            messages=[{"role": "user", "content": "describe this"}],
            images=["iVBOR..."],
        )
    sent = warmed.return_value.post.call_args.kwargs["json"]
    assert sent["images"] == ["iVBOR..."]


def test_send_llama_native_calls_ollama_chat_when_localhost() -> None:
    """_send_llama_native's result must contain the message content of the mocked reply."""
    ai_client.set_provider("llama", "llama-3.2-3b-preview")
    ai_client._llama_base_url = "http://localhost:11434/v1"
    reply = _native_reply({"role": "assistant", "content": "hi from native ollama"})
    with _mock_requests_with(reply):
        output = ai_client._send_llama_native(*_SEND_ARGS)
    assert "hi from native ollama" in output


def test_send_llama_native_preserves_thinking_field() -> None:
    """Both the reply's 'thinking' text and its 'content' must show up in the output."""
    ai_client.set_provider("llama", "qwen3:8b")
    ai_client._llama_base_url = "http://localhost:11434/v1"
    reply = _native_reply(
        {"role": "assistant", "content": "answer", "thinking": "I thought about it"}
    )
    with _mock_requests_with(reply):
        output = ai_client._send_llama_native(*_SEND_ARGS)
    assert "I thought about it" in output
    assert "answer" in output


def test_send_llama_routes_to_native_when_localhost() -> None:
    """With a localhost base URL, _send_llama must answer via the native path
    and must not call _ensure_llama_client."""
    ai_client.set_provider("llama", "llama-3.2-3b-preview")
    ai_client._llama_base_url = "http://localhost:11434/v1"
    reply = _native_reply({"role": "assistant", "content": "via native"})
    with _mock_requests_with(reply):
        with patch("src.ai_client._ensure_llama_client") as ensure:
            output = ai_client._send_llama(*_SEND_ARGS)
    assert "via native" in output
    assert not ensure.called, "_send_llama should NOT instantiate the openai client for native backend"


def test_send_llama_keeps_openai_path_for_non_local() -> None:
    """With a non-localhost base URL, _send_llama must use the client from
    _ensure_llama_client and must not POST through the mocked requests module."""
    ai_client.set_provider("llama", "llama-3.1-70b-versatile")
    ai_client._llama_base_url = "https://openrouter.ai/api/v1"

    # Fake chat-completions result: one choice without tool calls, plus usage counters.
    message = MagicMock(content="via openrouter", tool_calls=[])
    usage = MagicMock(prompt_tokens=5, completion_tokens=3)
    completion = MagicMock(choices=[MagicMock(message=message)], usage=usage)
    openai_client = MagicMock()
    openai_client.chat.completions.create.return_value = completion

    # This response should never be requested; it only backs the requests mock.
    idle_response = MagicMock(json=MagicMock(return_value={}))

    with patch("src.ai_client._ensure_llama_client", return_value=openai_client) as ensure:
        with _mock_requests_with(idle_response) as warmed:
            output = ai_client._send_llama(*_SEND_ARGS)

    assert "via openrouter" in output
    assert ensure.called
    assert not warmed.return_value.post.called, "non-local backend must NOT hit Ollama's /api/chat"