refactor(ai_client): remove top-level SDK imports; use _require_warmed

Phase 3 T3.2 + T3.3 of the startup_speedup_20260606 track. The 5 heavy SDK modules (anthropic, google.genai, openai, google.genai.types, requests) are no longer imported at module level. Each function that needs one now calls _require_warmed(name), which returns the module from sys.modules (populated by AppController's warmup on _io_pool) and falls back to importlib.import_module(name) when warmup has not run, e.g. in tests. This is the load-bearing wall of the Main Thread Purity Invariant: heavy modules are never in the main thread's import chain. run_discussion_compression now uses _require_warmed for both google.genai.types (gemini branch) and requests (deepseek branch). tests/test_tier4_patch_generation.py was adapted: the 2 tests that mocked 'src.ai_client.types' (no longer a module-level attribute) now mock 'src.ai_client._require_warmed' (the new access mechanism). The T3.1 tests now pass (9/9) and the T3.3 breakage is fixed. All 25 ai_client + tier4 tests pass.
2026-06-06 16:09:16 -04:00
parent ca35b3ef48
commit 51c054ece8
2 changed files with 70 additions and 24 deletions
@@ -5,25 +5,26 @@ Note(Gemini):
 Acts as the unified interface for multiple LLM providers (Anthropic, Gemini).
 Abstracts away the differences in how they handle tool schemas, history, and caching.

-For Anthropic: aggressively manages the ~200k token limit by manually culling 
-stale [FILES UPDATED] entries and dropping the oldest message pairs. 
+For Anthropic: aggressively manages the ~200k token limit by manually culling
+stale [FILES UPDATED] entries and dropping the oldest message pairs.

-For Gemini: injects the initial context directly into system_instruction 
+For Gemini: injects the initial context directly into system_instruction
 during chat creation to avoid massive history bloat.
+
+HEAVY IMPORTS (startup_speedup_20260606): The heavy SDKs (anthropic,
+google.genai, openai, google.genai.types, requests) are NOT imported
+at module level. They are warmed on AppController's _io_pool at
+startup and accessed via _require_warmed() below. This keeps the
+main thread's import chain lean and the GUI responsive on startup.
 """
-import anthropic
-from google import genai
-from openai import OpenAI
-
-from google.genai import types

+import importlib
 import asyncio
 import datetime
 import difflib
 import hashlib
 import json
 import os
-import requests # type: ignore[import-untyped]
 import sys
 import threading
 import time
@@ -51,6 +52,26 @@ from src.tool_bias    import ToolBiasEngine
 from src.tool_presets import ToolPresetManager


+def _require_warmed(name: str) -> Any:
+ """Return the heavy module `name`, importing it only if warmup has not.
+
+ Heavy SDKs (anthropic, google.genai, openai, google.genai.types,
+ requests) are expected to be warmed on AppController's _io_pool at
+ startup. importlib.import_module() returns the sys.modules entry when
+ the module is fully loaded and otherwise imports it on the calling
+ thread (e.g. in tests where warmup didn't run).
+
+ The call is deliberately NOT short-circuited with sys.modules.get():
+ while the warmup thread is still executing the import, sys.modules
+ already holds a partially initialised module. import_module() waits
+ on the per-module import lock in that case, so callers never see a
+ module whose attributes are missing.
+ """
+ # Fast path for an already-loaded module is a dict lookup inside
+ # importlib, so a warmed module still costs next to nothing here.
+ return importlib.import_module(name)
+
+
 _provider: str = "gemini"
 _model: str = "gemini-2.5-flash-lite"
 _temperature: float = 0.0
@@ -333,11 +354,12 @@ def _load_credentials() -> dict[str, Any]:

 def _classify_anthropic_error(exc: Exception) -> ProviderError:
 try:
+  anthropic = _require_warmed("anthropic")
  if isinstance(exc, anthropic.RateLimitError):        return ProviderError("rate_limit", "anthropic", exc)
  if isinstance(exc, anthropic.AuthenticationError):   return ProviderError("auth",       "anthropic", exc)
  if isinstance(exc, anthropic.PermissionDeniedError): return ProviderError("auth",       "anthropic", exc)
  if isinstance(exc, anthropic.APIConnectionError):    return ProviderError("network",    "anthropic", exc)
-  if isinstance(exc, anthropic.APIStatusError): 
+  if isinstance(exc, anthropic.APIStatusError):
   status = getattr(exc, "status_code", 0)
   body = str(exc).lower()
   if status == 429:        return ProviderError("rate_limit", "anthropic", exc)
@@ -366,6 +388,7 @@ def _classify_gemini_error(exc: Exception) -> ProviderError:
 return ProviderError("unknown", "gemini", exc)

 def _classify_deepseek_error(exc: Exception) -> ProviderError:
+ requests = _require_warmed("requests")
 body = ""
 if isinstance(exc, requests.exceptions.HTTPError) and exc.response is not None:
  try:
@@ -389,6 +412,7 @@ def _classify_deepseek_error(exc: Exception) -> ProviderError:
 return ProviderError("unknown", "deepseek", Exception(body))

 def _classify_minimax_error(exc: Exception) -> ProviderError:
+ requests = _require_warmed("requests")
 body = ""
 if isinstance(exc, requests.exceptions.HTTPError) and exc.response is not None:
  try:
@@ -637,6 +661,7 @@ def _gemini_tool_declaration() -> Optional[types.Tool]:
 """
  [C: tests/test_tool_access_exclusion.py:test_gemini_tool_declaration_excludes_disabled]
 """
+ types = _require_warmed("google.genai.types")
 raw_tools: list[dict[str, Any]] = []
 for spec in mcp_client.get_tool_schemas():
  if _agent_tools.get(spec["name"], True):
@@ -1075,6 +1100,7 @@ def _add_history_cache_breakpoint(history: list[dict[str, Any]]) -> None:

 def _list_anthropic_models() -> list[str]:
 try:
+  anthropic = _require_warmed("anthropic")
  creds = _load_credentials()
  client = anthropic.Anthropic(api_key=creds["anthropic"]["api_key"])
  models: list[str] = []
@@ -1086,6 +1112,7 @@ def _list_anthropic_models() -> list[str]:

 def _ensure_anthropic_client() -> None:
 global _anthropic_client
+ anthropic = _require_warmed("anthropic")
 if _anthropic_client is None:
  creds = _load_credentials()
  _anthropic_client = anthropic.Anthropic(
@@ -1150,8 +1177,10 @@ def _repair_anthropic_history(history: list[dict[str, Any]]) -> None:

 def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict[str, Any]] | None = None, discussion_history: str = "", pre_tool_callback: Optional[Callable[[str, str, Optional[Callable[[str], str]]], Optional[str]]] = None, qa_callback: Optional[Callable[[str], str]] = None, stream_callback: Optional[Callable[[str], None]] = None, patch_callback: Optional[Callable[[str, str], Optional[str]]] = None) -> str:
 """
- [C: src/ai_server.py:_handle_send]
+  [C: src/ai_server.py:_handle_send]
 """
+ anthropic = _require_warmed("anthropic")
+ types = _require_warmed("google.genai.types")
 monitor = performance_monitor.get_monitor()
 if monitor.enabled: monitor.start_component("ai_client._send_anthropic")
 try:
@@ -1358,6 +1387,7 @@ def _list_gemini_cli_models() -> list[str]:

 def _list_gemini_models(api_key: str) -> list[str]:
 try:
+  genai = _require_warmed("google.genai")
  client = genai.Client(api_key=api_key)
  models: list[str] = []
  for m in client.models.list():
@@ -1371,12 +1401,13 @@ def _list_gemini_models(api_key: str) -> list[str]:
  raise _classify_gemini_error(exc) from exc

 def _ensure_gemini_client() -> None:
 """
  [C: src/rag_engine.py:GeminiEmbeddingProvider.embed]
 """
 global _gemini_client
+ genai = _require_warmed("google.genai")  # resolved at call time; no module-level SDK import
 if _gemini_client is None:
  creds = _load_credentials()
  _gemini_client = genai.Client(api_key=creds["gemini"]["api_key"])

 def _get_gemini_history_list(chat: Any | None) -> list[Any]:
@@ -1401,6 +1432,7 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
  [C: src/ai_server.py:_handle_send, tests/test_tier4_interceptor.py:test_gemini_provider_passes_qa_callback_to_run_script]
 """
 global _gemini_chat, _gemini_cache, _gemini_cache_md_hash, _gemini_cache_created_at, _gemini_cached_file_paths
+ types = _require_warmed("google.genai.types")
 monitor = performance_monitor.get_monitor()
 if monitor.enabled: monitor.start_component("ai_client._send_gemini")
 try:
@@ -1782,6 +1814,7 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
 """
 [C: src/ai_server.py:_handle_send]
 """
+ requests = _require_warmed("requests")
 monitor = performance_monitor.get_monitor()
 if monitor.enabled: monitor.start_component("ai_client._send_deepseek")
 try:
@@ -2033,6 +2066,8 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,

 def _list_minimax_models(api_key: str) -> list[str]:
 try:
+  openai = _require_warmed("openai")
+  OpenAI = openai.OpenAI
  client = OpenAI(api_key=api_key, base_url="https://api.minimax.io/v1")
  models_list = client.models.list()
  found = [m.id for m in models_list]
@@ -2093,6 +2128,7 @@ def _trim_minimax_history(system_blocks: list[dict[str, Any]], history: list[dic

 def _ensure_minimax_client() -> None:
 global _minimax_client
+ openai = _require_warmed("openai")
 if _minimax_client is None:
  creds = _load_credentials()
  api_key = creds.get("minimax", {}).get("api_key")
@@ -2111,6 +2147,8 @@ def _send_minimax(md_content: str, user_message: str, base_dir: str,
 """
 [C: src/ai_server.py:_handle_send]
 """
+ openai = _require_warmed("openai")
+ requests = _require_warmed("requests")
 try:
  mcp_client.configure(file_items or [], [base_dir])
  creds = _load_credentials()
@@ -2332,6 +2370,7 @@ def _send_minimax(md_content: str, user_message: str, base_dir: str,
 def run_tier4_analysis(stderr: str) -> str:
 """
 """
+ types = _require_warmed("google.genai.types")
 if not stderr or not stderr.strip():
  return ""
 try:
@@ -2381,6 +2420,7 @@ def run_tier4_patch_generation(error: str, file_context: str) -> str:
 """
  [C: src/gui_2.py:App.request_patch_from_tier4, tests/test_tier4_patch_generation.py:test_run_tier4_patch_generation_calls_ai, tests/test_tier4_patch_generation.py:test_run_tier4_patch_generation_empty_error, tests/test_tier4_patch_generation.py:test_run_tier4_patch_generation_returns_diff]
 """
+ types = _require_warmed("google.genai.types")
 if not error or not error.strip():
  return ""
 try:
@@ -2537,6 +2577,8 @@ def run_subagent_summarization(file_path: str, content: str, is_code: bool, outl
 """
  [C: src/summarize.py:summarise_file, tests/test_subagent_summarization.py:test_run_subagent_summarization_anthropic, tests/test_subagent_summarization.py:test_run_subagent_summarization_gemini]
 """
+ requests = _require_warmed("requests")
+ types = _require_warmed("google.genai.types")
 prompt_tmpl = mma_prompts.TIER4_SUMMARIZE_CODE_PROMPT if is_code else mma_prompts.TIER4_SUMMARIZE_TEXT_PROMPT
 prompt = prompt_tmpl.format(file_path=file_path, outline=outline, content=content)
 if _provider == "gemini":
@@ -2584,6 +2626,8 @@ def run_subagent_summarization(file_path: str, content: str, is_code: bool, outl
 return "ERROR: Unsupported provider for sub-agent summarization"

 def run_discussion_compression(discussion_text: str) -> str:
+ types = _require_warmed("google.genai.types")
+ requests = _require_warmed("requests")
 # Robustly identify the provider string (handles case and whitespace)
 p = str(get_provider()).lower().strip()
 prompt = f"The following is a long conversation history.\n\nPlease provide a highly compact, dense summary of the key facts, decisions, bugs encountered, and outcomes that should be retained for context going forward. Categorize into User intent, Tool outputs, and AI reasoning. Omit pleasantries and redundant thoughts.\n\n[HISTORY]\n{discussion_text}"
@@ -36,31 +36,33 @@ def test_run_tier4_patch_generation_empty_error() -> None:

 def test_run_tier4_patch_generation_calls_ai() -> None:
  """Test that run_tier4_patch_generation calls the AI with the correct prompt."""
+  mock_types = MagicMock()  # stand-in for the google.genai.types module
+  mock_types.GenerateContentConfig = MagicMock()
  with patch("src.ai_client._ensure_gemini_client"), \
     patch("src.ai_client._gemini_client", create=True) as mock_client, \
-     patch("src.ai_client.types") as mock_types:
+     patch("src.ai_client._require_warmed", return_value=mock_types):  # every _require_warmed(name) call returns mock_types
    mock_resp = MagicMock()
    mock_resp.text = "--- a/test.py\n+++ b/test.py\n@@ -1 +1 @@\n-old\n+new"
    mock_client.models.generate_content.return_value = mock_resp
-    mock_types.GenerateContentConfig = MagicMock()
-    
+
    error = "TypeError: unsupported operand"
    file_context = "def foo():\n    pass"
    result = ai_client.run_tier4_patch_generation(error, file_context)
-    
+
    mock_client.models.generate_content.assert_called()

 def test_run_tier4_patch_generation_returns_diff() -> None:
  """Test that run_tier4_patch_generation returns diff text."""
+  mock_types = MagicMock()
+  mock_types.GenerateContentConfig = MagicMock()
  with patch("src.ai_client._ensure_gemini_client"), \
     patch("src.ai_client._gemini_client", create=True) as mock_client, \
-     patch("src.ai_client.types") as mock_types:
+     patch("src.ai_client._require_warmed", return_value=mock_types):
    expected_diff = "--- a/src/test.py\n+++ b/src/test.py\n@@ -10,5 +10,6 @@\n def test_func():\n-    old_value = 1\n+    old_value = 1\n+    new_value = 2"
    mock_resp = MagicMock()
    mock_resp.text = expected_diff
    mock_client.models.generate_content.return_value = mock_resp
-    mock_types.GenerateContentConfig = MagicMock()
-    
+
    result = ai_client.run_tier4_patch_generation("error", "context")
    assert "---" in result
    assert "+++" in result