conductor(track): Initialize qwen_llama_grok_integration_20260606 spec

Three new vendors + capability matrix framework + MiniMax refactor: **Capability matrix v1 (7 features):** vision, tool_calling, caching, streaming, model_discovery, context_window, cost_tracking. Audio and server-side code execution deferred to a follow-up track. **Qwen via DashScope native SDK:** Qwen-Turbo, Qwen-Plus, Qwen-Max, Qwen-Long (1M context), Qwen-VL-Plus/Max (vision), Qwen-Audio. Native API chosen over OpenAI-compatible mode to unlock Qwen-Audio, Qwen-Long custom chunking, and Qwen-VL-Max enhanced vision. **Llama (OpenAI-compatible, multi-backend):** Ollama (local, free), OpenRouter (cloud aggregator covering Together/Groq/Fireworks), custom URL escape hatch. Models: Llama 3.1 8B/70B/405B, 3.2 1B/3B, 3.2 11B/90B Vision, 3.3 70B. **Grok via xAI (OpenAI-compatible):** Grok-2, Grok-2-Vision, Grok-Beta. **Shared OpenAI-compatible helper** in src/openai_compatible.py processes a normalized request/response data structure; each _send_<vendor>() is a thin adapter at the boundary (data-oriented design per Fleury/Acton/Lottes). **MiniMax refactor:** ~250 lines reduced to ~50 by using the shared helper. Existing test_minimax_provider.py is the safety net. **UX adaptation:** 9 UI elements (screenshot, tools toggle, cache panel, stream progress, fetch models, token budget, cost panel) read from the matrix instead of hard-coding per-vendor branches. **Out of scope (deferred):** Anthropic/Gemini/DeepSeek migration to the matrix (separate track), audio input, server-side code execution, PDF input, batch API, fine-tuning. 6 phases planned: matrix+helper, Qwen, Grok+Llama, MiniMax refactor, UX adaptation, docs+archive.
2026-06-06 14:56:00 -04:00
parent 7eb743c6cb
commit 7c1d597ef1
3 changed files with 739 additions and 0 deletions
@@ -0,0 +1,122 @@
+{
+  "track_id": "qwen_llama_grok_integration_20260606",
+  "name": "Qwen, Llama & Grok Vendor Integration + Capability Matrix",
+  "initialized": "2026-06-06",
+  "owner": "tier2-tech-lead",
+  "priority": "high",
+  "status": "active",
+  "type": "feature + refactor",
+  "scope": {
+    "new_files": [
+      "src/vendor_capabilities.py",
+      "src/openai_compatible.py",
+      "tests/test_vendor_capabilities.py",
+      "tests/test_openai_compatible.py",
+      "tests/test_qwen_provider.py",
+      "tests/test_llama_provider.py",
+      "tests/test_grok_provider.py"
+    ],
+    "modified_files": [
+      "src/ai_client.py",
+      "src/cost_tracker.py",
+      "src/models.py",
+      "src/gui_2.py",
+      "src/app_controller.py",
+      "credentials_template.toml",
+      "pyproject.toml",
+      "tests/test_minimax_provider.py",
+      "docs/guide_ai_client.md",
+      "docs/guide_models.md"
+    ]
+  },
+  "blocked_by": [],
+  "blocks": ["anthropic_gemini_deepseek_capability_matrix_20260606"], "blocks_note": "anthropic_gemini_deepseek_capability_matrix_20260606 is not yet created; conceptual follow-up",
+  "estimated_phases": 6,
+  "spec": "spec.md",
+  "plan": "plan.md",
+  "priority_order": "A (capability matrix framework + 3 new vendors) > B (shared helper + MiniMax refactor) > C (UX adaptation + docs)",
+  "capability_matrix_v1": ["vision", "tool_calling", "caching", "streaming", "model_discovery", "context_window", "cost_tracking"],
+  "capability_matrix_deferred": ["audio_input", "pdf_input", "server_side_code_execution", "image_generation", "fine_tuning", "batch_api"],
+  "data_oriented_design": {
+    "shared_data_structure": "NormalizedResponse (text, tool_calls, usage_*) + OpenAICompatibleRequest (messages, tools, model, ...)",
+    "shared_algorithm": "send_openai_compatible(client, request, capabilities) -> NormalizedResponse in src/openai_compatible.py",
+    "per_vendor_boundary": "Each _send_<vendor>() is a thin adapter: init client, load history, call shared helper, update history, return text",
+    "philosophy_references": ["Ryan Fleury (code/data separation)", "Mike Acton (data-oriented design)", "Timothy Lottes (cache-aware algorithms)"]
+  },
+  "vendors_added": {
+    "qwen": {
+      "api": "DashScope native SDK",
+      "rationale": "Qwen-Audio, Qwen-Long (1M context), Qwen-VL-Max require native API; OpenAI-compatible mode loses them",
+      "sdk": "dashscope>=1.14.0",
+      "models_shipped": ["qwen-turbo", "qwen-plus", "qwen-max", "qwen-long", "qwen-vl-plus", "qwen-vl-max", "qwen-audio"]
+    },
+    "llama": {
+      "api": "OpenAI-compatible (multi-backend)",
+      "rationale": "Llama has no first-party API; backend is per-project config",
+      "backends_v1": ["ollama (local)", "openrouter (cloud aggregator)", "custom_url (escape hatch)"],
+      "models_shipped": ["llama-3.1-8b-instant", "llama-3.1-70b-versatile", "llama-3.1-405b-reasoning", "llama-3.2-1b-preview", "llama-3.2-3b-preview", "llama-3.2-11b-vision-preview", "llama-3.2-90b-vision-preview", "llama-3.3-70b-specdec"]
+    },
+    "grok": {
+      "api": "xAI (OpenAI-compatible)",
+      "rationale": "xAI's API is OpenAI-compatible; value is filling the matrix entry and exposing Grok-2-Vision",
+      "sdk": "openai>=1.0.0 (already a dependency)",
+      "models_shipped": ["grok-2", "grok-2-vision", "grok-beta"]
+    }
+  },
+  "refactor_scope": {
+    "minimax": "Refactor _send_minimax() (~250 lines) to use send_openai_compatible() helper (~50 lines)",
+    "anthropic": "DEFERRED to follow-up track",
+    "gemini": "DEFERRED to follow-up track",
+    "deepseek": "DEFERRED to follow-up track"
+  },
+  "ux_adaptations": [
+    "Screenshot button enabled iff vision=true",
+    "Tools enabled toggle enabled iff tool_calling=true",
+    "Cache panel visible iff caching=true",
+    "Stream progress visible iff streaming=true",
+    "Fetch Models button enabled iff model_discovery=true",
+    "Token budget max = capabilities.context_window",
+    "Cost panel shows estimate iff cost_tracking=true",
+    "Cost panel shows 'Free (local)' for localhost + cost_tracking=false",
+    "Cost panel shows '—' for other cost_tracking=false cases"
+  ],
+  "architectural_invariant": "Every _send_<vendor>() is a thin boundary adapter; the shared algorithm lives in send_openai_compatible(); the capability matrix is the authoritative source of per-(vendor, model) feature support; the GUI adapts to the matrix, not to vendor names.",
+  "threading_constraint": "Same as existing pattern: _send_lock serializes all send() calls; per-vendor history locks (e.g. _minimax_history_lock) guard history mutations; the shared helper is stateless and thread-safe (the OpenAI SDK is thread-safe for distinct clients; the caller owns the client).",
+  "verification_criteria": [
+    "src/vendor_capabilities.py:get_capabilities(vendor, model) returns correct VendorCapabilities for all 4 OpenAI-compatible vendors + Qwen models",
+    "src/vendor_capabilities.py:get_capabilities fallback to vendor default when model not registered",
+    "src/openai_compatible.py:send_openai_compatible handles streaming, non-streaming, tool calls, vision, errors",
+    "src/openai_compatible.py:send_openai_compatible classifies OpenAI errors to ProviderError kinds",
+    "_send_qwen() uses DashScope SDK; tool format translated from OpenAI shape",
+    "_send_qwen() handles Qwen-VL vision (image base64), Qwen-Audio stub",
+    "_send_llama() supports Ollama, OpenRouter, custom URL backends",
+    "_send_llama() unions Ollama /api/tags and OpenRouter /v1/models for model discovery",
+    "_send_grok() uses xAI endpoint (base_url hardcoded to https://api.x.ai/v1)",
+    "_send_grok() handles Grok-2-Vision vision",
+    "_send_minimax() refactored: ~50 lines instead of ~250, all existing test_minimax_provider.py tests pass",
+    "GUI: screenshot button enabled iff capabilities.vision is true for the active (vendor, model)",
+    "GUI: cost panel shows correct value (estimate, 'Free (local)', or '—') based on capabilities.cost_tracking and base URL",
+    "GUI: 9 UX adaptations from spec.md §6 all work end-to-end",
+    "No regressions in 273+ existing tests (full test suite passes)",
+    "No new threading.Thread calls in src/ (per project invariant)",
+    "No top-level heavy imports in src/ai_client.py beyond what's already there (dashscope import is acceptable; flag if it pushes import time > 100ms)"
+  ],
+  "links": {
+    "backlog_entry": "conductor/tracks.md (to be added)",
+    "ai_client_guide": "docs/guide_ai_client.md",
+    "models_guide": "docs/guide_models.md",
+    "workflow_pitfalls": "conductor/workflow.md#known-pitfalls-2026-06-05",
+    "related_tracks": [
+      "conductor/tracks/openai_integration_20260308/",
+      "conductor/tracks/zhipu_integration_20260308/",
+      "conductor/tracks/startup_speedup_20260606/",
+      "conductor/tracks/test_batching_refactor_20260606/"
+    ],
+    "external_docs": [
+      "https://help.aliyun.com/zh/model-studio/ (DashScope)",
+      "https://openrouter.ai/docs (OpenRouter)",
+      "https://github.com/ollama/ollama/blob/main/docs/openai.md (Ollama OpenAI compat)",
+      "https://docs.x.ai/ (xAI)"
+    ]
+  }
+}
@@ -0,0 +1,483 @@
+# Track: Qwen, Llama & Grok Vendor Integration + Capability Matrix
+
+**Status:** Active (spec approved 2026-06-06)
+**Initialized:** 2026-06-06
+**Owner:** Tier 2 Tech Lead
+**Priority:** High (extends vendor matrix; foundational for future open-source / self-hosted support)
+
+---
+
+## 1. Overview
+
+This track adds first-class support for three new AI vendors — **Qwen** (via Alibaba DashScope native API), **Llama** (via Ollama local, OpenRouter cloud, and custom base URL), and **Grok** (via xAI's OpenAI-compatible endpoint) — alongside a new **Vendor Capability Matrix** that declares per-(vendor, model) feature support and lets the GUI adapt dynamically instead of hard-coding per-vendor UI branches.
+
+The track also refactors the existing **MiniMax** provider to use a new shared OpenAI-compatible send helper, eliminating the duplicate OpenAI-compatible request/response logic that the new vendors would otherwise introduce. This is a data-oriented refactor (Fleury / Acton / Lottes framing): the shared helper is the algorithm that operates on a normalized message data structure; each vendor's entry point is a thin adapter that translates vendor-specific request/response shapes into the normalized form at the boundary.
+
+The follow-up track "Anthropic / Gemini / DeepSeek Capability Matrix Migration" (see §13.1) will migrate the remaining three providers onto the same matrix in a separate effort. This track stays focused on the greenfield additions + the safe MiniMax refactor.
+
+## 2. Goals (Priority Order)
+
+| Priority | Goal | Rationale |
+|---|---|---|
+| **A (foundational)** | Vendor Capability Matrix framework. Per-(vendor, model) feature declarations. UX reads the matrix to enable/disable UI elements. | The user's stated architectural goal: "aggregate all those granular features into a feature support listing... the ux can adjust what's available." Per Casey Muratori's module-layer-boundary pattern: `ai_client` is the authoritative owner of "what can vendor X do"; `gui_2` adapts to that surface. |
+| **A (primary value)** | Qwen via DashScope native SDK. Wire Qwen-Plus, Qwen-Max, Qwen-Long (1M+ context), Qwen-VL-Plus, Qwen-VL-Max (vision), Qwen-Audio. | Qwen has a meaningful unique API surface (vs OpenAI-compatible). DashScope native SDK unlocks features that the OpenAI-compatible mode loses (Qwen-Audio, Qwen-Long custom chunking, Qwen-VL-Max enhanced vision). |
+| **A (primary value)** | Llama via Ollama (local) + OpenRouter (cloud) + custom base URL. | Llama has no first-party API. The "vendor" is the model family; the backend is per-project config. Ollama covers local; OpenRouter is the universal cloud aggregator (Together, Groq, Fireworks, etc. all flow through it); custom URL is the escape hatch for self-hosted / unusual backends. |
+| **A (primary value)** | Grok via xAI (OpenAI-compatible). Wire Grok-2, Grok-2-Vision. | xAI's API is OpenAI-compatible; the value is filling in the matrix entry and exposing Grok-2-Vision for the screenshot feature. |
+| **B (architectural)** | Shared OpenAI-compatible helper in `src/openai_compatible.py`. MiniMax, Llama, Grok all call into it. | Data-oriented design: share the algorithm (HTTP call, response parsing, tool-call detection, streaming, history repair, error classification) on a normalized data structure. Each vendor entry point is a thin adapter. |
+| **B (architectural)** | MiniMax refactored to use the shared helper. | MiniMax is already OpenAI-compatible; pure win, ~250 lines of duplicated logic deleted. Mitigated by existing `tests/test_minimax_provider.py`. |
+| **C (optimization)** | Capability matrix v1 populates for the 4 OpenAI-compatible vendors + Qwen. Anthropic/Gemini/DeepSeek get "pending migration" entries; the UX does not read them yet. | Half-baked matrix is worse than no matrix. Populating for the vendors that share the new helper keeps the matrix meaningful without risking regressions in the unique-API vendors. |
+| **C (optimization)** | UX adapts to the matrix: screenshot button disabled when `vision: false`; cache panel hidden when `caching: false`; cost panel shows "Free (local)" (localhost backends) or "—" (all other cases) when `cost_tracking: false`. | The whole point of the matrix. Specific UI adaptations are listed in §6. |
+
+### 2.1 Non-Goals (this track)
+
+- **Not** migrating Anthropic, Gemini, or DeepSeek to the capability matrix. They have genuinely unique APIs (4-breakpoint caching, genai SDK, raw HTTP) and their migration belongs in a separate, careful track. Stub entries: "pending_migration".
+- **Not** adding audio input support (Qwen-Audio's audio files). Audio is a deferred capability (§6).
+- **Not** adding server-side code execution. Deferred to v2 (see the deferred-capability list in §3.3).
+- **Not** changing the AI Settings panel layout beyond the minimum needed to expose the new providers and the capability-driven UI adaptations.
+- **Not** adding model fine-tuning management for any of the three new vendors.
+- **Not** adding batch API support for any of the three new vendors.
+
+## 3. Architecture
+
+### 3.1 Data-Oriented Design (Fleury / Acton / Lottes)
+
+The user's design philosophy (referencing Ryan Fleury's code/data separation, Mike Acton's data-oriented design, Timothy Lottes' cache-aware algorithms) translates concretely to:
+
+- **The data is the API.** The "OpenAI-compatible send" operates on a normalized data structure: `messages: list[dict]`, `tools: list[dict]`, `model_capabilities: VendorCapabilities`, `response: NormalizedResponse`. The structure is laid out linearly (SoA where applicable) and processed in bulk.
+- **The algorithm is shared.** One function: `send_openai_compatible(client, request, *, capabilities) -> NormalizedResponse` (full signature in §5.1; `request` is an `OpenAICompatibleRequest` carrying `model`, `messages`, `tools`, and `stream_callback`). It handles HTTP, response parsing, tool-call detection, streaming chunk aggregation, error classification, history repair, and token usage extraction — all on the normalized data.
+- **The adapters are per-vendor.** Each vendor's `_send_<vendor>()` is a thin function that:
+  1. Initializes the vendor-specific client (OpenAI SDK with vendor's base URL + auth, or DashScope SDK).
+  2. Loads the vendor's history (`_minimax_history`, `_llama_history`, etc.) and capabilities from the registry.
+  3. Calls `send_openai_compatible(...)` (or, for Qwen, the DashScope-specific helper).
+  4. Updates the vendor's history with the normalized response.
+  5. Returns the text content to `ai_client.send()`.
+
+This means:
+- **Adding a new OpenAI-compatible vendor** = 50 lines of glue (client init + capability declaration + history storage), not 300 lines of duplicated logic.
+- **Anthropic/Gemini/DeepSeek** stay per-vendor code paths; the data-oriented refactor doesn't apply to them because their unique APIs are not OpenAI-compatible-shaped.
+- **"Base paths are unique"** (the user's wording) means: `_send_qwen()`, `_send_llama()`, `_send_grok()`, `_send_minimax()` are the unique entry points; everything they call into is shared.
+
+### 3.2 Module Layout
+
+```
+src/
+  ai_client.py                    # Modified: refactor _send_minimax; add _send_qwen/_send_llama/_send_grok
+  vendor_capabilities.py           # NEW: VendorCapabilities dataclass, registry, get_capabilities()
+  openai_compatible.py             # NEW: shared OpenAI-compatible send helper
+  cost_tracker.py                  # Modified: add Qwen/Llama/Grok pricing
+  models.py                        # Modified: add provider metadata for Qwen/Llama/Grok
+  gui_2.py                         # Modified: register Qwen/Llama/Grok in PROVIDERS; capability-driven UI
+  app_controller.py                # Modified: same
+  credentials_template.toml        # Modified: add [qwen], [llama], [grok] sections
+```
+
+```
+tests/
+  test_vendor_capabilities.py      # NEW: capability matrix tests
+  test_openai_compatible.py        # NEW: shared helper tests
+  test_qwen_provider.py            # NEW: Qwen-specific tests (DashScope adapter, history repair, error classification)
+  test_llama_provider.py           # NEW: Llama-specific tests (multi-backend, model discovery)
+  test_grok_provider.py            # NEW: Grok-specific tests (xAI endpoint, Grok-2-Vision)
+  test_minimax_provider.py         # Modified: verify refactor preserves behavior
+```
+
+### 3.3 Capability Matrix v1 — 7 Capabilities
+
+| Capability | Type | Purpose | UX Effect |
+|---|---|---|---|
+| `vision` | `bool` | Can accept image inputs (screenshots). | Screenshot button enabled/disabled in message panel. |
+| `tool_calling` | `bool` | Supports function/tool calls. | Tool system toggle; "Tools enabled" indicator. |
+| `caching` | `bool` | Supports server-side prompt caching (Gemini explicit, Anthropic ephemeral). | Cache panel visible/hidden. Cache indicators in token budget. |
+| `streaming` | `bool` | Supports streaming responses. | Stream progress bar visible/hidden. |
+| `model_discovery` | `bool` | Backend exposes `/v1/models` (or equivalent) for live model list. | "Fetch Models" button enabled/disabled. |
+| `context_window` | `int` | Maximum input tokens for this model. | Token budget panel max. |
+| `cost_tracking` | `bool` | Per-token pricing known. | Cost panel shows an estimate when `true`; when `false`, shows "Free (local)" for localhost backends or "—" otherwise (see §6). |
+
+**Deferred to v2 (separate track):**
+- `audio_input` (Qwen-Audio only)
+- `pdf_input` (Gemini, Anthropic)
+- `server_side_code_execution` (Anthropic, OpenAI, Gemini)
+- `image_generation`, `fine_tuning`, `batch_api` (none currently)
+
+### 3.4 Per-(vendor, model) Capabilities
+
+Capabilities are declared per-model, not per-vendor, because a vendor can have both vision and text-only models (Qwen: Qwen-VL-Plus vs Qwen-Plus; Llama: 3.2-Vision vs 3.2-1B/3B; Grok: Grok-2-Vision vs Grok-2).
+
+```python
+@dataclass(frozen=True)
+class VendorCapabilities:
+ vendor: str                        # "qwen" | "llama" | "grok" | "minimax" | "anthropic" | "gemini" | ...
+ model: str                         # the model name, e.g. "qwen-vl-max" or "*" for vendor default
+ vision: bool = False
+ tool_calling: bool = True
+ caching: bool = False
+ streaming: bool = True
+ model_discovery: bool = True
+ context_window: int = 8192         # tokens
+ cost_tracking: bool = True         # False for local backends where cost is unknown/free
+ cost_input_per_mtok: float = 0.0   # USD per million input tokens
+ cost_output_per_mtok: float = 0.0  # USD per million output tokens
+ notes: str = ""
+```
+
+**Lookup pattern:** `get_capabilities(vendor, model) -> VendorCapabilities`. The registry is a flat dict keyed by `(vendor, model)`. Lookups fall back to the vendor's default entry (the one registered with `model = "*"`) if a specific model isn't registered.
+
+**Registry source of truth:** `src/vendor_capabilities.py` has a hardcoded `_REGISTRY: dict[tuple[str, str], VendorCapabilities]` populated at import time. The data is in code (not TOML) because:
+- It's referenced by `_send_<vendor>()` per call (hot path; can't afford file I/O).
+- Changes are tied to vendor SDK updates and are code-reviewed.
+- TOML is for user-config (credentials, project settings); vendor capabilities are platform facts.
+
+## 4. Per-Vendor Designs
+
+### 4.1 Qwen via DashScope Native SDK
+
+**Why native (not OpenAI-compatible mode):** DashScope's native API unlocks Qwen-Audio, Qwen-Long (1M+ context with custom chunking), Qwen-VL-Max (enhanced vision), and DashScope-specific tool format with `parameters` schema. OpenAI-compatible mode loses these.
+
+**SDK:** `dashscope` (added to `pyproject.toml` dependencies).
+
+**State (module-level globals, following the existing pattern):**
+```python
+_qwen_client: dashscope.Generation | None = None
+_qwen_history: list[dict[str, Any]] = []
+_qwen_history_lock: threading.Lock = threading.Lock()
+```
+
+**Credentials:** `credentials.toml` `[qwen]` section with `api_key` and optional `region` (default: `china`; alternatives: `international`).
+
+**Configuration per-project (TOML):** `provider = "qwen"`, `qwen_model = "qwen-max"`. Optional `qwen_region = "international"`.
+
+**Models shipped in the capability registry (v1):**
+
+| Model | vision | tool_calling | caching | context_window | cost_input | cost_output |
+|---|---|---|---|---|---|---|
+| `qwen-turbo` | false | true | false | 1,000,000 | $0.05 | $0.10 |
+| `qwen-plus` | false | true | false | 131,072 | $0.40 | $1.20 |
+| `qwen-max` | false | true | false | 32,768 | $2.00 | $6.00 |
+| `qwen-long` | false | true | false | 1,000,000 | $0.07 | $0.28 |
+| `qwen-vl-plus` | true | true | false | 131,072 | $0.21 | $0.63 |
+| `qwen-vl-max` | true | true | false | 32,768 | $0.50 | $1.50 |
+| `qwen-audio` | true (audio) | true | false | 32,768 | $0.10 | $0.30 |
+
+(Pricing from Alibaba Cloud DashScope public pricing as of 2026-06-06; update if needed.)
+
+**Entry point:** `_send_qwen()` in `src/ai_client.py`. Calls a DashScope-specific helper (not the OpenAI-compatible one) because DashScope's request/response shape differs.
+
+**Tool format translation:** DashScope uses a slightly different tool schema than OpenAI. The Qwen adapter translates from the normalized tool definitions (OpenAI-shaped) to DashScope's `tools: list[dict]` with `parameters: dict` schema.
+
+**Vision / audio:** Qwen-VL accepts image URLs or base64; Qwen-Audio accepts audio URLs or base64. The adapter handles the multipart encoding.
+
+**Error classification:** `_classify_qwen_error()` maps DashScope exceptions to `ProviderError` kinds (`quota`, `rate_limit`, `auth`, `balance`, `network`).
+
+**Model discovery:** Although DashScope exposes a `list_models` API, it is not a reliable runtime discovery mechanism, so `_list_qwen_models()` does not call it and instead returns the hardcoded registry, which is the source of truth for the Qwen model list.
+
+**Vision support:** Qwen-Audio and Qwen-VL-* register `vision: true`. The UX's screenshot button is enabled for those models. For Qwen-Audio, the screenshot button is replaced with an audio attachment button (deferred to v2; for v1, audio attachment is wired but the button is hidden — see §6).
+
+### 4.2 Llama (Ollama + OpenRouter + Custom URL)
+
+**Why three backends:** Llama has no first-party API. The "vendor" is the model family; the backend is per-project config.
+- **Ollama** (local, ubiquitous): OpenAI-compatible at `http://localhost:11434/v1`. Free.
+- **OpenRouter** (cloud aggregator): OpenAI-compatible at `https://openrouter.ai/api/v1`. Single API key covers Together, Groq, Fireworks, etc.
+- **Custom URL** (escape hatch): any OpenAI-compatible endpoint. For self-hosted vLLM, llama.cpp, LM Studio, or any unusual cloud.
+
+**SDK:** `openai` (already a dependency, used for MiniMax).
+
+**State (module-level globals):**
+```python
+_llama_client: OpenAI | None = None
+_llama_history: list[dict[str, Any]] = []
+_llama_history_lock: threading.Lock = threading.Lock()
+_llama_base_url: str = "http://localhost:11434/v1"  # default
+_llama_api_key: str = "ollama"                      # Ollama doesn't require auth
+```
+
+**Credentials:** `credentials.toml` `[llama]` section with `api_key` (empty for Ollama) and `base_url`.
+
+**Configuration per-project (TOML):** `provider = "llama"`, `llama_model = "llama-3.3-70b"`, `llama_base_url = "https://openrouter.ai/api/v1"`, `llama_api_key_env = "OPENROUTER_API_KEY"` (optional env override).
+
+**Models shipped in the capability registry (v1):**
+
+| Model | vision | tool_calling | caching | context_window | cost_input | cost_output |
+|---|---|---|---|---|---|---|
+| `llama-3.1-8b-instant` | false | true | false | 131,072 | $0.05 (Groq) | $0.08 |
+| `llama-3.1-70b-versatile` | false | true | false | 131,072 | $0.59 (Groq) | $0.79 |
+| `llama-3.1-405b-reasoning` | false | true | false | 131,072 | $3.00 (OpenRouter avg) | $3.00 |
+| `llama-3.2-1b-preview` | false | true | false | 131,072 | $0.04 | $0.04 |
+| `llama-3.2-3b-preview` | false | true | false | 131,072 | $0.06 | $0.06 |
+| `llama-3.2-11b-vision-preview` | true | true | false | 131,072 | $0.18 | $0.18 |
+| `llama-3.2-90b-vision-preview` | true | true | false | 131,072 | $0.90 | $0.90 |
+| `llama-3.3-70b-specdec` | false | true | false | 131,072 | $0.59 (Groq) | $0.79 |
+| `llama-*` (wildcard) | model-specific | true | false | 131,072 | $0 | $0 |
+
+(Pricing varies by backend; registry entries represent the most common case. Cost overrides per-project allowed via TOML.)
+
+**Local backend default:** When `llama_base_url` is `http://localhost:11434/v1` and `llama_api_key` is empty, `cost_tracking: false` (free). UX cost panel shows "Free (local)" instead of an estimate.
+
+**Entry point:** `_send_llama()` in `src/ai_client.py`. Calls the shared `send_openai_compatible()` helper.
+
+**Tool format:** Native OpenAI (Llama backends all use OpenAI's tool format). No translation needed.
+
+**Error classification:** `_classify_llama_error()` — same as MiniMax's error classifier (OpenAI SDK errors are uniform across backends).
+
+**Model discovery:** Ollama exposes `GET /api/tags` (not `/v1/models`); OpenRouter exposes `GET /v1/models`. The Llama adapter probes both endpoints and unions the results. For custom URLs, falls back to the hardcoded registry.
+
+### 4.3 Grok via xAI (OpenAI-Compatible)
+
+**SDK:** `openai` (already a dependency).
+
+**State:**
+```python
+_grok_client: OpenAI | None = None
+_grok_history: list[dict[str, Any]] = []
+_grok_history_lock: threading.Lock = threading.Lock()
+```
+
+**Credentials:** `credentials.toml` `[grok]` section with `api_key`. (xAI's `base_url` is hardcoded to `https://api.x.ai/v1`.)
+
+**Configuration per-project (TOML):** `provider = "grok"`, `grok_model = "grok-2"`.
+
+**Models shipped in the capability registry (v1):**
+
+| Model | vision | tool_calling | caching | context_window | cost_input | cost_output |
+|---|---|---|---|---|---|---|
+| `grok-2` | false | true | false | 131,072 | $2.00 | $10.00 |
+| `grok-2-vision` | true | true | false | 32,768 | $2.00 | $10.00 |
+| `grok-beta` | false | true | false | 131,072 | $5.00 | $15.00 |
+
+(Pricing from x.ai public pricing as of 2026-06-06; update if needed.)
+
+**Entry point:** `_send_grok()` in `src/ai_client.py`. Calls `send_openai_compatible()` with the xAI base URL.
+
+**Tool format:** Native OpenAI. No translation needed.
+
+**Vision:** Grok-2-Vision accepts image URLs or base64. The OpenAI-compatible helper already handles vision via the OpenAI SDK's multimodal message format.
+
+**Error classification:** Same as OpenAI-compatible vendors (uniform error shape via the openai SDK).
+
+**Model discovery:** xAI exposes `GET /v1/models`. Standard OpenAI-compatible discovery.
+
+## 5. Shared OpenAI-Compatible Helper
+
+### 5.1 Module: `src/openai_compatible.py`
+
+```python
+from dataclasses import dataclass
+from typing import Any, Callable, Optional
+from openai import OpenAI, OpenAIError
+
+@dataclass(frozen=True)
+class NormalizedResponse:
+ text: str
+ tool_calls: list[dict[str, Any]]
+ usage_input_tokens: int
+ usage_output_tokens: int
+ usage_cache_read_tokens: int
+ usage_cache_creation_tokens: int
+ raw_response: Any
+
+@dataclass
+class OpenAICompatibleRequest:
+ messages: list[dict[str, Any]]
+ tools: Optional[list[dict[str, Any]]] = None
+ model: str = ""
+ temperature: float = 0.0
+ top_p: float = 1.0
+ max_tokens: int = 8192
+ stream: bool = False
+ stream_callback: Optional[Callable[[str], None]] = None
+
+def send_openai_compatible(
+ client: OpenAI,
+ request: OpenAICompatibleRequest,
+ *,
+ capabilities: VendorCapabilities,
+) -> NormalizedResponse: ...
+```
+
+The helper:
+1. Translates `request.messages` into the OpenAI SDK's `messages` parameter (passthrough — already in OpenAI shape).
+2. Translates `request.tools` if non-None (passthrough for now; future: strip unsupported fields based on `capabilities`).
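+3. Calls `client.chat.completions.create(...)` with the right `model`, `temperature`, `top_p`, `max_tokens`, `stream`, and — only when `request.tools` is non-None — `tools` and `tool_choice="auto"` (the API rejects `tool_choice` when no `tools` are supplied).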
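+4. If streaming: aggregates chunks; calls `request.stream_callback(text_chunk)` for each text delta (when a callback is set); collects final usage from the last chunk (the request must set `stream_options={"include_usage": True}`, otherwise OpenAI-style streams carry no usage data; backends that ignore the option report zero usage).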
+5. If non-streaming: parses the response in one shot.
+6. Returns a `NormalizedResponse` with text, tool calls (in OpenAI shape), usage stats.
+7. On exception: classifies the OpenAI exception and re-raises as `ProviderError` (using `_classify_openai_compatible_error()`).
+
+The helper is the **algorithm on the data**. Per-vendor adapters (Llama, Grok, MiniMax) are the **boundary code that converts vendor-specific state to/from the normalized form**.
+
+### 5.2 Refactor of `_send_minimax()`
+
+**Before:** ~250 lines of inline OpenAI-compatible send logic (lines 2103-2264 of `src/ai_client.py` per the existing grep). Mixes client init, message building, API call, response parsing, tool call handling, history repair, error classification.
+
+**After:** ~50 lines. `_send_minimax()` becomes:
+```python
+def _send_minimax(md_content, user_message, base_dir, file_items, discussion_history, ...):
+ _ensure_minimax_client()
+ with _minimax_history_lock:
+  _repair_minimax_history(_minimax_history)
+  if discussion_history and not _minimax_history:
+   _minimax_history.extend(_parse_discussion_history(discussion_history))
+  _minimax_history.append({"role": "user", "content": _build_user_content(...)})
+ 
+ request = OpenAICompatibleRequest(
+ messages=_minimax_history,
+ tools=_build_tools(...),
+ model=_model,
+ temperature=_temperature,
+ top_p=_top_p,
+ max_tokens=_max_tokens,
+ stream=True,
+ stream_callback=stream_callback,
+ )
+ caps = get_capabilities("minimax", _model)
+ response = send_openai_compatible(_minimax_client, request, capabilities=caps)
+ 
+ # Append response to history (same logic as today)
+ ...
+ return response.text
+```
+
+The behavior is identical; the code is shorter. `tests/test_minimax_provider.py` is the safety net (existing test coverage should pass without modification).
+
+## 6. UX Adaptation (Capability-Driven UI)
+
+The GUI reads `get_capabilities(active_vendor, active_model)` once per render frame and stores it in a local. Specific adaptations:
+
+| UI Element | Behavior based on matrix |
+|---|---|
+| **Screenshot button** (Message panel) | Enabled iff `vision: true`. Tooltip explains why if disabled. |
+| **Audio attachment button** (Message panel) | **Deferred to v2.** Stub: always hidden in v1. |
+| **Tools enabled toggle** (Message panel) | Enabled iff `tool_calling: true`. |
+| **Cache panel** (Operations Hub) | Visible iff `caching: true`. |
+| **Cache indicators** (Token budget) | Shown iff `caching: true`. |
+| **Stream progress** (Response panel) | Visible iff `streaming: true`. |
+| **Fetch Models button** (AI Settings) | Enabled iff `model_discovery: true`. |
+| **Token budget max** (Token budget) | Set to `capabilities.context_window`. |
+| **Cost estimate** (MMA Dashboard) | Shown iff `cost_tracking: true`; shows "Free (local)" for `cost_tracking: false` + `base_url` containing `localhost`/`127.0.0.1`; shows "—" for other `cost_tracking: false` cases. |
+
+The adaptations are gated on the capability value, not on vendor name. The `gui_2.py` change is one new helper: `def _get_active_capabilities(self) -> VendorCapabilities: return get_capabilities(self._provider, self._model)`. The render functions query this once at the top of their scope.
+
+## 7. Configuration
+
+### 7.1 `pyproject.toml` — new dependency
+
+```toml
+[project]
+dependencies = [
+ ...
+ "dashscope>=1.14.0,<2.0.0",  # NEW (upper bound per §10: pin below the next major version)
+ "openai>=1.0.0",       # already a dependency
+]
+```
+
+### 7.2 `credentials.toml` — new sections
+
+```toml
+[qwen]
+api_key = "YOUR_DASHSCOPE_KEY"
+# region = "china"  # default; "international" also valid
+
+[llama]
+# api_key = "YOUR_OPENROUTER_KEY"  # required for OpenRouter; empty for Ollama
+# base_url = "https://openrouter.ai/api/v1"  # default for cloud; "http://localhost:11434/v1" for Ollama
+
+[grok]
+api_key = "YOUR_XAI_KEY"
+```
+
+### 7.3 Per-project TOML — provider selection
+
+```toml
+[ai]
+provider = "qwen"          # "qwen" | "llama" | "grok" | (existing: "gemini", "anthropic", ...)
+model = "qwen-vl-max"
+qwen_region = "china"      # vendor-specific
+# OR
+llama_base_url = "https://openrouter.ai/api/v1"
+llama_api_key_env = "OPENROUTER_API_KEY"  # optional: read key from env
+# OR
+grok_model = "grok-2-vision"
+```
+
+## 8. Testing Strategy
+
+| Test File | Purpose | Coverage Target |
+|---|---|---|
+| `tests/test_vendor_capabilities.py` | Registry lookup, fallback to vendor default, per-model overrides. | 100% |
+| `tests/test_openai_compatible.py` | Request building, response parsing, streaming aggregation, tool call detection, error classification. | 90% |
+| `tests/test_qwen_provider.py` | DashScope adapter, tool format translation, Qwen-VL vision, Qwen-Audio stub. | 80% |
+| `tests/test_llama_provider.py` | Multi-backend (Ollama mock + OpenRouter mock), model discovery union, custom URL fallback. | 80% |
+| `tests/test_grok_provider.py` | xAI endpoint, Grok-2-Vision vision, model discovery. | 80% |
+| `tests/test_minimax_provider.py` (existing, unmodified) | Verify refactor preserves behavior. Existing tests should pass unmodified. | 100% (regression) |
+
+**Mocking strategy:** All tests use `unittest.mock.patch` on the vendor SDKs (DashScope, OpenAI). No real API calls. The `RUN_REAL_AI_TESTS=1` env var continues to gate opt-in real-API tests (out of scope for this track).
+
+**Integration verification:** Manual smoke test in the GUI: select Qwen provider, send a message with a tool call, confirm the tool executes. Repeat for Llama and Grok. Document the smoke test results in the Phase 5 checkpoint git note.
+
+## 9. Migration / Rollout
+
+| Phase | What | Risk |
+|---|---|---|
+| **Phase 1 — Capability matrix framework + shared helper** | Add `src/vendor_capabilities.py` and `src/openai_compatible.py`. Add unit tests for both. Add `dashscope` to `pyproject.toml`. No user-facing changes. | Low. New files, no modifications to `ai_client.py`. |
+| **Phase 2 — Qwen via DashScope** | Implement `_send_qwen()` in `src/ai_client.py`. Add `[qwen]` to credentials template. Register `qwen` in `PROVIDERS` lists. Populate capability registry for Qwen models. | Medium. New SDK, new code path, new credentials section. |
+| **Phase 3 — Grok + Llama via shared helper** | Implement `_send_grok()` and `_send_llama()`. Both call `send_openai_compatible()`. Add `[grok]` and `[llama]` credentials sections. Register in PROVIDERS lists. | Medium. New code paths, but lighter than Qwen (OpenAI-compatible). |
+| **Phase 4 — MiniMax refactor** | Refactor `_send_minimax()` to use the shared helper. Verify all existing `tests/test_minimax_provider.py` tests pass. | Medium-High. Touching working code. Mitigated by existing test coverage. |
+| **Phase 5 — UX adaptation + integration** | Add `_get_active_capabilities()` to `gui_2.py`. Apply the 9 UI adaptations from §6. Run the full test suite. | Low. UI-only changes. |
+| **Phase 6 — Docs + archive** | Update `docs/guide_ai_client.md` to document the new vendors, the capability matrix, and the shared helper. Update `docs/guide_models.md` for the new PROVIDERS entries. Archive the track. | Low. |
+
+Each phase has its own checkpoint commit and git note.
+
+## 10. Risks & Mitigations
+
+| Risk | Likelihood | Impact | Mitigation |
+|---|---|---|---|
+| MiniMax refactor breaks existing behavior. | Medium | High (regresses a working provider) | `tests/test_minimax_provider.py` is the safety net. Run it after every change. If it fails, the refactor is incorrect — fix forward, don't revert. |
+| DashScope SDK has API differences from documentation (e.g., response shape). | Medium | Medium | Pin to a specific DashScope version (`>=1.14.0,<2.0.0`). Test against the actual SDK in CI. |
+| OpenRouter pricing varies by underlying model; registry entries may be inaccurate. | High | Low (cost estimates are advisory) | Cost panel shows "Estimate" with a tooltip. Add a "Pricing source: x" line. |
+| Ollama's `/api/tags` shape differs from `/v1/models`; the union function may miss models. | Low | Low (model list is a convenience) | Fall back to the hardcoded registry. Manual override per-project via TOML. |
+| Capability matrix drift: a model ships a new feature (e.g., Qwen-Plus gains vision) but the registry says `vision: false`. | Medium | Low (user sees a missing feature) | Document the update process: edit `src/vendor_capabilities.py`, add a test, commit. Make the registry the canonical place to look. |
+| Local backends (Ollama) need CORS / firewall configured for the GUI to talk to them. | Low | Medium (user can't connect) | Document the Ollama setup in the credentials template comments. Reference the Ollama docs for `OLLAMA_ORIGINS`. |
+| Llama backends may rate-limit aggressively (especially free tiers of OpenRouter). | Medium | Low | The existing `_classify_openai_compatible_error()` already maps 429 to `rate_limit`. The error UI surfaces this clearly. |
+
+## 11. Out of Scope (Explicit)
+
+- **Audio input support** (Qwen-Audio, future Grok-Audio). Deferred to a follow-up track that adds an audio attachment button to the message panel and an `audio_input` capability to the matrix.
+- **Server-side code execution** (Anthropic, OpenAI, Gemini). Deferred; the matrix has a placeholder entry `server_side_code_execution: false` for all v1 vendors.
+- **Anthropic / Gemini / DeepSeek capability matrix migration**. Tracked as a separate track ("Anthropic / Gemini / DeepSeek Capability Matrix Migration" — see §13.1). Their unique APIs need careful, vendor-by-vendor migration.
+- **Batch API support** for any of the three new vendors. Not requested.
+- **Fine-tuning management** for any of the three new vendors. Not requested.
+- **Image generation** (DALL-E, Midjourney, etc.). Not in scope; the matrix has a placeholder `image_generation: false`.
+- **PDF input** (Gemini, Anthropic). Deferred.
+
+## 12. Open Questions
+
+1. **Per-model cost overrides:** Should `manual_slop.toml` allow per-project cost overrides for Llama backends (since pricing varies by which underlying provider OpenRouter routes to)? (Proposal: yes; add `llama_cost_input` / `llama_cost_output` to the per-project TOML.)
+2. **Default Llama base URL:** Should the default be Ollama (`localhost:11434`) or OpenRouter? (Proposal: Ollama for the "first-time user gets a working setup" experience; OpenRouter requires an API key.)
+3. **DashScope region selection:** How does the user pick `china` vs `international`? Per-project TOML (`qwen_region = "international"`) or env var (`DASHSCOPE_REGION`)? (Proposal: both; TOML wins.)
+4. **Qwen-Coder and Qwen-Math specialized models:** Include in v1 or defer? (Proposal: defer to v1.1; the matrix entry is trivial but the model-specific prompting optimization is out of scope.)
+
+## 13. See Also
+
+### 13.1 Follow-up Track (separate plan)
+
+**"Anthropic / Gemini / DeepSeek Capability Matrix Migration"** — Migrates the three remaining providers onto the same capability matrix. Required pre-work: ensure the matrix's per-model lookup pattern handles the `caching: true` (Anthropic 4-breakpoint, Gemini explicit) and `pdf_input: true` (Anthropic, Gemini) capabilities. Each provider keeps its unique per-vendor code path (the 4-breakpoint system, the genai SDK); the matrix entries are populated so the UX can adapt. This is a separate track because the migration of each unique-API provider is non-trivial and the risk of regressing the existing working code is high.
+
+### 13.2 Project References
+
+- `docs/guide_ai_client.md` — current `ai_client.py` architecture; will be updated in Phase 6 to document the matrix and the shared helper.
+- `docs/guide_models.md` — current PROVIDERS constant and provider metadata; will be updated in Phase 6.
+- `conductor/tracks/openai_integration_20260308/` — closest prior art (single provider, OpenAI-compatible).
+- `conductor/tracks/zhipu_integration_20260308/` — second prior art (single provider, custom API).
+- `conductor/tracks/startup_speedup_20260606/` — example of an active track in this project (same convention).
+- `conductor/tracks/test_batching_refactor_20260606/` — second example of an active track in this project.
+- `conductor/product.md` "Multi-Provider Integration" — product-level overview of the multi-provider architecture.
+- `conductor/product-guidelines.md` "Modular Controller Pattern" — the convention this track follows for `vendor_capabilities.py` and `openai_compatible.py` as standalone modules.
+
+### 13.3 External References
+
+- **Ryan Fleury on code/data separation** — informs the data-oriented design (vendor capabilities as data, helper as algorithm, per-vendor code as boundary adapter).
+- **Mike Acton on data-oriented design** — informs the SoA-like layout of the capability matrix and the "transform data, don't mutate state" framing.
+- **Timothy Lottes on cache-aware algorithms** — informs the helper's streaming aggregation (bulk-process chunks, minimize per-chunk overhead).
+- **Alibaba DashScope documentation** — `https://help.aliyun.com/zh/model-studio/` for the native API reference.
+- **OpenRouter API documentation** — `https://openrouter.ai/docs` for the cloud aggregator.
+- **Ollama OpenAI compatibility** — `https://github.com/ollama/ollama/blob/main/docs/openai.md` for the local backend.
+- **xAI API documentation** — `https://docs.x.ai/` for the Grok endpoint.
@@ -0,0 +1,134 @@
+# Track state for qwen_llama_grok_integration_20260606
+# Updated by Tier 2 Tech Lead as tasks complete
+
+[meta]
+track_id = "qwen_llama_grok_integration_20260606"
+name = "Qwen, Llama & Grok Vendor Integration + Capability Matrix"
+status = "active"
+current_phase = 0
+last_updated = "2026-06-06"
+
+[phases]
+# Phase 1: Capability matrix framework + shared helper (no user-facing changes)
+phase_1 = { status = "pending", checkpoint_sha = "", name = "Capability matrix framework + shared helper" }
+# Phase 2: Qwen via DashScope
+phase_2 = { status = "pending", checkpoint_sha = "", name = "Qwen via DashScope" }
+# Phase 3: Grok + Llama via shared helper
+phase_3 = { status = "pending", checkpoint_sha = "", name = "Grok + Llama via shared helper" }
+# Phase 4: MiniMax refactor
+phase_4 = { status = "pending", checkpoint_sha = "", name = "MiniMax refactor to use shared helper" }
+# Phase 5: UX adaptation + integration
+phase_5 = { status = "pending", checkpoint_sha = "", name = "UX adaptation + integration" }
+# Phase 6: Docs + archive
+phase_6 = { status = "pending", checkpoint_sha = "", name = "Docs + archive" }
+
+[tasks]
+# Phase 1: Capability matrix framework + shared helper
+# (Tasks TBD by writing-plans; placeholder structure only)
+t1_1 = { status = "pending", commit_sha = "", description = "Red: tests/test_vendor_capabilities.py::test_registry_lookup_known_model" }
+t1_2 = { status = "pending", commit_sha = "", description = "Red: tests/test_vendor_capabilities.py::test_fallback_to_vendor_default" }
+t1_3 = { status = "pending", commit_sha = "", description = "Red: tests/test_vendor_capabilities.py::test_unknown_vendor_raises" }
+t1_4 = { status = "pending", commit_sha = "", description = "Green: implement src/vendor_capabilities.py with VendorCapabilities + get_capabilities + initial registry" }
+t1_5 = { status = "pending", commit_sha = "", description = "Red: tests/test_openai_compatible.py::test_send_non_streaming" }
+t1_6 = { status = "pending", commit_sha = "", description = "Red: tests/test_openai_compatible.py::test_send_streaming_aggregates_chunks" }
+t1_7 = { status = "pending", commit_sha = "", description = "Red: tests/test_openai_compatible.py::test_tool_call_detection" }
+t1_8 = { status = "pending", commit_sha = "", description = "Red: tests/test_openai_compatible.py::test_vision_multimodal_message" }
+t1_9 = { status = "pending", commit_sha = "", description = "Red: tests/test_openai_compatible.py::test_error_classification_429_to_rate_limit" }
+t1_10 = { status = "pending", commit_sha = "", description = "Green: implement src/openai_compatible.py with NormalizedResponse + OpenAICompatibleRequest + send_openai_compatible" }
+t1_11 = { status = "pending", commit_sha = "", description = "Add dashscope>=1.14.0,<2.0.0 to pyproject.toml dependencies" }
+t1_12 = { status = "pending", commit_sha = "", description = "Phase 1 checkpoint commit + git note" }
+# Phase 2: Qwen via DashScope
+t2_1 = { status = "pending", commit_sha = "", description = "Red: tests/test_qwen_provider.py::test_send_qwen_routes_to_dashscope" }
+t2_2 = { status = "pending", commit_sha = "", description = "Red: tests/test_qwen_provider.py::test_qwen_tool_format_translation" }
+t2_3 = { status = "pending", commit_sha = "", description = "Red: tests/test_qwen_provider.py::test_qwen_vl_vision_image_base64" }
+t2_4 = { status = "pending", commit_sha = "", description = "Red: tests/test_qwen_provider.py::test_qwen_error_classification" }
+t2_5 = { status = "pending", commit_sha = "", description = "Red: tests/test_qwen_provider.py::test_list_qwen_models" }
+t2_6 = { status = "pending", commit_sha = "", description = "Green: implement _send_qwen, _ensure_qwen_client, _classify_qwen_error, _list_qwen_models in src/ai_client.py" }
+t2_7 = { status = "pending", commit_sha = "", description = "Add [qwen] section to credentials_template.toml" }
+t2_8 = { status = "pending", commit_sha = "", description = "Add qwen to PROVIDERS in src/gui_2.py and src/app_controller.py" }
+t2_9 = { status = "pending", commit_sha = "", description = "Add Qwen models to capability registry in src/vendor_capabilities.py" }
+t2_10 = { status = "pending", commit_sha = "", description = "Add Qwen pricing to src/cost_tracker.py" }
+t2_11 = { status = "pending", commit_sha = "", description = "Phase 2 checkpoint commit + git note" }
+# Phase 3: Grok + Llama via shared helper
+t3_1 = { status = "pending", commit_sha = "", description = "Red: tests/test_grok_provider.py::test_send_grok_uses_xai_endpoint" }
+t3_2 = { status = "pending", commit_sha = "", description = "Red: tests/test_grok_provider.py::test_grok_2_vision_vision_support" }
+t3_3 = { status = "pending", commit_sha = "", description = "Green: implement _send_grok, _ensure_grok_client in src/ai_client.py" }
+t3_4 = { status = "pending", commit_sha = "", description = "Add [grok] section to credentials_template.toml" }
+t3_5 = { status = "pending", commit_sha = "", description = "Add grok to PROVIDERS in src/gui_2.py and src/app_controller.py" }
+t3_6 = { status = "pending", commit_sha = "", description = "Add Grok models to capability registry" }
+t3_7 = { status = "pending", commit_sha = "", description = "Add Grok pricing to src/cost_tracker.py" }
+t3_8 = { status = "pending", commit_sha = "", description = "Red: tests/test_llama_provider.py::test_send_llama_ollama_backend" }
+t3_9 = { status = "pending", commit_sha = "", description = "Red: tests/test_llama_provider.py::test_send_llama_openrouter_backend" }
+t3_10 = { status = "pending", commit_sha = "", description = "Red: tests/test_llama_provider.py::test_send_llama_custom_url" }
+t3_11 = { status = "pending", commit_sha = "", description = "Red: tests/test_llama_provider.py::test_llama_model_discovery_unions_ollama_and_openrouter" }
+t3_12 = { status = "pending", commit_sha = "", description = "Red: tests/test_llama_provider.py::test_llama_3_2_vision_vision_support" }
+t3_13 = { status = "pending", commit_sha = "", description = "Red: tests/test_llama_provider.py::test_llama_local_backend_cost_tracking_false" }
+t3_14 = { status = "pending", commit_sha = "", description = "Green: implement _send_llama, _ensure_llama_client, _list_llama_models in src/ai_client.py" }
+t3_15 = { status = "pending", commit_sha = "", description = "Add [llama] section to credentials_template.toml" }
+t3_16 = { status = "pending", commit_sha = "", description = "Add llama to PROVIDERS in src/gui_2.py and src/app_controller.py" }
+t3_17 = { status = "pending", commit_sha = "", description = "Add Llama models to capability registry" }
+t3_18 = { status = "pending", commit_sha = "", description = "Phase 3 checkpoint commit + git note" }
+# Phase 4: MiniMax refactor
+t4_1 = { status = "pending", commit_sha = "", description = "Baseline: run tests/test_minimax_provider.py; all pass (green)" }
+t4_2 = { status = "pending", commit_sha = "", description = "Refactor _send_minimax to use send_openai_compatible helper" }
+t4_3 = { status = "pending", commit_sha = "", description = "Verify tests/test_minimax_provider.py still pass (no regressions)" }
+t4_4 = { status = "pending", commit_sha = "", description = "Add MiniMax to capability registry (per-model: minimax-* entries with vision/tool/cost)" }
+t4_5 = { status = "pending", commit_sha = "", description = "Run full test suite; ensure no regressions" }
+t4_6 = { status = "pending", commit_sha = "", description = "Phase 4 checkpoint commit + git note" }
+# Phase 5: UX adaptation + integration
+t5_1 = { status = "pending", commit_sha = "", description = "Add _get_active_capabilities() helper to src/gui_2.py" }
+t5_2 = { status = "pending", commit_sha = "", description = "Apply 9 UX adaptations from spec.md §6 (vision, tools, cache, stream, fetch models, context window, cost)" }
+t5_3 = { status = "pending", commit_sha = "", description = "Update _predefined_callbacks / _gettable_fields to expose new provider selection" }
+t5_4 = { status = "pending", commit_sha = "", description = "Run full test suite; ensure no regressions in live_gui tests" }
+t5_5 = { status = "pending", commit_sha = "", description = "Manual smoke test: select Qwen, send message, tool executes; repeat for Llama, Grok" }
+t5_6 = { status = "pending", commit_sha = "", description = "Phase 5 checkpoint commit + git note" }
+# Phase 6: Docs + archive
+t6_1 = { status = "pending", commit_sha = "", description = "Update docs/guide_ai_client.md: new vendors section, capability matrix section, shared helper section" }
+t6_2 = { status = "pending", commit_sha = "", description = "Update docs/guide_models.md: new PROVIDERS entries for qwen/llama/grok" }
+t6_3 = { status = "pending", commit_sha = "", description = "git mv conductor/tracks/qwen_llama_grok_integration_20260606 to conductor/tracks/archive/" }
+t6_4 = { status = "pending", commit_sha = "", description = "Update conductor/tracks.md: move entry from Backlog to Recently Completed" }
+t6_5 = { status = "pending", commit_sha = "", description = "Final checkpoint commit + git note" }
+
+[verification]
+# Filled as phases complete
+phase_1_capability_registry_complete = false
+phase_1_shared_helper_complete = false
+phase_2_qwen_dashscope_complete = false
+phase_3_grok_complete = false
+phase_3_llama_complete = false
+phase_4_minimax_refactor_preserves_tests = false
+phase_5_ux_adaptations_complete = false
+phase_5_smoke_test_passed = false
+phase_6_docs_updated = false
+phase_6_track_archived = false
+full_test_suite_passes = false
+no_new_threading_thread_calls = false
+
+[openai_compatible_models]
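+# Filled as models are added to the capability registry.
+# NOTE: despite the table name, this also tracks the Qwen models, which are served
+# through the native DashScope SDK rather than the shared OpenAI-compatible helper.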
+qwen_turbo = false
+qwen_plus = false
+qwen_max = false
+qwen_long = false
+qwen_vl_plus = false
+qwen_vl_max = false
+qwen_audio = false
+llama_3_1_8b = false
+llama_3_1_70b = false
+llama_3_1_405b = false
+llama_3_2_1b = false
+llama_3_2_3b = false
+llama_3_2_11b_vision = false
+llama_3_2_90b_vision = false
+llama_3_3_70b = false
+grok_2 = false
+grok_2_vision = false
+grok_beta = false
+minimax_models_refactored = false
+
+[minimax_refactor_stats]
+# Filled in Phase 4
+lines_before = 0
+lines_after = 0
+tests_passing = 0
+tests_failing = 0