From 7c1d597ef175d29bbaaf0733227dd60abe1cc3bf Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sat, 6 Jun 2026 14:56:00 -0400 Subject: [PATCH] conductor(track): Initialize qwen_llama_grok_integration_20260606 spec Three new vendors + capability matrix framework + MiniMax refactor: **Capability matrix v1 (7 features):** vision, tool_calling, caching, streaming, model_discovery, context_window, cost_tracking. Audio and server-side code execution deferred to a follow-up track. **Qwen via DashScope native SDK:** Qwen-Turbo, Qwen-Plus, Qwen-Max, Qwen-Long (1M context), Qwen-VL-Plus/Max (vision), Qwen-Audio. Native API chosen over OpenAI-compatible mode to unlock Qwen-Audio, Qwen-Long custom chunking, and Qwen-VL-Max enhanced vision. **Llama (OpenAI-compatible, multi-backend):** Ollama (local, free), OpenRouter (cloud aggregator covering Together/Groq/Fireworks), custom URL escape hatch. Models: Llama 3.1 8B/70B/405B, 3.2 1B/3B, 3.2 11B/90B Vision, 3.3 70B. **Grok via xAI (OpenAI-compatible):** Grok-2, Grok-2-Vision, Grok-Beta. **Shared OpenAI-compatible helper** in src/openai_compatible.py processes a normalized request/response data structure; each _send_() is a thin adapter at the boundary (data-oriented design per Fleury/Acton/Lottes). **MiniMax refactor:** ~250 lines reduced to ~50 by using the shared helper. Existing test_minimax_provider.py is the safety net. **UX adaptation:** 9 UI elements (screenshot, tools toggle, cache panel, stream progress, fetch models, token budget, cost panel) read from the matrix instead of hard-coding per-vendor branches. **Out of scope (deferred):** Anthropic/Gemini/DeepSeek migration to the matrix (separate track), audio input, server-side code execution, PDF input, batch API, fine-tuning. 6 phases planned: matrix+helper, Qwen, Grok+Llama, MiniMax refactor, UX adaptation, docs+archive. 
--- .../metadata.json | 122 +++++ .../spec.md | 483 ++++++++++++++++++ .../state.toml | 134 +++++ 3 files changed, 739 insertions(+) create mode 100644 conductor/tracks/qwen_llama_grok_integration_20260606/metadata.json create mode 100644 conductor/tracks/qwen_llama_grok_integration_20260606/spec.md create mode 100644 conductor/tracks/qwen_llama_grok_integration_20260606/state.toml diff --git a/conductor/tracks/qwen_llama_grok_integration_20260606/metadata.json b/conductor/tracks/qwen_llama_grok_integration_20260606/metadata.json new file mode 100644 index 00000000..82c6fcda --- /dev/null +++ b/conductor/tracks/qwen_llama_grok_integration_20260606/metadata.json @@ -0,0 +1,122 @@ +{ + "track_id": "qwen_llama_grok_integration_20260606", + "name": "Qwen, Llama & Grok Vendor Integration + Capability Matrix", + "initialized": "2026-06-06", + "owner": "tier2-tech-lead", + "priority": "high", + "status": "active", + "type": "feature + refactor", + "scope": { + "new_files": [ + "src/vendor_capabilities.py", + "src/openai_compatible.py", + "tests/test_vendor_capabilities.py", + "tests/test_openai_compatible.py", + "tests/test_qwen_provider.py", + "tests/test_llama_provider.py", + "tests/test_grok_provider.py" + ], + "modified_files": [ + "src/ai_client.py", + "src/cost_tracker.py", + "src/models.py", + "src/gui_2.py", + "src/app_controller.py", + "credentials_template.toml", + "pyproject.toml", + "tests/test_minimax_provider.py", + "docs/guide_ai_client.md", + "docs/guide_models.md" + ] + }, + "blocked_by": [], + "blocks": ["anthropic_gemini_deepseek_capability_matrix_20260606"], + "estimated_phases": 6, + "spec": "spec.md", + "plan": "plan.md", + "priority_order": "A (capability matrix framework + 3 new vendors) > B (shared helper + MiniMax refactor) > C (UX adaptation + docs)", + "capability_matrix_v1": ["vision", "tool_calling", "caching", "streaming", "model_discovery", "context_window", "cost_tracking"], 
"capability_matrix_deferred": ["audio_input", "pdf_input", "server_side_code_execution", "image_generation", "fine_tuning", "batch_api"], + "data_oriented_design": { + "shared_data_structure": "NormalizedResponse (text, tool_calls, usage_*) + OpenAICompatibleRequest (messages, tools, model, ...)", + "shared_algorithm": "send_openai_compatible(client, request, capabilities) -> NormalizedResponse in src/openai_compatible.py", + "per_vendor_boundary": "Each _send_() is a thin adapter: init client, load history, call shared helper, update history, return text", + "philosophy_references": ["Ryan Fleury (code/data separation)", "Mike Acton (data-oriented design)", "Timothy Lottes (cache-aware algorithms)"] + }, + "vendors_added": { + "qwen": { + "api": "DashScope native SDK", + "rationale": "Qwen-Audio, Qwen-Long (1M context), Qwen-VL-Max require native API; OpenAI-compatible mode loses them", + "sdk": "dashscope>=1.14.0", + "models_shipped": ["qwen-turbo", "qwen-plus", "qwen-max", "qwen-long", "qwen-vl-plus", "qwen-vl-max", "qwen-audio"] + }, + "llama": { + "api": "OpenAI-compatible (multi-backend)", + "rationale": "Llama has no first-party API; backend is per-project config", + "backends_v1": ["ollama (local)", "openrouter (cloud aggregator)", "custom_url (escape hatch)"], + "models_shipped": ["llama-3.1-8b-instant", "llama-3.1-70b-versatile", "llama-3.1-405b-reasoning", "llama-3.2-1b-preview", "llama-3.2-3b-preview", "llama-3.2-11b-vision-preview", "llama-3.2-90b-vision-preview", "llama-3.3-70b-specdec"] + }, + "grok": { + "api": "xAI (OpenAI-compatible)", + "rationale": "xAI's API is OpenAI-compatible; value is filling the matrix entry and exposing Grok-2-Vision", + "sdk": "openai>=1.0.0 (already a dependency)", + "models_shipped": ["grok-2", "grok-2-vision", "grok-beta"] + } + }, + "refactor_scope": { + "minimax": "Refactor _send_minimax() (~250 lines) to use send_openai_compatible() helper (~50 lines)", + "anthropic": "DEFERRED to follow-up track", + "gemini": 
"DEFERRED to follow-up track", + "deepseek": "DEFERRED to follow-up track" + }, + "ux_adaptations": [ + "Screenshot button enabled iff vision=true", + "Tools enabled toggle enabled iff tool_calling=true", + "Cache panel visible iff caching=true", + "Stream progress visible iff streaming=true", + "Fetch Models button enabled iff model_discovery=true", + "Token budget max = capabilities.context_window", + "Cost panel shows estimate iff cost_tracking=true", + "Cost panel shows 'Free (local)' for localhost + cost_tracking=false", + "Cost panel shows '—' for other cost_tracking=false cases" + ], + "architectural_invariant": "Every _send_() is a thin boundary adapter; the shared algorithm lives in send_openai_compatible(); the capability matrix is the authoritative source of per-(vendor, model) feature support; the GUI adapts to the matrix, not to vendor names.", + "threading_constraint": "Same as existing pattern: _send_lock serializes all send() calls; per-vendor history locks (e.g. _minimax_history_lock) guard history mutations; the shared helper is stateless and thread-safe (the OpenAI SDK is thread-safe for distinct clients; the caller owns the client).", + "verification_criteria": [ + "src/vendor_capabilities.py:get_capabilities(vendor, model) returns correct VendorCapabilities for all 4 OpenAI-compatible vendors + Qwen models", + "src/vendor_capabilities.py:get_capabilities fallback to vendor default when model not registered", + "src/openai_compatible.py:send_openai_compatible handles streaming, non-streaming, tool calls, vision, errors", + "src/openai_compatible.py:send_openai_compatible classifies OpenAI errors to ProviderError kinds", + "_send_qwen() uses DashScope SDK; tool format translated from OpenAI shape", + "_send_qwen() handles Qwen-VL vision (image base64), Qwen-Audio stub", + "_send_llama() supports Ollama, OpenRouter, custom URL backends", + "_send_llama() unions Ollama /api/tags and OpenRouter /v1/models for model discovery", + "_send_grok() uses 
xAI endpoint (base_url hardcoded to https://api.x.ai/v1)", + "_send_grok() handles Grok-2-Vision vision", + "_send_minimax() refactored: ~50 lines instead of ~250, all existing test_minimax_provider.py tests pass", + "GUI: screenshot button enabled iff capabilities.vision is true for the active (vendor, model)", + "GUI: cost panel shows correct value (estimate, 'Free (local)', or '—') based on capabilities.cost_tracking and base URL", + "GUI: 9 UX adaptations from spec.md §6 all work end-to-end", + "No regressions in 273+ existing tests (full test suite passes)", + "No new threading.Thread calls in src/ (per project invariant)", + "No top-level heavy imports in src/ai_client.py beyond what's already there (dashscope import is acceptable; flag if it pushes import time > 100ms)" + ], + "links": { + "backlog_entry": "conductor/tracks.md (to be added)", + "ai_client_guide": "docs/guide_ai_client.md", + "models_guide": "docs/guide_models.md", + "workflow_pitfalls": "conductor/workflow.md#known-pitfalls-2026-06-05", + "related_tracks": [ + "conductor/tracks/openai_integration_20260308/", + "conductor/tracks/zhipu_integration_20260308/", + "conductor/tracks/startup_speedup_20260606/", + "conductor/tracks/test_batching_refactor_20260606/" + ], + "external_docs": [ + "https://help.aliyun.com/zh/model-studio/ (DashScope)", + "https://openrouter.ai/docs (OpenRouter)", + "https://github.com/ollama/ollama/blob/main/docs/openai.md (Ollama OpenAI compat)", + "https://docs.x.ai/ (xAI)" + ] + } +} diff --git a/conductor/tracks/qwen_llama_grok_integration_20260606/spec.md b/conductor/tracks/qwen_llama_grok_integration_20260606/spec.md new file mode 100644 index 00000000..6cf93f29 --- /dev/null +++ b/conductor/tracks/qwen_llama_grok_integration_20260606/spec.md @@ -0,0 +1,483 @@ +# Track: Qwen, Llama & Grok Vendor Integration + Capability Matrix + +**Status:** Active (spec approved 2026-06-06) +**Initialized:** 2026-06-06 +**Owner:** Tier 2 Tech Lead +**Priority:** High (extends 
vendor matrix; foundational for future open-source / self-hosted support) + +--- + +## 1. Overview + +This track adds first-class support for three new AI vendors — **Qwen** (via Alibaba DashScope native API), **Llama** (via Ollama local, OpenRouter cloud, and custom base URL), and **Grok** (via xAI's OpenAI-compatible endpoint) — alongside a new **Vendor Capability Matrix** that declares per-(vendor, model) feature support and lets the GUI adapt dynamically instead of hard-coding per-vendor UI branches. + +The track also refactors the existing **MiniMax** provider to use a new shared OpenAI-compatible send helper, eliminating the duplicate OpenAI-compatible request/response logic that the new vendors would otherwise introduce. This is a data-oriented refactor (Fleury / Acton / Lottes framing): the shared helper is the algorithm that operates on a normalized message data structure; each vendor's entry point is a thin adapter that translates vendor-specific request/response shapes into the normalized form at the boundary. + +The follow-up track "Anthropic / Gemini / DeepSeek Capability Matrix Migration" (see §13.1) will migrate the remaining three providers onto the same matrix in a separate effort. This track stays focused on the greenfield additions + the safe MiniMax refactor. + +## 2. Goals (Priority Order) + +| Priority | Goal | Rationale | +|---|---|---| +| **A (foundational)** | Vendor Capability Matrix framework. Per-(vendor, model) feature declarations. UX reads the matrix to enable/disable UI elements. | The user's stated architectural goal: "aggregate all those granular features into a feature support listing... the ux can adjust what's available." Per Casey Muratori's module-layer-boundary pattern: `ai_client` is the authoritative owner of "what can vendor X do"; `gui_2` adapts to that surface. | +| **A (primary value)** | Qwen via DashScope native SDK. Wire Qwen-Plus, Qwen-Max, Qwen-Long (1M+ context), Qwen-VL-Plus, Qwen-VL-Max (vision), Qwen-Audio. 
| Qwen has a meaningful unique API surface (vs OpenAI-compatible). DashScope native SDK unlocks features that the OpenAI-compatible mode loses (Qwen-Audio, Qwen-Long custom chunking, Qwen-VL-Max enhanced vision). | +| **A (primary value)** | Llama via Ollama (local) + OpenRouter (cloud) + custom base URL. | Llama has no first-party API. The "vendor" is the model family; the backend is per-project config. Ollama covers local; OpenRouter is the universal cloud aggregator (Together, Groq, Fireworks, etc. all flow through it); custom URL is the escape hatch for self-hosted / unusual backends. | +| **A (primary value)** | Grok via xAI (OpenAI-compatible). Wire Grok-2, Grok-2-Vision. | xAI's API is OpenAI-compatible; the value is filling in the matrix entry and exposing Grok-2-Vision for the screenshot feature. | +| **B (architectural)** | Shared OpenAI-compatible helper in `src/openai_compatible.py`. MiniMax, Llama, Grok all call into it. | Data-oriented design: share the algorithm (HTTP call, response parsing, tool-call detection, streaming, history repair, error classification) on a normalized data structure. Each vendor entry point is a thin adapter. | +| **B (architectural)** | MiniMax refactored to use the shared helper. | MiniMax is already OpenAI-compatible; pure win, ~250 lines of duplicated logic deleted. Mitigated by existing `tests/test_minimax_provider.py`. | +| **C (optimization)** | Capability matrix v1 populates for the 4 OpenAI-compatible vendors + Qwen. Anthropic/Gemini/DeepSeek get "pending migration" entries; the UX does not read them yet. | Half-baked matrix is worse than no matrix. Populating for the vendors that share the new helper keeps the matrix meaningful without risking regressions in the unique-API vendors. | +| **C (optimization)** | UX adapts to the matrix: vision button hidden when `vision: false`; cache panel hidden when `caching: false`; cost panel shows "—" when `cost_tracking: false` (e.g., local backends). 
| The whole point of the matrix. The specific UI adaptations are listed in §6. | + +### 2.1 Non-Goals (this track) + +- **Not** migrating Anthropic, Gemini, or DeepSeek to the capability matrix. They have genuinely unique APIs (4-breakpoint caching, genai SDK, raw HTTP) and their migration belongs in a separate, careful track. Stub entries: "pending_migration". +- **Not** adding audio input support (Qwen-Audio's audio files). Audio is a deferred capability (§3.3). +- **Not** adding server-side code execution. Deferred to v2 (§3.3). +- **Not** changing the AI Settings panel layout beyond the minimum needed to expose the new providers and the capability-driven UI adaptations. +- **Not** adding model fine-tuning management for any of the three new vendors. +- **Not** adding batch API support for any of the three new vendors. + +## 3. Architecture + +### 3.1 Data-Oriented Design (Fleury / Acton / Lottes) + +The user's design philosophy (referencing Ryan Fleury's code/data separation, Mike Acton's data-oriented design, Timothy Lottes' cache-aware algorithms) translates concretely to: + +- **The data is the API.** The "OpenAI-compatible send" operates on a normalized data structure: `messages: list[dict]`, `tools: list[dict]`, `model_capabilities: VendorCapabilities`, `response: NormalizedResponse`. The structure is laid out linearly (SoA where applicable) and processed in bulk. +- **The algorithm is shared.** One function: `send_openai_compatible(client, model, messages, tools, capabilities, *, stream_callback=None) -> NormalizedResponse`. It handles HTTP, response parsing, tool-call detection, streaming chunk aggregation, error classification, history repair, and token usage extraction — all on the normalized data. +- **The adapters are per-vendor.** Each vendor's `_send_()` is a thin function that: + 1. Initializes the vendor-specific client (OpenAI SDK with vendor's base URL + auth, or DashScope SDK). + 2. Loads the vendor's history (`_minimax_history`, `_llama_history`, etc.) 
and capabilities from the registry. + 3. Calls `send_openai_compatible(...)` (or, for Qwen, the DashScope-specific helper). + 4. Updates the vendor's history with the normalized response. + 5. Returns the text content to `ai_client.send()`. + +This means: +- **Adding a new OpenAI-compatible vendor** = 50 lines of glue (client init + capability declaration + history storage), not 300 lines of duplicated logic. +- **Anthropic/Gemini/DeepSeek** stay per-vendor code paths; the data-oriented refactor doesn't apply to them because their unique APIs are not OpenAI-compatible-shaped. +- **"Base paths are unique"** (the user's wording) means: `_send_qwen()`, `_send_llama()`, `_send_grok()`, `_send_minimax()` are the unique entry points; everything they call into is shared. + +### 3.2 Module Layout + +``` +src/ + ai_client.py # Modified: refactor _send_minimax; add _send_qwen/_send_llama/_send_grok + vendor_capabilities.py # NEW: VendorCapabilities dataclass, registry, get_capabilities() + openai_compatible.py # NEW: shared OpenAI-compatible send helper + cost_tracker.py # Modified: add Qwen/Llama/Grok pricing + models.py # Modified: add provider metadata for Qwen/Llama/Grok + gui_2.py # Modified: register Qwen/Llama/Grok in PROVIDERS; capability-driven UI + app_controller.py # Modified: same + credentials_template.toml # Modified: add [qwen], [llama], [grok] sections +``` + +``` +tests/ + test_vendor_capabilities.py # NEW: capability matrix tests + test_openai_compatible.py # NEW: shared helper tests + test_qwen_provider.py # NEW: Qwen-specific tests (DashScope adapter, history repair, error classification) + test_llama_provider.py # NEW: Llama-specific tests (multi-backend, model discovery) + test_grok_provider.py # NEW: Grok-specific tests (xAI endpoint, Grok-2-Vision) + test_minimax_provider.py # Modified: verify refactor preserves behavior +``` + +### 3.3 Capability Matrix v1 — 7 Capabilities + +| Capability | Type | Purpose | UX Effect | +|---|---|---|---| +| `vision` 
| `bool` | Can accept image inputs (screenshots). | Screenshot button enabled/disabled in message panel. | +| `tool_calling` | `bool` | Supports function/tool calls. | Tool system toggle; "Tools enabled" indicator. | +| `caching` | `bool` | Supports server-side prompt caching (Gemini explicit, Anthropic ephemeral). | Cache panel visible/hidden. Cache indicators in token budget. | +| `streaming` | `bool` | Supports streaming responses. | Stream progress bar visible/hidden. | +| `model_discovery` | `bool` | Backend exposes `/v1/models` (or equivalent) for live model list. | "Fetch Models" button enabled/disabled. | +| `context_window` | `int` | Maximum input tokens for this model. | Token budget panel max. | +| `cost_tracking` | `bool` | Per-token pricing known. | Cost panel shows an estimate when true; otherwise shows "Free (local)" or "—" (see §6). | + +**Deferred to v2 (separate track):** +- `audio_input` (Qwen-Audio only) +- `pdf_input` (Gemini, Anthropic) +- `server_side_code_execution` (Anthropic, OpenAI, Gemini) +- `image_generation`, `fine_tuning`, `batch_api` (none currently) + +### 3.4 Per-(vendor, model) Capabilities + +Capabilities are declared per-model, not per-vendor, because a vendor can have both vision and text-only models (Qwen: Qwen-VL-Plus vs Qwen-Plus; Llama: 3.2-Vision vs 3.2-1B/3B; Grok: Grok-2-Vision vs Grok-2). + +```python +@dataclass(frozen=True) +class VendorCapabilities: + vendor: str # "qwen" | "llama" | "grok" | "minimax" | "anthropic" | "gemini" | ... + model: str # the model name, e.g. 
"qwen-vl-max" or "*" for vendor default + vision: bool = False + tool_calling: bool = True + caching: bool = False + streaming: bool = True + model_discovery: bool = True + context_window: int = 8192 # tokens + cost_tracking: bool = True # False for local backends where cost is unknown/free + cost_input_per_mtok: float = 0.0 # USD per million input tokens + cost_output_per_mtok: float = 0.0 # USD per million output tokens + notes: str = "" +``` + +**Lookup pattern:** `get_capabilities(vendor, model) -> VendorCapabilities`. The registry is a flat dict keyed by `(vendor, model)`. Lookups fall back to the vendor's default entry if a specific model isn't registered. + +**Registry source of truth:** `src/vendor_capabilities.py` has a hardcoded `_REGISTRY: dict[tuple[str, str], VendorCapabilities]` populated at import time. The data is in code (not TOML) because: +- It's referenced by `_send_()` per call (hot path; can't afford file I/O). +- Changes are tied to vendor SDK updates and are code-reviewed. +- TOML is for user-config (credentials, project settings); vendor capabilities are platform facts. + +## 4. Per-Vendor Designs + +### 4.1 Qwen via DashScope Native SDK + +**Why native (not OpenAI-compatible mode):** DashScope's native API unlocks Qwen-Audio, Qwen-Long (1M+ context with custom chunking), Qwen-VL-Max (enhanced vision), and DashScope-specific tool format with `parameters` schema. OpenAI-compatible mode loses these. + +**SDK:** `dashscope` (added to `pyproject.toml` dependencies). + +**State (module-level globals, following the existing pattern):** +```python +_qwen_client: dashscope.Generation | None = None +_qwen_history: list[dict[str, Any]] = [] +_qwen_history_lock: threading.Lock = threading.Lock() +``` + +**Credentials:** `credentials.toml` `[qwen]` section with `api_key` and optional `region` (default: `china`; alternatives: `international`). + +**Configuration per-project (TOML):** `provider = "qwen"`, `qwen_model = "qwen-max"`. 
Optional `qwen_region = "international"`. + +**Models shipped in the capability registry (v1):** + +| Model | vision | tool_calling | caching | context_window | cost_input | cost_output | +|---|---|---|---|---|---|---| +| `qwen-turbo` | false | true | false | 1,000,000 | $0.05 | $0.10 | +| `qwen-plus` | false | true | false | 131,072 | $0.40 | $1.20 | +| `qwen-max` | false | true | false | 32,768 | $2.00 | $6.00 | +| `qwen-long` | false | true | false | 1,000,000 | $0.07 | $0.28 | +| `qwen-vl-plus` | true | true | false | 131,072 | $0.21 | $0.63 | +| `qwen-vl-max` | true | true | false | 32,768 | $0.50 | $1.50 | +| `qwen-audio` | true (audio) | true | false | 32,768 | $0.10 | $0.30 | + +(Pricing from Alibaba Cloud DashScope public pricing as of 2026-06-06; update if needed.) + +**Entry point:** `_send_qwen()` in `src/ai_client.py`. Calls a DashScope-specific helper (not the OpenAI-compatible one) because DashScope's request/response shape differs. + +**Tool format translation:** DashScope uses a slightly different tool schema than OpenAI. The Qwen adapter translates from the normalized tool definitions (OpenAI-shaped) to DashScope's `tools: list[dict]` with `parameters: dict` schema. + +**Vision / audio:** Qwen-VL accepts image URLs or base64; Qwen-Audio accepts audio URLs or base64. The adapter handles the multipart encoding. + +**Error classification:** `_classify_qwen_error()` maps DashScope exceptions to `ProviderError` kinds (`quota`, `rate_limit`, `auth`, `balance`, `network`). + +**Model discovery:** Although DashScope exposes a `list_models` API, it is not reliable enough for runtime discovery, so `_list_qwen_models()` returns the hardcoded registry, which is the source of truth. + +**Vision support:** Qwen-Audio and Qwen-VL-* register `vision: true`. The UX's screenshot button is enabled for those models. 
For Qwen-Audio, the screenshot button is replaced with an audio attachment button (deferred to v2; for v1, audio attachment is wired but the button is hidden — see §6). + +### 4.2 Llama (Ollama + OpenRouter + Custom URL) + +**Why three backends:** Llama has no first-party API. The "vendor" is the model family; the backend is per-project config. +- **Ollama** (local, ubiquitous): OpenAI-compatible at `http://localhost:11434/v1`. Free. +- **OpenRouter** (cloud aggregator): OpenAI-compatible at `https://openrouter.ai/api/v1`. Single API key covers Together, Groq, Fireworks, etc. +- **Custom URL** (escape hatch): any OpenAI-compatible endpoint. For self-hosted vLLM, llama.cpp, LM Studio, or any unusual cloud. + +**SDK:** `openai` (already a dependency, used for MiniMax). + +**State (module-level globals):** +```python +_llama_client: OpenAI | None = None +_llama_history: list[dict[str, Any]] = [] +_llama_history_lock: threading.Lock = threading.Lock() +_llama_base_url: str = "http://localhost:11434/v1" # default +_llama_api_key: str = "ollama" # Ollama doesn't require auth +``` + +**Credentials:** `credentials.toml` `[llama]` section with `api_key` (empty for Ollama) and `base_url`. + +**Configuration per-project (TOML):** `provider = "llama"`, `llama_model = "llama-3.3-70b"`, `llama_base_url = "https://openrouter.ai/api/v1"`, `llama_api_key_env = "OPENROUTER_API_KEY"` (optional env override). 
+ +**Models shipped in the capability registry (v1):** + +| Model | vision | tool_calling | caching | context_window | cost_input | cost_output | +|---|---|---|---|---|---|---| +| `llama-3.1-8b-instant` | false | true | false | 131,072 | $0.05 (Groq) | $0.08 | +| `llama-3.1-70b-versatile` | false | true | false | 131,072 | $0.59 (Groq) | $0.79 | +| `llama-3.1-405b-reasoning` | false | true | false | 131,072 | $3.00 (OpenRouter avg) | $3.00 | +| `llama-3.2-1b-preview` | false | true | false | 131,072 | $0.04 | $0.04 | +| `llama-3.2-3b-preview` | false | true | false | 131,072 | $0.06 | $0.06 | +| `llama-3.2-11b-vision-preview` | true | true | false | 131,072 | $0.18 | $0.18 | +| `llama-3.2-90b-vision-preview` | true | true | false | 131,072 | $0.90 | $0.90 | +| `llama-3.3-70b-specdec` | false | true | false | 131,072 | $0.59 (Groq) | $0.79 | +| `llama-*` (wildcard) | model-specific | true | false | 131,072 | $0 | $0 | + +(Pricing varies by backend; registry entries represent the most common case. Cost overrides per-project allowed via TOML.) + +**Local backend default:** When `llama_base_url` is `http://localhost:11434/v1` and `llama_api_key` is empty, `cost_tracking: false` (free). UX cost panel shows "Free (local)" instead of an estimate. + +**Entry point:** `_send_llama()` in `src/ai_client.py`. Calls the shared `send_openai_compatible()` helper. + +**Tool format:** Native OpenAI (Llama backends all use OpenAI's tool format). No translation needed. + +**Error classification:** `_classify_llama_error()` — same as MiniMax's error classifier (OpenAI SDK errors are uniform across backends). + +**Model discovery:** Ollama exposes `GET /api/tags` (not `/v1/models`); OpenRouter exposes `GET /v1/models`. The Llama adapter probes both endpoints and unions the results. For custom URLs, falls back to the hardcoded registry. + +### 4.3 Grok via xAI (OpenAI-Compatible) + +**SDK:** `openai` (already a dependency). 
+ +**State:** +```python +_grok_client: OpenAI | None = None +_grok_history: list[dict[str, Any]] = [] +_grok_history_lock: threading.Lock = threading.Lock() +``` + +**Credentials:** `credentials.toml` `[grok]` section with `api_key`. (xAI's `base_url` is hardcoded to `https://api.x.ai/v1`.) + +**Configuration per-project (TOML):** `provider = "grok"`, `grok_model = "grok-2"`. + +**Models shipped in the capability registry (v1):** + +| Model | vision | tool_calling | caching | context_window | cost_input | cost_output | +|---|---|---|---|---|---|---| +| `grok-2` | false | true | false | 131,072 | $2.00 | $10.00 | +| `grok-2-vision` | true | true | false | 32,768 | $2.00 | $10.00 | +| `grok-beta` | false | true | false | 131,072 | $5.00 | $15.00 | + +(Pricing from x.ai public pricing as of 2026-06-06; update if needed.) + +**Entry point:** `_send_grok()` in `src/ai_client.py`. Calls `send_openai_compatible()` with the xAI base URL. + +**Tool format:** Native OpenAI. No translation needed. + +**Vision:** Grok-2-Vision accepts image URLs or base64. The OpenAI-compatible helper already handles vision via the OpenAI SDK's multimodal message format. + +**Error classification:** Same as OpenAI-compatible vendors (uniform error shape via the openai SDK). + +**Model discovery:** xAI exposes `GET /v1/models`. Standard OpenAI-compatible discovery. + +## 5. 
Shared OpenAI-Compatible Helper + +### 5.1 Module: `src/openai_compatible.py` + +```python +from dataclasses import dataclass +from typing import Any, Callable, Optional +from openai import OpenAI, OpenAIError + +@dataclass(frozen=True) +class NormalizedResponse: + text: str + tool_calls: list[dict[str, Any]] + usage_input_tokens: int + usage_output_tokens: int + usage_cache_read_tokens: int + usage_cache_creation_tokens: int + raw_response: Any + +@dataclass +class OpenAICompatibleRequest: + messages: list[dict[str, Any]] + tools: Optional[list[dict[str, Any]]] = None + model: str = "" + temperature: float = 0.0 + top_p: float = 1.0 + max_tokens: int = 8192 + stream: bool = False + stream_callback: Optional[Callable[[str], None]] = None + +def send_openai_compatible( + client: OpenAI, + request: OpenAICompatibleRequest, + *, + capabilities: VendorCapabilities, +) -> NormalizedResponse: ... +``` + +The helper: +1. Translates `request.messages` into the OpenAI SDK's `messages` parameter (passthrough — already in OpenAI shape). +2. Translates `request.tools` if non-None (passthrough for now; future: strip unsupported fields based on `capabilities`). +3. Calls `client.chat.completions.create(...)` with the right `model`, `temperature`, `top_p`, `max_tokens`, `stream`, `tools`, `tool_choice="auto"`. +4. If streaming: aggregates chunks; calls `stream_callback(text_chunk)` for each text delta; collects final usage from the last chunk. +5. If non-streaming: parses the response in one shot. +6. Returns a `NormalizedResponse` with text, tool calls (in OpenAI shape), usage stats. +7. On exception: classifies the OpenAI exception and re-raises as `ProviderError` (using `_classify_openai_compatible_error()`). + +The helper is the **algorithm on the data**. Per-vendor adapters (Llama, Grok, MiniMax) are the **boundary code that converts vendor-specific state to/from the normalized form**. 
+ +### 5.2 Refactor of `_send_minimax()` + +**Before:** ~250 lines of inline OpenAI-compatible send logic (lines 2103-2264 of `src/ai_client.py` per the existing grep). Mixes client init, message building, API call, response parsing, tool call handling, history repair, error classification. + +**After:** ~50 lines. `_send_minimax()` becomes: +```python +def _send_minimax(md_content, user_message, base_dir, file_items, discussion_history, ...): + _ensure_minimax_client() + with _minimax_history_lock: + _repair_minimax_history(_minimax_history) + if discussion_history and not _minimax_history: + _minimax_history.extend(_parse_discussion_history(discussion_history)) + _minimax_history.append({"role": "user", "content": _build_user_content(...)}) + + request = OpenAICompatibleRequest( + messages=_minimax_history, + tools=_build_tools(...), + model=_model, + temperature=_temperature, + top_p=_top_p, + max_tokens=_max_tokens, + stream=True, + stream_callback=stream_callback, + ) + caps = get_capabilities("minimax", _model) + response = send_openai_compatible(_minimax_client, request, capabilities=caps) + + # Append response to history (same logic as today) + ... + return response.text +``` + +The behavior is identical; the code is shorter. `tests/test_minimax_provider.py` is the safety net (existing test coverage should pass without modification). + +## 6. UX Adaptation (Capability-Driven UI) + +The GUI reads `get_capabilities(active_vendor, active_model)` once per render frame and stores it in a local. Specific adaptations: + +| UI Element | Behavior based on matrix | +|---|---| +| **Screenshot button** (Message panel) | Enabled iff `vision: true`. Tooltip explains why if disabled. | +| **Audio attachment button** (Message panel) | **Deferred to v2.** Stub: always hidden in v1. | +| **Tools enabled toggle** (Message panel) | Enabled iff `tool_calling: true`. | +| **Cache panel** (Operations Hub) | Visible iff `caching: true`. 
| +| **Cache indicators** (Token budget) | Shown iff `caching: true`. | +| **Stream progress** (Response panel) | Visible iff `streaming: true`. | +| **Fetch Models button** (AI Settings) | Enabled iff `model_discovery: true`. | +| **Token budget max** (Token budget) | Set to `capabilities.context_window`. | +| **Cost estimate** (MMA Dashboard) | Shown iff `cost_tracking: true`; shows "Free (local)" for `cost_tracking: false` + `base_url` containing `localhost`/`127.0.0.1`; shows "—" for other `cost_tracking: false` cases. | + +The adaptations are gated on the capability value, not on vendor name. The `gui_2.py` change is one new helper: `def _get_active_capabilities(self) -> VendorCapabilities: return get_capabilities(self._provider, self._model)`. The render functions query this once at the top of their scope. + +## 7. Configuration + +### 7.1 `pyproject.toml` — new dependency + +```toml +[project] +dependencies = [ + ... + "dashscope>=1.14.0", # NEW + "openai>=1.0.0", # already a dependency +] +``` + +### 7.2 `credentials.toml` — new sections + +```toml +[qwen] +api_key = "YOUR_DASHSCOPE_KEY" +# region = "china" # default; "international" also valid + +[llama] +# api_key = "YOUR_OPENROUTER_KEY" # required for OpenRouter; empty for Ollama +# base_url = "https://openrouter.ai/api/v1" # default for cloud; "http://localhost:11434/v1" for Ollama + +[grok] +api_key = "YOUR_XAI_KEY" +``` + +### 7.3 Per-project TOML — provider selection + +```toml +[ai] +provider = "qwen" # "qwen" | "llama" | "grok" | (existing: "gemini", "anthropic", ...) +model = "qwen-vl-max" +qwen_region = "china" # vendor-specific +# OR +llama_base_url = "https://openrouter.ai/api/v1" +llama_api_key_env = "OPENROUTER_API_KEY" # optional: read key from env +# OR +grok_model = "grok-2-vision" +``` + +## 8. Testing Strategy + +| Test File | Purpose | Coverage Target | +|---|---|---| +| `tests/test_vendor_capabilities.py` | Registry lookup, fallback to vendor default, per-model overrides. 
| 100% | +| `tests/test_openai_compatible.py` | Request building, response parsing, streaming aggregation, tool call detection, error classification. | 90% | +| `tests/test_qwen_provider.py` | DashScope adapter, tool format translation, Qwen-VL vision, Qwen-Audio stub. | 80% | +| `tests/test_llama_provider.py` | Multi-backend (Ollama mock + OpenRouter mock), model discovery union, custom URL fallback. | 80% | +| `tests/test_grok_provider.py` | xAI endpoint, Grok-2-Vision vision, model discovery. | 80% | +| `tests/test_minimax_provider.py` (modified) | Verify refactor preserves behavior. Existing tests should pass unmodified. | 100% (regression) | + +**Mocking strategy:** All tests use `unittest.mock.patch` on the vendor SDKs (DashScope, OpenAI). No real API calls. The `RUN_REAL_AI_TESTS=1` env var continues to gate opt-in real-API tests (out of scope for this track). + +**Integration verification:** Manual smoke test in the GUI: select Qwen provider, send a message with a tool call, confirm the tool executes. Repeat for Llama and Grok. Document the smoke test results in the Phase 5 checkpoint git note. + +## 9. Migration / Rollout + +| Phase | What | Risk | +|---|---|---| +| **Phase 1 — Capability matrix framework + shared helper** | Add `src/vendor_capabilities.py` and `src/openai_compatible.py`. Add unit tests for both. Add `dashscope` to `pyproject.toml`. No user-facing changes. | Low. New files, no modifications to `ai_client.py`. | +| **Phase 2 — Qwen via DashScope** | Implement `_send_qwen()` in `src/ai_client.py`. Add `[qwen]` to credentials template. Register `qwen` in `PROVIDERS` lists. Populate capability registry for Qwen models. | Medium. New SDK, new code path, new credentials section. | +| **Phase 3 — Grok + Llama via shared helper** | Implement `_send_grok()` and `_send_llama()`. Both call `send_openai_compatible()`. Add `[grok]` and `[llama]` credentials sections. Register in PROVIDERS lists. | Medium. 
New code paths, but lighter than Qwen (OpenAI-compatible). | +| **Phase 4 — MiniMax refactor** | Refactor `_send_minimax()` to use the shared helper. Verify all existing `tests/test_minimax_provider.py` tests pass. | Medium-High. Touching working code. Mitigated by existing test coverage. | +| **Phase 5 — UX adaptation + integration** | Add `_get_active_capabilities()` to `gui_2.py`. Apply the 9 UI adaptations from §6. Run the full test suite. | Low. UI-only changes. | +| **Phase 6 — Docs + archive** | Update `docs/guide_ai_client.md` to document the new vendors, the capability matrix, and the shared helper. Update `docs/guide_models.md` for the new PROVIDERS entries. Archive the track. | Low. | + +Each phase has its own checkpoint commit and git note. + +## 10. Risks & Mitigations + +| Risk | Likelihood | Impact | Mitigation | +|---|---|---|---| +| MiniMax refactor breaks existing behavior. | Medium | High (regresses a working provider) | `tests/test_minimax_provider.py` is the safety net. Run it after every change. If it fails, the refactor is incorrect — fix forward, don't revert. | +| DashScope SDK has API differences from documentation (e.g., response shape). | Medium | Medium | Pin to a specific DashScope version (`>=1.14.0,<2.0.0`). Test against the actual SDK in CI. | +| OpenRouter pricing varies by underlying model; registry entries may be inaccurate. | High | Low (cost estimates are advisory) | Cost panel shows "Estimate" with a tooltip. Add a "Pricing source: x" line. | +| Ollama's `/api/tags` shape differs from `/v1/models`; the union function may miss models. | Low | Low (model list is a convenience) | Fall back to the hardcoded registry. Manual override per-project via TOML. | +| Capability matrix drift: a model ships a new feature (e.g., Qwen-Plus gains vision) but the registry says `vision: false`. | Medium | Low (user sees a missing feature) | Document the update process: edit `src/vendor_capabilities.py`, add a test, commit. 
Make the registry the canonical place to look. | +| Local backends (Ollama) need CORS / firewall configured for the GUI to talk to them. | Low | Medium (user can't connect) | Document the Ollama setup in the credentials template comments. Reference the Ollama docs for `OLLAMA_ORIGINS`. | +| Llama backends may rate-limit aggressively (especially free tiers of OpenRouter). | Medium | Low | The existing `_classify_openai_compatible_error()` already maps 429 to `rate_limit`. The error UI surfaces this clearly. | + +## 11. Out of Scope (Explicit) + +- **Audio input support** (Qwen-Audio, future Grok-Audio). Deferred to a follow-up track that adds an audio attachment button to the message panel and an `audio_input` capability to the matrix. +- **Server-side code execution** (Anthropic, OpenAI, Gemini). Deferred; the matrix has a placeholder entry `server_side_code_execution: false` for all v1 vendors. +- **Anthropic / Gemini / DeepSeek capability matrix migration**. Tracked as a separate track ("Anthropic / Gemini / DeepSeek Capability Matrix Migration" — see §13.1). Their unique APIs need careful, vendor-by-vendor migration. +- **Batch API support** for any of the three new vendors. Not requested. +- **Fine-tuning management** for any of the three new vendors. Not requested. +- **Image generation** (DALL-E, Midjourney, etc.). Not in scope; the matrix has a placeholder `image_generation: false`. +- **PDF input** (Gemini, Anthropic). Deferred. + +## 12. Open Questions + +1. **Per-model cost overrides:** Should `manual_slop.toml` allow per-project cost overrides for Llama backends (since pricing varies by which underlying provider OpenRouter routes to)? (Proposal: yes; add `llama_cost_input` / `llama_cost_output` to the per-project TOML.) +2. **Default Llama base URL:** Should the default be Ollama (`localhost:11434`) or OpenRouter? (Proposal: Ollama for the "first-time user gets a working setup" experience; OpenRouter requires an API key.) +3. 
**DashScope region selection:** How does the user pick `china` vs `international`? Per-project TOML (`qwen_region = "international"`) or env var (`DASHSCOPE_REGION`)? (Proposal: both; TOML wins.) +4. **Qwen-Coder and Qwen-Math specialized models:** Include in v1 or defer? (Proposal: defer to v1.1; the matrix entry is trivial but the model-specific prompting optimization is out of scope.) + +## 13. See Also + +### 13.1 Follow-up Track (separate plan) + +**"Anthropic / Gemini / DeepSeek Capability Matrix Migration"** — Migrates the three remaining providers onto the same capability matrix. Required pre-work: ensure the matrix's per-model lookup pattern handles the `caching: true` (Anthropic 4-breakpoint, Gemini explicit) and `pdf_input: true` (Anthropic, Gemini) capabilities. Each provider keeps its unique per-vendor code path (the 4-breakpoint system, the genai SDK); the matrix entries are populated so the UX can adapt. This is a separate track because the migration of each unique-API provider is non-trivial and the risk of regressing the existing working code is high. + +### 13.2 Project References + +- `docs/guide_ai_client.md` — current `ai_client.py` architecture; will be updated in Phase 6 to document the matrix and the shared helper. +- `docs/guide_models.md` — current PROVIDERS constant and provider metadata; will be updated in Phase 6. +- `conductor/tracks/openai_integration_20260308/` — closest prior art (single provider, OpenAI-compatible). +- `conductor/tracks/zhipu_integration_20260308/` — second prior art (single provider, custom API). +- `conductor/tracks/startup_speedup_20260606/` — example of an active track in this project (same convention). +- `conductor/tracks/test_batching_refactor_20260606/` — second example of an active track in this project. +- `conductor/product.md` "Multi-Provider Integration" — product-level overview of the multi-provider architecture. 
+- `conductor/product-guidelines.md` "Modular Controller Pattern" — the convention this track follows for `vendor_capabilities.py` and `openai_compatible.py` as standalone modules. + +### 13.3 External References + +- **Ryan Fleury on code/data separation** — informs the data-oriented design (vendor capabilities as data, helper as algorithm, per-vendor code as boundary adapter). +- **Mike Acton on data-oriented design** — informs the SoA-like layout of the capability matrix and the "transform data, don't mutate state" framing. +- **Timothy Lottes on cache-aware algorithms** — informs the helper's streaming aggregation (bulk-process chunks, minimize per-chunk overhead). +- **Alibaba DashScope documentation** — `https://help.aliyun.com/zh/model-studio/` for the native API reference. +- **OpenRouter API documentation** — `https://openrouter.ai/docs` for the cloud aggregator. +- **Ollama OpenAI compatibility** — `https://github.com/ollama/ollama/blob/main/docs/openai.md` for the local backend. +- **xAI API documentation** — `https://docs.x.ai/` for the Grok endpoint. 
diff --git a/conductor/tracks/qwen_llama_grok_integration_20260606/state.toml b/conductor/tracks/qwen_llama_grok_integration_20260606/state.toml new file mode 100644 index 00000000..494046b5 --- /dev/null +++ b/conductor/tracks/qwen_llama_grok_integration_20260606/state.toml @@ -0,0 +1,134 @@ +# Track state for qwen_llama_grok_integration_20260606 +# Updated by Tier 2 Tech Lead as tasks complete + +[meta] +track_id = "qwen_llama_grok_integration_20260606" +name = "Qwen, Llama & Grok Vendor Integration + Capability Matrix" +status = "active" +current_phase = 0 +last_updated = "2026-06-06" + +[phases] +# Phase 1: Capability matrix framework + shared helper (no user-facing changes) +phase_1 = { status = "pending", checkpoint_sha = "", name = "Capability matrix framework + shared helper" } +# Phase 2: Qwen via DashScope +phase_2 = { status = "pending", checkpoint_sha = "", name = "Qwen via DashScope" } +# Phase 3: Grok + Llama via shared helper +phase_3 = { status = "pending", checkpoint_sha = "", name = "Grok + Llama via shared helper" } +# Phase 4: MiniMax refactor +phase_4 = { status = "pending", checkpoint_sha = "", name = "MiniMax refactor to use shared helper" } +# Phase 5: UX adaptation + integration +phase_5 = { status = "pending", checkpoint_sha = "", name = "UX adaptation + integration" } +# Phase 6: Docs + archive +phase_6 = { status = "pending", checkpoint_sha = "", name = "Docs + archive" } + +[tasks] +# Phase 1: Capability matrix framework + shared helper +# (Tasks TBD by writing-plans; placeholder structure only) +t1_1 = { status = "pending", commit_sha = "", description = "Red: tests/test_vendor_capabilities.py::test_registry_lookup_known_model" } +t1_2 = { status = "pending", commit_sha = "", description = "Red: tests/test_vendor_capabilities.py::test_fallback_to_vendor_default" } +t1_3 = { status = "pending", commit_sha = "", description = "Red: tests/test_vendor_capabilities.py::test_unknown_vendor_raises" } +t1_4 = { status = "pending", commit_sha = 
"", description = "Green: implement src/vendor_capabilities.py with VendorCapabilities + get_capabilities + initial registry" } +t1_5 = { status = "pending", commit_sha = "", description = "Red: tests/test_openai_compatible.py::test_send_non_streaming" } +t1_6 = { status = "pending", commit_sha = "", description = "Red: tests/test_openai_compatible.py::test_send_streaming_aggregates_chunks" } +t1_7 = { status = "pending", commit_sha = "", description = "Red: tests/test_openai_compatible.py::test_tool_call_detection" } +t1_8 = { status = "pending", commit_sha = "", description = "Red: tests/test_openai_compatible.py::test_vision_multimodal_message" } +t1_9 = { status = "pending", commit_sha = "", description = "Red: tests/test_openai_compatible.py::test_error_classification_429_to_rate_limit" } +t1_10 = { status = "pending", commit_sha = "", description = "Green: implement src/openai_compatible.py with NormalizedResponse + OpenAICompatibleRequest + send_openai_compatible" } +t1_11 = { status = "pending", commit_sha = "", description = "Add dashscope>=1.14.0,<2.0.0 to pyproject.toml dependencies" } +t1_12 = { status = "pending", commit_sha = "", description = "Phase 1 checkpoint commit + git note" } +# Phase 2: Qwen via DashScope +t2_1 = { status = "pending", commit_sha = "", description = "Red: tests/test_qwen_provider.py::test_send_qwen_routes_to_dashscope" } +t2_2 = { status = "pending", commit_sha = "", description = "Red: tests/test_qwen_provider.py::test_qwen_tool_format_translation" } +t2_3 = { status = "pending", commit_sha = "", description = "Red: tests/test_qwen_provider.py::test_qwen_vl_vision_image_base64" } +t2_4 = { status = "pending", commit_sha = "", description = "Red: tests/test_qwen_provider.py::test_qwen_error_classification" } +t2_5 = { status = "pending", commit_sha = "", description = "Red: tests/test_qwen_provider.py::test_list_qwen_models" } +t2_6 = { status = "pending", commit_sha = "", description = "Green: implement _send_qwen, 
_ensure_qwen_client, _classify_qwen_error, _list_qwen_models in src/ai_client.py" } +t2_7 = { status = "pending", commit_sha = "", description = "Add [qwen] section to credentials_template.toml" } +t2_8 = { status = "pending", commit_sha = "", description = "Add qwen to PROVIDERS in src/gui_2.py and src/app_controller.py" } +t2_9 = { status = "pending", commit_sha = "", description = "Add Qwen models to capability registry in src/vendor_capabilities.py" } +t2_10 = { status = "pending", commit_sha = "", description = "Add Qwen pricing to src/cost_tracker.py" } +t2_11 = { status = "pending", commit_sha = "", description = "Phase 2 checkpoint commit + git note" } +# Phase 3: Grok + Llama via shared helper +t3_1 = { status = "pending", commit_sha = "", description = "Red: tests/test_grok_provider.py::test_send_grok_uses_xai_endpoint" } +t3_2 = { status = "pending", commit_sha = "", description = "Red: tests/test_grok_provider.py::test_grok_2_vision_vision_support" } +t3_3 = { status = "pending", commit_sha = "", description = "Green: implement _send_grok, _ensure_grok_client in src/ai_client.py" } +t3_4 = { status = "pending", commit_sha = "", description = "Add [grok] section to credentials_template.toml" } +t3_5 = { status = "pending", commit_sha = "", description = "Add grok to PROVIDERS in src/gui_2.py and src/app_controller.py" } +t3_6 = { status = "pending", commit_sha = "", description = "Add Grok models to capability registry" } +t3_7 = { status = "pending", commit_sha = "", description = "Add Grok pricing to src/cost_tracker.py" } +t3_8 = { status = "pending", commit_sha = "", description = "Red: tests/test_llama_provider.py::test_send_llama_ollama_backend" } +t3_9 = { status = "pending", commit_sha = "", description = "Red: tests/test_llama_provider.py::test_send_llama_openrouter_backend" } +t3_10 = { status = "pending", commit_sha = "", description = "Red: tests/test_llama_provider.py::test_send_llama_custom_url" } +t3_11 = { status = "pending", commit_sha = 
"", description = "Red: tests/test_llama_provider.py::test_llama_model_discovery_unions_ollama_and_openrouter" } +t3_12 = { status = "pending", commit_sha = "", description = "Red: tests/test_llama_provider.py::test_llama_3_2_vision_vision_support" } +t3_13 = { status = "pending", commit_sha = "", description = "Red: tests/test_llama_provider.py::test_llama_local_backend_cost_tracking_false" } +t3_14 = { status = "pending", commit_sha = "", description = "Green: implement _send_llama, _ensure_llama_client, _list_llama_models in src/ai_client.py" } +t3_15 = { status = "pending", commit_sha = "", description = "Add [llama] section to credentials_template.toml" } +t3_16 = { status = "pending", commit_sha = "", description = "Add llama to PROVIDERS in src/gui_2.py and src/app_controller.py" } +t3_17 = { status = "pending", commit_sha = "", description = "Add Llama models to capability registry" } +t3_18 = { status = "pending", commit_sha = "", description = "Phase 3 checkpoint commit + git note" } +# Phase 4: MiniMax refactor +t4_1 = { status = "pending", commit_sha = "", description = "Baseline: run tests/test_minimax_provider.py; all pass (green)" } +t4_2 = { status = "pending", commit_sha = "", description = "Refactor _send_minimax to use send_openai_compatible helper" } +t4_3 = { status = "pending", commit_sha = "", description = "Verify tests/test_minimax_provider.py still pass (no regressions)" } +t4_4 = { status = "pending", commit_sha = "", description = "Add MiniMax to capability registry (per-model: minimax-* entries with vision/tool/cost)" } +t4_5 = { status = "pending", commit_sha = "", description = "Run full test suite; ensure no regressions" } +t4_6 = { status = "pending", commit_sha = "", description = "Phase 4 checkpoint commit + git note" } +# Phase 5: UX adaptation + integration +t5_1 = { status = "pending", commit_sha = "", description = "Add _get_active_capabilities() helper to src/gui_2.py" } +t5_2 = { status = "pending", commit_sha = "", 
description = "Apply 9 UX adaptations from spec.md §6 (vision, tools, cache, stream, fetch models, context window, cost)" } +t5_3 = { status = "pending", commit_sha = "", description = "Update _predefined_callbacks / _gettable_fields to expose new provider selection" } +t5_4 = { status = "pending", commit_sha = "", description = "Run full test suite; ensure no regressions in live_gui tests" } +t5_5 = { status = "pending", commit_sha = "", description = "Manual smoke test: select Qwen, send message, tool executes; repeat for Llama, Grok" } +t5_6 = { status = "pending", commit_sha = "", description = "Phase 5 checkpoint commit + git note" } +# Phase 6: Docs + archive +t6_1 = { status = "pending", commit_sha = "", description = "Update docs/guide_ai_client.md: new vendors section, capability matrix section, shared helper section" } +t6_2 = { status = "pending", commit_sha = "", description = "Update docs/guide_models.md: new PROVIDERS entries for qwen/llama/grok" } +t6_3 = { status = "pending", commit_sha = "", description = "git mv conductor/tracks/qwen_llama_grok_integration_20260606 to conductor/tracks/archive/" } +t6_4 = { status = "pending", commit_sha = "", description = "Update conductor/tracks.md: move entry from Backlog to Recently Completed" } +t6_5 = { status = "pending", commit_sha = "", description = "Final checkpoint commit + git note" } + +[verification] +# Filled as phases complete +phase_1_capability_registry_complete = false +phase_1_shared_helper_complete = false +phase_2_qwen_dashscope_complete = false +phase_3_grok_complete = false +phase_3_llama_complete = false +phase_4_minimax_refactor_preserves_tests = false +phase_5_ux_adaptations_complete = false +phase_5_smoke_test_passed = false +phase_6_docs_updated = false +phase_6_track_archived = false +full_test_suite_passes = false +no_new_threading_thread_calls = false + +[openai_compatible_models] +# Filled as models are added to capability registry +qwen_turbo = false +qwen_plus = false +qwen_max 
= false +qwen_long = false +qwen_vl_plus = false +qwen_vl_max = false +qwen_audio = false +llama_3_1_8b = false +llama_3_1_70b = false +llama_3_1_405b = false +llama_3_2_1b = false +llama_3_2_3b = false +llama_3_2_11b_vision = false +llama_3_2_90b_vision = false +llama_3_3_70b = false +grok_2 = false +grok_2_vision = false +grok_beta = false +minimax_models_refactored = false + +[minimax_refactor_stats] +# Filled in Phase 4 +lines_before = 0 +lines_after = 0 +tests_passing = 0 +tests_failing = 0