conductor(track): Initialize qwen_llama_grok_integration_20260606 spec
Three new vendors + capability matrix framework + MiniMax refactor: **Capability matrix v1 (7 features):** vision, tool_calling, caching, streaming, model_discovery, context_window, cost_tracking. Audio and server-side code execution deferred to a follow-up track. **Qwen via DashScope native SDK:** Qwen-Turbo, Qwen-Plus, Qwen-Max, Qwen-Long (1M context), Qwen-VL-Plus/Max (vision), Qwen-Audio. Native API chosen over OpenAI-compatible mode to unlock Qwen-Audio, Qwen-Long custom chunking, and Qwen-VL-Max enhanced vision. **Llama (OpenAI-compatible, multi-backend):** Ollama (local, free), OpenRouter (cloud aggregator covering Together/Groq/Fireworks), custom URL escape hatch. Models: Llama 3.1 8B/70B/405B, 3.2 1B/3B, 3.2 11B/90B Vision, 3.3 70B. **Grok via xAI (OpenAI-compatible):** Grok-2, Grok-2-Vision, Grok-Beta. **Shared OpenAI-compatible helper** in src/openai_compatible.py processes a normalized request/response data structure; each _send_<vendor>() is a thin adapter at the boundary (data-oriented design per Fleury/Acton/Lottes). **MiniMax refactor:** ~250 lines reduced to ~50 by using the shared helper. Existing test_minimax_provider.py is the safety net. **UX adaptation:** 9 UI elements (screenshot, tools toggle, cache panel, stream progress, fetch models, token budget, cost panel) read from the matrix instead of hard-coding per-vendor branches. **Out of scope (deferred):** Anthropic/Gemini/DeepSeek migration to the matrix (separate track), audio input, server-side code execution, PDF input, batch API, fine-tuning. 6 phases planned: matrix+helper, Qwen, Grok+Llama, MiniMax refactor, UX adaptation, docs+archive.
This commit is contained in:
@@ -0,0 +1,122 @@
|
||||
{
|
||||
"track_id": "qwen_llama_grok_integration_20260606",
|
||||
"name": "Qwen, Llama & Grok Vendor Integration + Capability Matrix",
|
||||
"initialized": "2026-06-06",
|
||||
"owner": "tier2-tech-lead",
|
||||
"priority": "high",
|
||||
"status": "active",
|
||||
"type": "feature + refactor",
|
||||
"scope": {
|
||||
"new_files": [
|
||||
"src/vendor_capabilities.py",
|
||||
"src/openai_compatible.py",
|
||||
"tests/test_vendor_capabilities.py",
|
||||
"tests/test_openai_compatible.py",
|
||||
"tests/test_qwen_provider.py",
|
||||
"tests/test_llama_provider.py",
|
||||
"tests/test_grok_provider.py"
|
||||
],
|
||||
"modified_files": [
|
||||
"src/ai_client.py",
|
||||
"src/cost_tracker.py",
|
||||
"src/models.py",
|
||||
"src/gui_2.py",
|
||||
"src/app_controller.py",
|
||||
"credentials_template.toml",
|
||||
"pyproject.toml",
|
||||
"tests/test_minimax_provider.py",
|
||||
"docs/guide_ai_client.md",
|
||||
"docs/guide_models.md"
|
||||
]
|
||||
},
|
||||
"blocked_by": [],
|
||||
"blocks": ["anthropic_gemini_deepseek_capability_matrix_20260606"],
"blocks_note": "anthropic_gemini_deepseek_capability_matrix_20260606 is not yet created; conceptual follow-up",
|
||||
"estimated_phases": 6,
|
||||
"spec": "spec.md",
|
||||
"plan": "plan.md",
|
||||
"priority_order": "A (capability matrix framework + 3 new vendors) > B (shared helper + MiniMax refactor) > C (UX adaptation + docs)",
|
||||
"capability_matrix_v1": ["vision", "tool_calling", "caching", "streaming", "model_discovery", "context_window", "cost_tracking"],
|
||||
"capability_matrix_deferred": ["audio_input", "pdf_input", "server_side_code_execution", "image_generation", "fine_tuning", "batch_api"],
|
||||
"data_oriented_design": {
|
||||
"shared_data_structure": "NormalizedResponse (text, tool_calls, usage_*) + OpenAICompatibleRequest (messages, tools, model, ...)",
|
||||
"shared_algorithm": "send_openai_compatible(client, request, capabilities) -> NormalizedResponse in src/openai_compatible.py",
|
||||
"per_vendor_boundary": "Each _send_<vendor>() is a thin adapter: init client, load history, call shared helper, update history, return text",
|
||||
"philosophy_references": ["Ryan Fleury (code/data separation)", "Mike Acton (data-oriented design)", "Timothy Lottes (cache-aware algorithms)"]
|
||||
},
|
||||
"vendors_added": {
|
||||
"qwen": {
|
||||
"api": "DashScope native SDK",
|
||||
"rationale": "Qwen-Audio, Qwen-Long (1M context), Qwen-VL-Max require native API; OpenAI-compatible mode loses them",
|
||||
"sdk": "dashscope>=1.14.0",
|
||||
"models_shipped": ["qwen-turbo", "qwen-plus", "qwen-max", "qwen-long", "qwen-vl-plus", "qwen-vl-max", "qwen-audio"]
|
||||
},
|
||||
"llama": {
|
||||
"api": "OpenAI-compatible (multi-backend)",
|
||||
"rationale": "Llama has no first-party API; backend is per-project config",
|
||||
"backends_v1": ["ollama (local)", "openrouter (cloud aggregator)", "custom_url (escape hatch)"],
|
||||
"models_shipped": ["llama-3.1-8b-instant", "llama-3.1-70b-versatile", "llama-3.1-405b-reasoning", "llama-3.2-1b-preview", "llama-3.2-3b-preview", "llama-3.2-11b-vision-preview", "llama-3.2-90b-vision-preview", "llama-3.3-70b-specdec"]
|
||||
},
|
||||
"grok": {
|
||||
"api": "xAI (OpenAI-compatible)",
|
||||
"rationale": "xAI's API is OpenAI-compatible; value is filling the matrix entry and exposing Grok-2-Vision",
|
||||
"sdk": "openai>=1.0.0 (already a dependency)",
|
||||
"models_shipped": ["grok-2", "grok-2-vision", "grok-beta"]
|
||||
}
|
||||
},
|
||||
"refactor_scope": {
|
||||
"minimax": "Refactor _send_minimax() (~250 lines) to use send_openai_compatible() helper (~50 lines)",
|
||||
"anthropic": "DEFERRED to follow-up track",
|
||||
"gemini": "DEFERRED to follow-up track",
|
||||
"deepseek": "DEFERRED to follow-up track"
|
||||
},
|
||||
"ux_adaptations": [
|
||||
"Screenshot button enabled iff vision=true",
|
||||
"Tools enabled toggle enabled iff tool_calling=true",
|
||||
"Cache panel visible iff caching=true",
|
||||
"Stream progress visible iff streaming=true",
|
||||
"Fetch Models button enabled iff model_discovery=true",
|
||||
"Token budget max = capabilities.context_window",
|
||||
"Cost panel shows estimate iff cost_tracking=true",
|
||||
"Cost panel shows 'Free (local)' for localhost + cost_tracking=false",
|
||||
"Cost panel shows '—' for other cost_tracking=false cases"
|
||||
],
|
||||
"architectural_invariant": "Every _send_<vendor>() is a thin boundary adapter; the shared algorithm lives in send_openai_compatible(); the capability matrix is the authoritative source of per-(vendor, model) feature support; the GUI adapts to the matrix, not to vendor names.",
|
||||
"threading_constraint": "Same as existing pattern: _send_lock serializes all send() calls; per-vendor history locks (e.g. _minimax_history_lock) guard history mutations; the shared helper is stateless and thread-safe (the OpenAI SDK is thread-safe for distinct clients; the caller owns the client).",
|
||||
"verification_criteria": [
|
||||
"src/vendor_capabilities.py:get_capabilities(vendor, model) returns correct VendorCapabilities for all 4 OpenAI-compatible vendors + Qwen models",
|
||||
"src/vendor_capabilities.py:get_capabilities fallback to vendor default when model not registered",
|
||||
"src/openai_compatible.py:send_openai_compatible handles streaming, non-streaming, tool calls, vision, errors",
|
||||
"src/openai_compatible.py:send_openai_compatible classifies OpenAI errors to ProviderError kinds",
|
||||
"_send_qwen() uses DashScope SDK; tool format translated from OpenAI shape",
|
||||
"_send_qwen() handles Qwen-VL vision (image base64), Qwen-Audio stub",
|
||||
"_send_llama() supports Ollama, OpenRouter, custom URL backends",
|
||||
"_send_llama() unions Ollama /api/tags and OpenRouter /v1/models for model discovery",
|
||||
"_send_grok() uses xAI endpoint (base_url hardcoded to https://api.x.ai/v1)",
|
||||
"_send_grok() handles Grok-2-Vision vision",
|
||||
"_send_minimax() refactored: ~50 lines instead of ~250, all existing test_minimax_provider.py tests pass",
|
||||
"GUI: screenshot button enabled iff capabilities.vision is true for the active (vendor, model)",
|
||||
"GUI: cost panel shows correct value (estimate, 'Free (local)', or '—') based on capabilities.cost_tracking and base URL",
|
||||
"GUI: 9 UX adaptations from spec.md §6 all work end-to-end",
|
||||
"No regressions in 273+ existing tests (full test suite passes)",
|
||||
"No new threading.Thread calls in src/ (per project invariant)",
|
||||
"No top-level heavy imports in src/ai_client.py beyond what's already there (dashscope import is acceptable; flag if it pushes import time > 100ms)"
|
||||
],
|
||||
"links": {
|
||||
"backlog_entry": "conductor/tracks.md (to be added)",
|
||||
"ai_client_guide": "docs/guide_ai_client.md",
|
||||
"models_guide": "docs/guide_models.md",
|
||||
"workflow_pitfalls": "conductor/workflow.md#known-pitfalls-2026-06-05",
|
||||
"related_tracks": [
|
||||
"conductor/tracks/openai_integration_20260308/",
|
||||
"conductor/tracks/zhipu_integration_20260308/",
|
||||
"conductor/tracks/startup_speedup_20260606/",
|
||||
"conductor/tracks/test_batching_refactor_20260606/"
|
||||
],
|
||||
"external_docs": [
|
||||
"https://help.aliyun.com/zh/model-studio/ (DashScope)",
|
||||
"https://openrouter.ai/docs (OpenRouter)",
|
||||
"https://github.com/ollama/ollama/blob/main/docs/openai.md (Ollama OpenAI compat)",
|
||||
"https://docs.x.ai/ (xAI)"
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,483 @@
|
||||
# Track: Qwen, Llama & Grok Vendor Integration + Capability Matrix
|
||||
|
||||
**Status:** Active (spec approved 2026-06-06)
|
||||
**Initialized:** 2026-06-06
|
||||
**Owner:** Tier 2 Tech Lead
|
||||
**Priority:** High (extends vendor matrix; foundational for future open-source / self-hosted support)
|
||||
|
||||
---
|
||||
|
||||
## 1. Overview
|
||||
|
||||
This track adds first-class support for three new AI vendors — **Qwen** (via Alibaba DashScope native API), **Llama** (via Ollama local, OpenRouter cloud, and custom base URL), and **Grok** (via xAI's OpenAI-compatible endpoint) — alongside a new **Vendor Capability Matrix** that declares per-(vendor, model) feature support and lets the GUI adapt dynamically instead of hard-coding per-vendor UI branches.
|
||||
|
||||
The track also refactors the existing **MiniMax** provider to use a new shared OpenAI-compatible send helper, eliminating the duplicate OpenAI-compatible request/response logic that the new vendors would otherwise introduce. This is a data-oriented refactor (Fleury / Acton / Lottes framing): the shared helper is the algorithm that operates on a normalized message data structure; each vendor's entry point is a thin adapter that translates vendor-specific request/response shapes into the normalized form at the boundary.
|
||||
|
||||
The follow-up track "Anthropic / Gemini / DeepSeek Capability Matrix Migration" (see §13.1) will migrate the remaining three providers onto the same matrix in a separate effort. This track stays focused on the greenfield additions + the safe MiniMax refactor.
|
||||
|
||||
## 2. Goals (Priority Order)
|
||||
|
||||
| Priority | Goal | Rationale |
|
||||
|---|---|---|
|
||||
| **A (foundational)** | Vendor Capability Matrix framework. Per-(vendor, model) feature declarations. UX reads the matrix to enable/disable UI elements. | The user's stated architectural goal: "aggregate all those granular features into a feature support listing... the ux can adjust what's available." Per Casey Muratori's module-layer-boundary pattern: `ai_client` is the authoritative owner of "what can vendor X do"; `gui_2` adapts to that surface. |
|
||||
| **A (primary value)** | Qwen via DashScope native SDK. Wire Qwen-Turbo, Qwen-Plus, Qwen-Max, Qwen-Long (1M+ context), Qwen-VL-Plus, Qwen-VL-Max (vision), Qwen-Audio. | Qwen has a meaningful unique API surface (vs OpenAI-compatible). DashScope native SDK unlocks features that the OpenAI-compatible mode loses (Qwen-Audio, Qwen-Long custom chunking, Qwen-VL-Max enhanced vision). |
|
||||
| **A (primary value)** | Llama via Ollama (local) + OpenRouter (cloud) + custom base URL. | Llama has no first-party API. The "vendor" is the model family; the backend is per-project config. Ollama covers local; OpenRouter is the universal cloud aggregator (Together, Groq, Fireworks, etc. all flow through it); custom URL is the escape hatch for self-hosted / unusual backends. |
|
||||
| **A (primary value)** | Grok via xAI (OpenAI-compatible). Wire Grok-2, Grok-2-Vision, Grok-Beta. | xAI's API is OpenAI-compatible; the value is filling in the matrix entry and exposing Grok-2-Vision for the screenshot feature. |
|
||||
| **B (architectural)** | Shared OpenAI-compatible helper in `src/openai_compatible.py`. MiniMax, Llama, Grok all call into it. | Data-oriented design: share the algorithm (HTTP call, response parsing, tool-call detection, streaming, history repair, error classification) on a normalized data structure. Each vendor entry point is a thin adapter. |
|
||||
| **B (architectural)** | MiniMax refactored to use the shared helper. | MiniMax is already OpenAI-compatible; pure win, ~250 lines of duplicated logic deleted. Regression risk is mitigated by the existing `tests/test_minimax_provider.py`. |
|
||||
| **C (optimization)** | Capability matrix v1 populates for the 4 OpenAI-compatible vendors + Qwen. Anthropic/Gemini/DeepSeek get "pending migration" entries; the UX does not read them yet. | Half-baked matrix is worse than no matrix. Populating for the vendors that share the new helper keeps the matrix meaningful without risking regressions in the unique-API vendors. |
|
||||
| **C (optimization)** | UX adapts to the matrix: screenshot button disabled when `vision: false`; cache panel hidden when `caching: false`; when `cost_tracking: false`, cost panel shows "Free (local)" for local backends and "—" otherwise. | The whole point of the matrix. Specific UI adaptations are listed in §6. |
|
||||
|
||||
### 2.1 Non-Goals (this track)
|
||||
|
||||
- **Not** migrating Anthropic, Gemini, or DeepSeek to the capability matrix. They have genuinely unique APIs (4-breakpoint caching, genai SDK, raw HTTP) and their migration belongs in a separate, careful track. Stub entries: "pending_migration".
|
||||
- **Not** adding audio input support (Qwen-Audio's audio files). Audio is a deferred capability (§6).
|
||||
- **Not** adding server-side code execution. It is a deferred capability (see the v2 list in §3.3).
|
||||
- **Not** changing the AI Settings panel layout beyond the minimum needed to expose the new providers and the capability-driven UI adaptations.
|
||||
- **Not** adding model fine-tuning management for any of the three new vendors.
|
||||
- **Not** adding batch API support for any of the three new vendors.
|
||||
|
||||
## 3. Architecture
|
||||
|
||||
### 3.1 Data-Oriented Design (Fleury / Acton / Lottes)
|
||||
|
||||
The user's design philosophy (referencing Ryan Fleury's code/data separation, Mike Acton's data-oriented design, Timothy Lottes' cache-aware algorithms) translates concretely to:
|
||||
|
||||
- **The data is the API.** The "OpenAI-compatible send" operates on a normalized data structure: `messages: list[dict]`, `tools: list[dict]`, `model_capabilities: VendorCapabilities`, `response: NormalizedResponse`. The structure is laid out linearly (SoA where applicable) and processed in bulk.
|
||||
- **The algorithm is shared.** One function: `send_openai_compatible(client, request, *, capabilities) -> NormalizedResponse` (full definition in §5.1; `request` is an `OpenAICompatibleRequest` carrying `model`, `messages`, `tools`, and `stream_callback`). It handles HTTP, response parsing, tool-call detection, streaming chunk aggregation, error classification, history repair, and token usage extraction — all on the normalized data.
|
||||
- **The adapters are per-vendor.** Each vendor's `_send_<vendor>()` is a thin function that:
|
||||
1. Initializes the vendor-specific client (OpenAI SDK with vendor's base URL + auth, or DashScope SDK).
|
||||
2. Loads the vendor's history (`_minimax_history`, `_llama_history`, etc.) and capabilities from the registry.
|
||||
3. Calls `send_openai_compatible(...)` (or, for Qwen, the DashScope-specific helper).
|
||||
4. Updates the vendor's history with the normalized response.
|
||||
5. Returns the text content to `ai_client.send()`.
|
||||
|
||||
This means:
|
||||
- **Adding a new OpenAI-compatible vendor** = 50 lines of glue (client init + capability declaration + history storage), not 300 lines of duplicated logic.
|
||||
- **Anthropic/Gemini/DeepSeek** stay per-vendor code paths; the data-oriented refactor doesn't apply to them because their unique APIs are not OpenAI-compatible-shaped.
|
||||
- **"Base paths are unique"** (the user's wording) means: `_send_qwen()`, `_send_llama()`, `_send_grok()`, `_send_minimax()` are the unique entry points; everything they call into is shared.
|
||||
|
||||
### 3.2 Module Layout
|
||||
|
||||
```
|
||||
src/
|
||||
ai_client.py # Modified: refactor _send_minimax; add _send_qwen/_send_llama/_send_grok
|
||||
vendor_capabilities.py # NEW: VendorCapabilities dataclass, registry, get_capabilities()
|
||||
openai_compatible.py # NEW: shared OpenAI-compatible send helper
|
||||
cost_tracker.py # Modified: add Qwen/Llama/Grok pricing
|
||||
models.py # Modified: add provider metadata for Qwen/Llama/Grok
|
||||
gui_2.py # Modified: register Qwen/Llama/Grok in PROVIDERS; capability-driven UI
|
||||
app_controller.py # Modified: same
|
||||
credentials_template.toml # Modified: add [qwen], [llama], [grok] sections
|
||||
```
|
||||
|
||||
```
|
||||
tests/
|
||||
test_vendor_capabilities.py # NEW: capability matrix tests
|
||||
test_openai_compatible.py # NEW: shared helper tests
|
||||
test_qwen_provider.py # NEW: Qwen-specific tests (DashScope adapter, history repair, error classification)
|
||||
test_llama_provider.py # NEW: Llama-specific tests (multi-backend, model discovery)
|
||||
test_grok_provider.py # NEW: Grok-specific tests (xAI endpoint, Grok-2-Vision)
|
||||
test_minimax_provider.py # Modified: verify refactor preserves behavior
|
||||
```
|
||||
|
||||
### 3.3 Capability Matrix v1 — 7 Capabilities
|
||||
|
||||
| Capability | Type | Purpose | UX Effect |
|
||||
|---|---|---|---|
|
||||
| `vision` | `bool` | Can accept image inputs (screenshots). | Screenshot button enabled/disabled in message panel. |
|
||||
| `tool_calling` | `bool` | Supports function/tool calls. | Tool system toggle; "Tools enabled" indicator. |
|
||||
| `caching` | `bool` | Supports server-side prompt caching (Gemini explicit, Anthropic ephemeral). | Cache panel visible/hidden. Cache indicators in token budget. |
|
||||
| `streaming` | `bool` | Supports streaming responses. | Stream progress bar visible/hidden. |
|
||||
| `model_discovery` | `bool` | Backend exposes `/v1/models` (or equivalent) for live model list. | "Fetch Models" button enabled/disabled. |
|
||||
| `context_window` | `int` | Maximum input tokens for this model. | Token budget panel max. |
|
||||
| `cost_tracking` | `bool` | Per-token pricing known. | Cost panel shows an estimate when true; shows "Free (local)" (local backend) or "—" (unknown pricing) when false. |
|
||||
|
||||
**Deferred to v2 (separate track):**
|
||||
- `audio_input` (Qwen-Audio only)
|
||||
- `pdf_input` (Gemini, Anthropic)
|
||||
- `server_side_code_execution` (Anthropic, OpenAI, Gemini)
|
||||
- `image_generation`, `fine_tuning`, `batch_api` (none currently)
|
||||
|
||||
### 3.4 Per-(vendor, model) Capabilities
|
||||
|
||||
Capabilities are declared per-model, not per-vendor, because a vendor can have both vision and text-only models (Qwen: Qwen-VL-Plus vs Qwen-Plus; Llama: 3.2-Vision vs 3.2-1B/3B; Grok: Grok-2-Vision vs Grok-2).
|
||||
|
||||
```python
|
||||
@dataclass(frozen=True)
|
||||
class VendorCapabilities:
|
||||
vendor: str # "qwen" | "llama" | "grok" | "minimax" | "anthropic" | "gemini" | ...
|
||||
model: str # the model name, e.g. "qwen-vl-max" or "*" for vendor default
|
||||
vision: bool = False
|
||||
tool_calling: bool = True
|
||||
caching: bool = False
|
||||
streaming: bool = True
|
||||
model_discovery: bool = True
|
||||
context_window: int = 8192 # tokens
|
||||
cost_tracking: bool = True # False for local backends where cost is unknown/free
|
||||
cost_input_per_mtok: float = 0.0 # USD per million input tokens
|
||||
cost_output_per_mtok: float = 0.0 # USD per million output tokens
|
||||
notes: str = ""
|
||||
```
|
||||
|
||||
**Lookup pattern:** `get_capabilities(vendor, model) -> VendorCapabilities`. The registry is a flat dict keyed by `(vendor, model)`. Lookups fall back to the vendor's default entry if a specific model isn't registered.
|
||||
|
||||
**Registry source of truth:** `src/vendor_capabilities.py` has a hardcoded `_REGISTRY: dict[tuple[str, str], VendorCapabilities]` populated at import time. The data is in code (not TOML) because:
|
||||
- It's referenced by `_send_<vendor>()` per call (hot path; can't afford file I/O).
|
||||
- Changes are tied to vendor SDK updates and are code-reviewed.
|
||||
- TOML is for user-config (credentials, project settings); vendor capabilities are platform facts.
|
||||
|
||||
## 4. Per-Vendor Designs
|
||||
|
||||
### 4.1 Qwen via DashScope Native SDK
|
||||
|
||||
**Why native (not OpenAI-compatible mode):** DashScope's native API unlocks Qwen-Audio, Qwen-Long (1M+ context with custom chunking), Qwen-VL-Max (enhanced vision), and DashScope-specific tool format with `parameters` schema. OpenAI-compatible mode loses these.
|
||||
|
||||
**SDK:** `dashscope` (added to `pyproject.toml` dependencies).
|
||||
|
||||
**State (module-level globals, following the existing pattern):**
|
||||
```python
|
||||
_qwen_client: dashscope.Generation | None = None
|
||||
_qwen_history: list[dict[str, Any]] = []
|
||||
_qwen_history_lock: threading.Lock = threading.Lock()
|
||||
```
|
||||
|
||||
**Credentials:** `credentials.toml` `[qwen]` section with `api_key` and optional `region` (default: `china`; alternatives: `international`).
|
||||
|
||||
**Configuration per-project (TOML):** `provider = "qwen"`, `qwen_model = "qwen-max"`. Optional `qwen_region = "international"`.
|
||||
|
||||
**Models shipped in the capability registry (v1):**
|
||||
|
||||
| Model | vision | tool_calling | caching | context_window | cost_input | cost_output |
|
||||
|---|---|---|---|---|---|---|
|
||||
| `qwen-turbo` | false | true | false | 1,000,000 | $0.05 | $0.10 |
|
||||
| `qwen-plus` | false | true | false | 131,072 | $0.40 | $1.20 |
|
||||
| `qwen-max` | false | true | false | 32,768 | $2.00 | $6.00 |
|
||||
| `qwen-long` | false | true | false | 1,000,000 | $0.07 | $0.28 |
|
||||
| `qwen-vl-plus` | true | true | false | 131,072 | $0.21 | $0.63 |
|
||||
| `qwen-vl-max` | true | true | false | 32,768 | $0.50 | $1.50 |
|
||||
| `qwen-audio` | true (audio) | true | false | 32,768 | $0.10 | $0.30 |
|
||||
|
||||
(Pricing from Alibaba Cloud DashScope public pricing as of 2026-06-06; update if needed.)
|
||||
|
||||
**Entry point:** `_send_qwen()` in `src/ai_client.py`. Calls a DashScope-specific helper (not the OpenAI-compatible one) because DashScope's request/response shape differs.
|
||||
|
||||
**Tool format translation:** DashScope uses a slightly different tool schema than OpenAI. The Qwen adapter translates from the normalized tool definitions (OpenAI-shaped) to DashScope's `tools: list[dict]` with `parameters: dict` schema.
|
||||
|
||||
**Vision / audio:** Qwen-VL accepts image URLs or base64; Qwen-Audio accepts audio URLs or base64. The adapter handles the multipart encoding.
|
||||
|
||||
**Error classification:** `_classify_qwen_error()` maps DashScope exceptions to `ProviderError` kinds (`quota`, `rate_limit`, `auth`, `balance`, `network`).
|
||||
|
||||
**Model discovery:** DashScope exposes a `list_models` API, but it is not reliable enough for runtime discovery, so `_list_qwen_models()` returns the hardcoded registry instead; the hardcoded list is the source of truth for Qwen models.
|
||||
|
||||
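**Vision support:** Qwen-VL-* register `vision: true`, so the UX's screenshot button is enabled for those models. Qwen-Audio accepts audio rather than images: its `true (audio)` entry in the table above denotes audio input, which belongs to the deferred `audio_input` capability (§3.3), not to `vision`. The screenshot button therefore must not be offered for Qwen-Audio; its replacement, the audio attachment button, is deferred to v2 (for v1, audio attachment is wired in the adapter but the button is hidden — see §6).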
|
||||
|
||||
### 4.2 Llama (Ollama + OpenRouter + Custom URL)
|
||||
|
||||
**Why three backends:** Llama has no first-party API. The "vendor" is the model family; the backend is per-project config.
|
||||
- **Ollama** (local, ubiquitous): OpenAI-compatible at `http://localhost:11434/v1`. Free.
|
||||
- **OpenRouter** (cloud aggregator): OpenAI-compatible at `https://openrouter.ai/api/v1`. Single API key covers Together, Groq, Fireworks, etc.
|
||||
- **Custom URL** (escape hatch): any OpenAI-compatible endpoint. For self-hosted vLLM, llama.cpp, LM Studio, or any unusual cloud.
|
||||
|
||||
**SDK:** `openai` (already a dependency, used for MiniMax).
|
||||
|
||||
**State (module-level globals):**
|
||||
```python
|
||||
_llama_client: OpenAI | None = None
|
||||
_llama_history: list[dict[str, Any]] = []
|
||||
_llama_history_lock: threading.Lock = threading.Lock()
|
||||
_llama_base_url: str = "http://localhost:11434/v1" # default
|
||||
_llama_api_key: str = "ollama" # Ollama doesn't require auth
|
||||
```
|
||||
|
||||
**Credentials:** `credentials.toml` `[llama]` section with `api_key` (empty for Ollama) and `base_url`.
|
||||
|
||||
**Configuration per-project (TOML):** `provider = "llama"`, `llama_model = "llama-3.3-70b"`, `llama_base_url = "https://openrouter.ai/api/v1"`, `llama_api_key_env = "OPENROUTER_API_KEY"` (optional env override).
|
||||
|
||||
**Models shipped in the capability registry (v1):**
|
||||
|
||||
| Model | vision | tool_calling | caching | context_window | cost_input | cost_output |
|
||||
|---|---|---|---|---|---|---|
|
||||
| `llama-3.1-8b-instant` | false | true | false | 131,072 | $0.05 (Groq) | $0.08 |
|
||||
| `llama-3.1-70b-versatile` | false | true | false | 131,072 | $0.59 (Groq) | $0.79 |
|
||||
| `llama-3.1-405b-reasoning` | false | true | false | 131,072 | $3.00 (OpenRouter avg) | $3.00 |
|
||||
| `llama-3.2-1b-preview` | false | true | false | 131,072 | $0.04 | $0.04 |
|
||||
| `llama-3.2-3b-preview` | false | true | false | 131,072 | $0.06 | $0.06 |
|
||||
| `llama-3.2-11b-vision-preview` | true | true | false | 131,072 | $0.18 | $0.18 |
|
||||
| `llama-3.2-90b-vision-preview` | true | true | false | 131,072 | $0.90 | $0.90 |
|
||||
| `llama-3.3-70b-specdec` | false | true | false | 131,072 | $0.59 (Groq) | $0.79 |
|
||||
| `llama-*` (wildcard) | model-specific | true | false | 131,072 | $0 | $0 |
|
||||
|
||||
(Pricing varies by backend; registry entries represent the most common case. Cost overrides per-project allowed via TOML.)
|
||||
|
||||
**Local backend default:** When `llama_base_url` is `http://localhost:11434/v1` and `llama_api_key` is empty, `cost_tracking: false` (free). UX cost panel shows "Free (local)" instead of an estimate.
|
||||
|
||||
**Entry point:** `_send_llama()` in `src/ai_client.py`. Calls the shared `send_openai_compatible()` helper.
|
||||
|
||||
**Tool format:** Native OpenAI (Llama backends all use OpenAI's tool format). No translation needed.
|
||||
|
||||
**Error classification:** `_classify_llama_error()` — same as MiniMax's error classifier (OpenAI SDK errors are uniform across backends).
|
||||
|
||||
**Model discovery:** Ollama exposes `GET /api/tags` (not `/v1/models`); OpenRouter exposes `GET /v1/models`. The Llama adapter probes both endpoints and unions the results. For custom URLs, falls back to the hardcoded registry.
|
||||
|
||||
### 4.3 Grok via xAI (OpenAI-Compatible)
|
||||
|
||||
**SDK:** `openai` (already a dependency).
|
||||
|
||||
**State:**
|
||||
```python
|
||||
_grok_client: OpenAI | None = None
|
||||
_grok_history: list[dict[str, Any]] = []
|
||||
_grok_history_lock: threading.Lock = threading.Lock()
|
||||
```
|
||||
|
||||
**Credentials:** `credentials.toml` `[grok]` section with `api_key`. (xAI's `base_url` is hardcoded to `https://api.x.ai/v1`.)
|
||||
|
||||
**Configuration per-project (TOML):** `provider = "grok"`, `grok_model = "grok-2"`.
|
||||
|
||||
**Models shipped in the capability registry (v1):**
|
||||
|
||||
| Model | vision | tool_calling | caching | context_window | cost_input | cost_output |
|
||||
|---|---|---|---|---|---|---|
|
||||
| `grok-2` | false | true | false | 131,072 | $2.00 | $10.00 |
|
||||
| `grok-2-vision` | true | true | false | 32,768 | $2.00 | $10.00 |
|
||||
| `grok-beta` | false | true | false | 131,072 | $5.00 | $15.00 |
|
||||
|
||||
(Pricing from x.ai public pricing as of 2026-06-06; update if needed.)
|
||||
|
||||
**Entry point:** `_send_grok()` in `src/ai_client.py`. Calls `send_openai_compatible()` with the xAI base URL.
|
||||
|
||||
**Tool format:** Native OpenAI. No translation needed.
|
||||
|
||||
**Vision:** Grok-2-Vision accepts image URLs or base64. The OpenAI-compatible helper already handles vision via the OpenAI SDK's multimodal message format.
|
||||
|
||||
**Error classification:** Same as OpenAI-compatible vendors (uniform error shape via the openai SDK).
|
||||
|
||||
**Model discovery:** xAI exposes `GET /v1/models`. Standard OpenAI-compatible discovery.
|
||||
|
||||
## 5. Shared OpenAI-Compatible Helper
|
||||
|
||||
### 5.1 Module: `src/openai_compatible.py`
|
||||
|
||||
```python
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable, Optional
|
||||
from openai import OpenAI, OpenAIError
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class NormalizedResponse:
|
||||
text: str
|
||||
tool_calls: list[dict[str, Any]]
|
||||
usage_input_tokens: int
|
||||
usage_output_tokens: int
|
||||
usage_cache_read_tokens: int
|
||||
usage_cache_creation_tokens: int
|
||||
raw_response: Any
|
||||
|
||||
@dataclass
|
||||
class OpenAICompatibleRequest:
|
||||
messages: list[dict[str, Any]]
|
||||
tools: Optional[list[dict[str, Any]]] = None
|
||||
model: str = ""
|
||||
temperature: float = 0.0
|
||||
top_p: float = 1.0
|
||||
max_tokens: int = 8192
|
||||
stream: bool = False
|
||||
stream_callback: Optional[Callable[[str], None]] = None
|
||||
|
||||
def send_openai_compatible(
|
||||
client: OpenAI,
|
||||
request: OpenAICompatibleRequest,
|
||||
*,
|
||||
capabilities: VendorCapabilities,
|
||||
) -> NormalizedResponse: ...
|
||||
```
|
||||
|
||||
The helper:
|
||||
1. Translates `request.messages` into the OpenAI SDK's `messages` parameter (passthrough — already in OpenAI shape).
|
||||
2. Translates `request.tools` if non-None (passthrough for now; future: strip unsupported fields based on `capabilities`).
|
||||
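3. Calls `client.chat.completions.create(...)` with the right `model`, `temperature`, `top_p`, `max_tokens`, `stream`, and — only when `request.tools` is non-empty — `tools` plus `tool_choice="auto"` (the API rejects `tool_choice` when no tools are supplied).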
|
||||
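4. If streaming: aggregates chunks; calls `stream_callback(text_chunk)` for each text delta; collects final usage from the last chunk (this requires sending `stream_options={"include_usage": True}` with the request; backends that do not honor it leave the usage fields at 0).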
|
||||
5. If non-streaming: parses the response in one shot.
|
||||
6. Returns a `NormalizedResponse` with text, tool calls (in OpenAI shape), usage stats.
|
||||
7. On exception: classifies the OpenAI exception and re-raises as `ProviderError` (using `_classify_openai_compatible_error()`).
|
||||
|
||||
The helper is the **algorithm on the data**. Per-vendor adapters (Llama, Grok, MiniMax) are the **boundary code that converts vendor-specific state to/from the normalized form**.
|
||||
|
||||
### 5.2 Refactor of `_send_minimax()`
|
||||
|
||||
**Before:** ~250 lines of inline OpenAI-compatible send logic (lines 2103-2264 of `src/ai_client.py` per the existing grep). Mixes client init, message building, API call, response parsing, tool call handling, history repair, error classification.
|
||||
|
||||
**After:** ~50 lines. `_send_minimax()` becomes:
|
||||
```python
|
||||
def _send_minimax(md_content, user_message, base_dir, file_items, discussion_history, ...):
|
||||
_ensure_minimax_client()
|
||||
with _minimax_history_lock:
|
||||
_repair_minimax_history(_minimax_history)
|
||||
if discussion_history and not _minimax_history:
|
||||
_minimax_history.extend(_parse_discussion_history(discussion_history))
|
||||
_minimax_history.append({"role": "user", "content": _build_user_content(...)})
|
||||
|
||||
request = OpenAICompatibleRequest(
|
||||
messages=_minimax_history,
|
||||
tools=_build_tools(...),
|
||||
model=_model,
|
||||
temperature=_temperature,
|
||||
top_p=_top_p,
|
||||
max_tokens=_max_tokens,
|
||||
stream=True,
|
||||
stream_callback=stream_callback,
|
||||
)
|
||||
caps = get_capabilities("minimax", _model)
|
||||
response = send_openai_compatible(_minimax_client, request, capabilities=caps)
|
||||
|
||||
# Append response to history (same logic as today)
|
||||
...
|
||||
return response.text
|
||||
```
|
||||
|
||||
The behavior is identical; the code is shorter. `tests/test_minimax_provider.py` is the safety net (existing test coverage should pass without modification).
|
||||
|
||||
## 6. UX Adaptation (Capability-Driven UI)
|
||||
|
||||
The GUI reads `get_capabilities(active_vendor, active_model)` once per render frame and stores it in a local. Specific adaptations:
|
||||
|
||||
| UI Element | Behavior based on matrix |
|
||||
|---|---|
|
||||
| **Screenshot button** (Message panel) | Enabled iff `vision: true`. Tooltip explains why if disabled. |
|
||||
| **Audio attachment button** (Message panel) | **Deferred to v2.** Stub: always hidden in v1. |
|
||||
| **Tools enabled toggle** (Message panel) | Enabled iff `tool_calling: true`. |
|
||||
| **Cache panel** (Operations Hub) | Visible iff `caching: true`. |
|
||||
| **Cache indicators** (Token budget) | Shown iff `caching: true`. |
|
||||
| **Stream progress** (Response panel) | Visible iff `streaming: true`. |
|
||||
| **Fetch Models button** (AI Settings) | Enabled iff `model_discovery: true`. |
|
||||
| **Token budget max** (Token budget) | Set to `capabilities.context_window`. |
|
||||
| **Cost estimate** (MMA Dashboard) | Shows the computed estimate when `cost_tracking: true`; shows "Free (local)" when `cost_tracking: false` and `base_url` contains `localhost`/`127.0.0.1`; shows "—" in all other `cost_tracking: false` cases. |
|
||||
|
||||
The adaptations are gated on the capability value, not on vendor name. The `gui_2.py` change is one new helper: `def _get_active_capabilities(self) -> VendorCapabilities: return get_capabilities(self._provider, self._model)`. The render functions query this once at the top of their scope.
|
||||
|
||||
## 7. Configuration
|
||||
|
||||
### 7.1 `pyproject.toml` — new dependency
|
||||
|
||||
```toml
|
||||
[project]
|
||||
dependencies = [
|
||||
...
|
||||
"dashscope>=1.14.0,<2.0.0", # NEW
|
||||
"openai>=1.0.0", # already a dependency
|
||||
]
|
||||
```
|
||||
|
||||
### 7.2 `credentials.toml` — new sections
|
||||
|
||||
```toml
|
||||
[qwen]
|
||||
api_key = "YOUR_DASHSCOPE_KEY"
|
||||
# region = "china" # default; "international" also valid
|
||||
|
||||
[llama]
|
||||
# api_key = "YOUR_OPENROUTER_KEY" # required for OpenRouter; empty for Ollama
|
||||
# base_url = "https://openrouter.ai/api/v1" # default for cloud; "http://localhost:11434/v1" for Ollama
|
||||
|
||||
[grok]
|
||||
api_key = "YOUR_XAI_KEY"
|
||||
```
|
||||
|
||||
### 7.3 Per-project TOML — provider selection
|
||||
|
||||
```toml
|
||||
[ai]
|
||||
provider = "qwen" # "qwen" | "llama" | "grok" | (existing: "gemini", "anthropic", ...)
|
||||
model = "qwen-vl-max"
|
||||
qwen_region = "china" # vendor-specific
|
||||
# OR
|
||||
llama_base_url = "https://openrouter.ai/api/v1"
|
||||
llama_api_key_env = "OPENROUTER_API_KEY" # optional: read key from env
|
||||
# OR
|
||||
grok_model = "grok-2-vision"
|
||||
```
|
||||
|
||||
## 8. Testing Strategy
|
||||
|
||||
| Test File | Purpose | Coverage Target |
|
||||
|---|---|---|
|
||||
| `tests/test_vendor_capabilities.py` | Registry lookup, fallback to vendor default, per-model overrides. | 100% |
|
||||
| `tests/test_openai_compatible.py` | Request building, response parsing, streaming aggregation, tool call detection, error classification. | 90% |
|
||||
| `tests/test_qwen_provider.py` | DashScope adapter, tool format translation, Qwen-VL vision, Qwen-Audio stub. | 80% |
|
||||
| `tests/test_llama_provider.py` | Multi-backend (Ollama mock + OpenRouter mock), model discovery union, custom URL fallback. | 80% |
|
||||
| `tests/test_grok_provider.py` | xAI endpoint, Grok-2-Vision vision, model discovery. | 80% |
|
||||
| `tests/test_minimax_provider.py` (existing) | Verify refactor preserves behavior. Existing tests should pass unmodified. | 100% (regression) |
|
||||
|
||||
**Mocking strategy:** All tests use `unittest.mock.patch` on the vendor SDKs (DashScope, OpenAI). No real API calls. The `RUN_REAL_AI_TESTS=1` env var continues to gate opt-in real-API tests (out of scope for this track).
|
||||
|
||||
**Integration verification:** Manual smoke test in the GUI: select Qwen provider, send a message with a tool call, confirm the tool executes. Repeat for Llama and Grok. Document the smoke test results in the Phase 5 checkpoint git note.
|
||||
|
||||
## 9. Migration / Rollout
|
||||
|
||||
| Phase | What | Risk |
|
||||
|---|---|---|
|
||||
| **Phase 1 — Capability matrix framework + shared helper** | Add `src/vendor_capabilities.py` and `src/openai_compatible.py`. Add unit tests for both. Add `dashscope` to `pyproject.toml`. No user-facing changes. | Low. New files, no modifications to `ai_client.py`. |
|
||||
| **Phase 2 — Qwen via DashScope** | Implement `_send_qwen()` in `src/ai_client.py`. Add `[qwen]` to credentials template. Register `qwen` in `PROVIDERS` lists. Populate capability registry for Qwen models. | Medium. New SDK, new code path, new credentials section. |
|
||||
| **Phase 3 — Grok + Llama via shared helper** | Implement `_send_grok()` and `_send_llama()`. Both call `send_openai_compatible()`. Add `[grok]` and `[llama]` credentials sections. Register in PROVIDERS lists. | Medium. New code paths, but lighter than Qwen (OpenAI-compatible). |
|
||||
| **Phase 4 — MiniMax refactor** | Refactor `_send_minimax()` to use the shared helper. Verify all existing `tests/test_minimax_provider.py` tests pass. | Medium-High. Touching working code. Mitigated by existing test coverage. |
|
||||
| **Phase 5 — UX adaptation + integration** | Add `_get_active_capabilities()` to `gui_2.py`. Apply the 9 UI adaptations from §6. Run the full test suite. | Low. UI-only changes. |
|
||||
| **Phase 6 — Docs + archive** | Update `docs/guide_ai_client.md` to document the new vendors, the capability matrix, and the shared helper. Update `docs/guide_models.md` for the new PROVIDERS entries. Archive the track. | Low. |
|
||||
|
||||
Each phase has its own checkpoint commit and git note.
|
||||
|
||||
## 10. Risks & Mitigations
|
||||
|
||||
| Risk | Likelihood | Impact | Mitigation |
|
||||
|---|---|---|---|
|
||||
| MiniMax refactor breaks existing behavior. | Medium | High (regresses a working provider) | `tests/test_minimax_provider.py` is the safety net. Run it after every change. If it fails, the refactor is incorrect — fix forward, don't revert. |
|
||||
| DashScope SDK has API differences from documentation (e.g., response shape). | Medium | Medium | Constrain DashScope to a bounded version range (`>=1.14.0,<2.0.0`). Test against the actual SDK in CI. |
|
||||
| OpenRouter pricing varies by underlying model; registry entries may be inaccurate. | High | Low (cost estimates are advisory) | Cost panel shows "Estimate" with a tooltip. Add a "Pricing source: x" line. |
|
||||
| Ollama's `/api/tags` shape differs from `/v1/models`; the union function may miss models. | Low | Low (model list is a convenience) | Fall back to the hardcoded registry. Manual override per-project via TOML. |
|
||||
| Capability matrix drift: a model ships a new feature (e.g., Qwen-Plus gains vision) but the registry says `vision: false`. | Medium | Low (user sees a missing feature) | Document the update process: edit `src/vendor_capabilities.py`, add a test, commit. Make the registry the canonical place to look. |
|
||||
| Local backends (Ollama) need CORS / firewall configured for the GUI to talk to them. | Low | Medium (user can't connect) | Document the Ollama setup in the credentials template comments. Reference the Ollama docs for `OLLAMA_ORIGINS`. |
|
||||
| Llama backends may rate-limit aggressively (especially free tiers of OpenRouter). | Medium | Low | The existing `_classify_openai_compatible_error()` already maps 429 to `rate_limit`. The error UI surfaces this clearly. |
|
||||
|
||||
## 11. Out of Scope (Explicit)
|
||||
|
||||
- **Audio input support** (Qwen-Audio, future Grok-Audio). Deferred to a follow-up track that adds an audio attachment button to the message panel and an `audio_input` capability to the matrix.
|
||||
- **Server-side code execution** (Anthropic, OpenAI, Gemini). Deferred; the matrix has a placeholder entry `server_side_code_execution: false` for all v1 vendors.
|
||||
- **Anthropic / Gemini / DeepSeek capability matrix migration**. Tracked as a separate track ("Anthropic / Gemini / DeepSeek Capability Matrix Migration" — see §13.1). Their unique APIs need careful, vendor-by-vendor migration.
|
||||
- **Batch API support** for any of the three new vendors. Not requested.
|
||||
- **Fine-tuning management** for any of the three new vendors. Not requested.
|
||||
- **Image generation** (DALL-E, Midjourney, etc.). Not in scope; the matrix has a placeholder `image_generation: false`.
|
||||
- **PDF input** (Gemini, Anthropic). Deferred.
|
||||
|
||||
## 12. Open Questions
|
||||
|
||||
1. **Per-model cost overrides:** Should `manual_slop.toml` allow per-project cost overrides for Llama backends (since pricing varies by which underlying provider OpenRouter routes to)? (Proposal: yes; add `llama_cost_input` / `llama_cost_output` to the per-project TOML.)
|
||||
2. **Default Llama base URL:** Should the default be Ollama (`localhost:11434`) or OpenRouter? (Proposal: Ollama for the "first-time user gets a working setup" experience; OpenRouter requires an API key.)
|
||||
3. **DashScope region selection:** How does the user pick `china` vs `international`? Per-project TOML (`qwen_region = "international"`) or env var (`DASHSCOPE_REGION`)? (Proposal: both; TOML wins.)
|
||||
4. **Qwen-Coder and Qwen-Math specialized models:** Include in v1 or defer? (Proposal: defer to v1.1; the matrix entry is trivial but the model-specific prompting optimization is out of scope.)
|
||||
|
||||
## 13. See Also
|
||||
|
||||
### 13.1 Follow-up Track (separate plan)
|
||||
|
||||
**"Anthropic / Gemini / DeepSeek Capability Matrix Migration"** — Migrates the three remaining providers onto the same capability matrix. Required pre-work: ensure the matrix's per-model lookup pattern handles the `caching: true` (Anthropic 4-breakpoint, Gemini explicit) and `pdf_input: true` (Anthropic, Gemini) capabilities. Each provider keeps its unique per-vendor code path (the 4-breakpoint system, the genai SDK); the matrix entries are populated so the UX can adapt. This is a separate track because the migration of each unique-API provider is non-trivial and the risk of regressing the existing working code is high.
|
||||
|
||||
### 13.2 Project References
|
||||
|
||||
- `docs/guide_ai_client.md` — current `ai_client.py` architecture; will be updated in Phase 6 to document the matrix and the shared helper.
|
||||
- `docs/guide_models.md` — current PROVIDERS constant and provider metadata; will be updated in Phase 6.
|
||||
- `conductor/tracks/openai_integration_20260308/` — closest prior art (single provider, OpenAI-compatible).
|
||||
- `conductor/tracks/zhipu_integration_20260308/` — second prior art (single provider, custom API).
|
||||
- `conductor/tracks/startup_speedup_20260606/` — example of an active track in this project (same convention).
|
||||
- `conductor/tracks/test_batching_refactor_20260606/` — second example of an active track in this project.
|
||||
- `conductor/product.md` "Multi-Provider Integration" — product-level overview of the multi-provider architecture.
|
||||
- `conductor/product-guidelines.md` "Modular Controller Pattern" — the convention this track follows for `vendor_capabilities.py` and `openai_compatible.py` as standalone modules.
|
||||
|
||||
### 13.3 External References
|
||||
|
||||
- **Ryan Fleury on code/data separation** — informs the data-oriented design (vendor capabilities as data, helper as algorithm, per-vendor code as boundary adapter).
|
||||
- **Mike Acton on data-oriented design** — informs the SoA-like layout of the capability matrix and the "transform data, don't mutate state" framing.
|
||||
- **Timothy Lottes on cache-aware algorithms** — informs the helper's streaming aggregation (bulk-process chunks, minimize per-chunk overhead).
|
||||
- **Alibaba DashScope documentation** — `https://help.aliyun.com/zh/model-studio/` for the native API reference.
|
||||
- **OpenRouter API documentation** — `https://openrouter.ai/docs` for the cloud aggregator.
|
||||
- **Ollama OpenAI compatibility** — `https://github.com/ollama/ollama/blob/main/docs/openai.md` for the local backend.
|
||||
- **xAI API documentation** — `https://docs.x.ai/` for the Grok endpoint.
|
||||
@@ -0,0 +1,134 @@
|
||||
# Track state for qwen_llama_grok_integration_20260606
|
||||
# Updated by Tier 2 Tech Lead as tasks complete
|
||||
|
||||
[meta]
|
||||
track_id = "qwen_llama_grok_integration_20260606"
|
||||
name = "Qwen, Llama & Grok Vendor Integration + Capability Matrix"
|
||||
status = "active"
|
||||
current_phase = 0
|
||||
last_updated = "2026-06-06"
|
||||
|
||||
[phases]
|
||||
# Phase 1: Capability matrix framework + shared helper (no user-facing changes)
|
||||
phase_1 = { status = "pending", checkpoint_sha = "", name = "Capability matrix framework + shared helper" }
|
||||
# Phase 2: Qwen via DashScope
|
||||
phase_2 = { status = "pending", checkpoint_sha = "", name = "Qwen via DashScope" }
|
||||
# Phase 3: Grok + Llama via shared helper
|
||||
phase_3 = { status = "pending", checkpoint_sha = "", name = "Grok + Llama via shared helper" }
|
||||
# Phase 4: MiniMax refactor
|
||||
phase_4 = { status = "pending", checkpoint_sha = "", name = "MiniMax refactor to use shared helper" }
|
||||
# Phase 5: UX adaptation + integration
|
||||
phase_5 = { status = "pending", checkpoint_sha = "", name = "UX adaptation + integration" }
|
||||
# Phase 6: Docs + archive
|
||||
phase_6 = { status = "pending", checkpoint_sha = "", name = "Docs + archive" }
|
||||
|
||||
[tasks]
|
||||
# Phase 1: Capability matrix framework + shared helper
|
||||
# (Tasks TBD by writing-plans; placeholder structure only)
|
||||
t1_1 = { status = "pending", commit_sha = "", description = "Red: tests/test_vendor_capabilities.py::test_registry_lookup_known_model" }
|
||||
t1_2 = { status = "pending", commit_sha = "", description = "Red: tests/test_vendor_capabilities.py::test_fallback_to_vendor_default" }
|
||||
t1_3 = { status = "pending", commit_sha = "", description = "Red: tests/test_vendor_capabilities.py::test_unknown_vendor_raises" }
|
||||
t1_4 = { status = "pending", commit_sha = "", description = "Green: implement src/vendor_capabilities.py with VendorCapabilities + get_capabilities + initial registry" }
|
||||
t1_5 = { status = "pending", commit_sha = "", description = "Red: tests/test_openai_compatible.py::test_send_non_streaming" }
|
||||
t1_6 = { status = "pending", commit_sha = "", description = "Red: tests/test_openai_compatible.py::test_send_streaming_aggregates_chunks" }
|
||||
t1_7 = { status = "pending", commit_sha = "", description = "Red: tests/test_openai_compatible.py::test_tool_call_detection" }
|
||||
t1_8 = { status = "pending", commit_sha = "", description = "Red: tests/test_openai_compatible.py::test_vision_multimodal_message" }
|
||||
t1_9 = { status = "pending", commit_sha = "", description = "Red: tests/test_openai_compatible.py::test_error_classification_429_to_rate_limit" }
|
||||
t1_10 = { status = "pending", commit_sha = "", description = "Green: implement src/openai_compatible.py with NormalizedResponse + OpenAICompatibleRequest + send_openai_compatible" }
|
||||
t1_11 = { status = "pending", commit_sha = "", description = "Add dashscope>=1.14.0,<2.0.0 to pyproject.toml dependencies" }
|
||||
t1_12 = { status = "pending", commit_sha = "", description = "Phase 1 checkpoint commit + git note" }
|
||||
# Phase 2: Qwen via DashScope
|
||||
t2_1 = { status = "pending", commit_sha = "", description = "Red: tests/test_qwen_provider.py::test_send_qwen_routes_to_dashscope" }
|
||||
t2_2 = { status = "pending", commit_sha = "", description = "Red: tests/test_qwen_provider.py::test_qwen_tool_format_translation" }
|
||||
t2_3 = { status = "pending", commit_sha = "", description = "Red: tests/test_qwen_provider.py::test_qwen_vl_vision_image_base64" }
|
||||
t2_4 = { status = "pending", commit_sha = "", description = "Red: tests/test_qwen_provider.py::test_qwen_error_classification" }
|
||||
t2_5 = { status = "pending", commit_sha = "", description = "Red: tests/test_qwen_provider.py::test_list_qwen_models" }
|
||||
t2_6 = { status = "pending", commit_sha = "", description = "Green: implement _send_qwen, _ensure_qwen_client, _classify_qwen_error, _list_qwen_models in src/ai_client.py" }
|
||||
t2_7 = { status = "pending", commit_sha = "", description = "Add [qwen] section to credentials_template.toml" }
|
||||
t2_8 = { status = "pending", commit_sha = "", description = "Add qwen to PROVIDERS in src/gui_2.py and src/app_controller.py" }
|
||||
t2_9 = { status = "pending", commit_sha = "", description = "Add Qwen models to capability registry in src/vendor_capabilities.py" }
|
||||
t2_10 = { status = "pending", commit_sha = "", description = "Add Qwen pricing to src/cost_tracker.py" }
|
||||
t2_11 = { status = "pending", commit_sha = "", description = "Phase 2 checkpoint commit + git note" }
|
||||
# Phase 3: Grok + Llama via shared helper
|
||||
t3_1 = { status = "pending", commit_sha = "", description = "Red: tests/test_grok_provider.py::test_send_grok_uses_xai_endpoint" }
|
||||
t3_2 = { status = "pending", commit_sha = "", description = "Red: tests/test_grok_provider.py::test_grok_2_vision_vision_support" }
|
||||
t3_3 = { status = "pending", commit_sha = "", description = "Green: implement _send_grok, _ensure_grok_client in src/ai_client.py" }
|
||||
t3_4 = { status = "pending", commit_sha = "", description = "Add [grok] section to credentials_template.toml" }
|
||||
t3_5 = { status = "pending", commit_sha = "", description = "Add grok to PROVIDERS in src/gui_2.py and src/app_controller.py" }
|
||||
t3_6 = { status = "pending", commit_sha = "", description = "Add Grok models to capability registry" }
|
||||
t3_7 = { status = "pending", commit_sha = "", description = "Add Grok pricing to src/cost_tracker.py" }
|
||||
t3_8 = { status = "pending", commit_sha = "", description = "Red: tests/test_llama_provider.py::test_send_llama_ollama_backend" }
|
||||
t3_9 = { status = "pending", commit_sha = "", description = "Red: tests/test_llama_provider.py::test_send_llama_openrouter_backend" }
|
||||
t3_10 = { status = "pending", commit_sha = "", description = "Red: tests/test_llama_provider.py::test_send_llama_custom_url" }
|
||||
t3_11 = { status = "pending", commit_sha = "", description = "Red: tests/test_llama_provider.py::test_llama_model_discovery_unions_ollama_and_openrouter" }
|
||||
t3_12 = { status = "pending", commit_sha = "", description = "Red: tests/test_llama_provider.py::test_llama_3_2_vision_vision_support" }
|
||||
t3_13 = { status = "pending", commit_sha = "", description = "Red: tests/test_llama_provider.py::test_llama_local_backend_cost_tracking_false" }
|
||||
t3_14 = { status = "pending", commit_sha = "", description = "Green: implement _send_llama, _ensure_llama_client, _list_llama_models in src/ai_client.py" }
|
||||
t3_15 = { status = "pending", commit_sha = "", description = "Add [llama] section to credentials_template.toml" }
|
||||
t3_16 = { status = "pending", commit_sha = "", description = "Add llama to PROVIDERS in src/gui_2.py and src/app_controller.py" }
|
||||
t3_17 = { status = "pending", commit_sha = "", description = "Add Llama models to capability registry" }
|
||||
t3_18 = { status = "pending", commit_sha = "", description = "Phase 3 checkpoint commit + git note" }
|
||||
# Phase 4: MiniMax refactor
|
||||
t4_1 = { status = "pending", commit_sha = "", description = "Baseline: run tests/test_minimax_provider.py; all pass (green)" }
|
||||
t4_2 = { status = "pending", commit_sha = "", description = "Refactor _send_minimax to use send_openai_compatible helper" }
|
||||
t4_3 = { status = "pending", commit_sha = "", description = "Verify tests/test_minimax_provider.py still pass (no regressions)" }
|
||||
t4_4 = { status = "pending", commit_sha = "", description = "Add MiniMax to capability registry (per-model: minimax-* entries with vision/tool/cost)" }
|
||||
t4_5 = { status = "pending", commit_sha = "", description = "Run full test suite; ensure no regressions" }
|
||||
t4_6 = { status = "pending", commit_sha = "", description = "Phase 4 checkpoint commit + git note" }
|
||||
# Phase 5: UX adaptation + integration
|
||||
t5_1 = { status = "pending", commit_sha = "", description = "Add _get_active_capabilities() helper to src/gui_2.py" }
|
||||
t5_2 = { status = "pending", commit_sha = "", description = "Apply 9 UX adaptations from spec.md §6 (vision, tools, cache, stream, fetch models, context window, cost)" }
|
||||
t5_3 = { status = "pending", commit_sha = "", description = "Update _predefined_callbacks / _gettable_fields to expose new provider selection" }
|
||||
t5_4 = { status = "pending", commit_sha = "", description = "Run full test suite; ensure no regressions in live_gui tests" }
|
||||
t5_5 = { status = "pending", commit_sha = "", description = "Manual smoke test: select Qwen, send message, tool executes; repeat for Llama, Grok" }
|
||||
t5_6 = { status = "pending", commit_sha = "", description = "Phase 5 checkpoint commit + git note" }
|
||||
# Phase 6: Docs + archive
|
||||
t6_1 = { status = "pending", commit_sha = "", description = "Update docs/guide_ai_client.md: new vendors section, capability matrix section, shared helper section" }
|
||||
t6_2 = { status = "pending", commit_sha = "", description = "Update docs/guide_models.md: new PROVIDERS entries for qwen/llama/grok" }
|
||||
t6_3 = { status = "pending", commit_sha = "", description = "git mv conductor/tracks/qwen_llama_grok_integration_20260606 to conductor/tracks/archive/" }
|
||||
t6_4 = { status = "pending", commit_sha = "", description = "Update conductor/tracks.md: move entry from Backlog to Recently Completed" }
|
||||
t6_5 = { status = "pending", commit_sha = "", description = "Final checkpoint commit + git note" }
|
||||
|
||||
[verification]
|
||||
# Filled as phases complete
|
||||
phase_1_capability_registry_complete = false
|
||||
phase_1_shared_helper_complete = false
|
||||
phase_2_qwen_dashscope_complete = false
|
||||
phase_3_grok_complete = false
|
||||
phase_3_llama_complete = false
|
||||
phase_4_minimax_refactor_preserves_tests = false
|
||||
phase_5_ux_adaptations_complete = false
|
||||
phase_5_smoke_test_passed = false
|
||||
phase_6_docs_updated = false
|
||||
phase_6_track_archived = false
|
||||
full_test_suite_passes = false
|
||||
no_new_threading_thread_calls = false
|
||||
|
||||
[openai_compatible_models]
|
||||
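# Filled as models are added to capability registry (Qwen entries are tracked here too, although they use the native DashScope SDK rather than the OpenAI-compatible helper)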
|
||||
qwen_turbo = false
|
||||
qwen_plus = false
|
||||
qwen_max = false
|
||||
qwen_long = false
|
||||
qwen_vl_plus = false
|
||||
qwen_vl_max = false
|
||||
qwen_audio = false
|
||||
llama_3_1_8b = false
|
||||
llama_3_1_70b = false
|
||||
llama_3_1_405b = false
|
||||
llama_3_2_1b = false
|
||||
llama_3_2_3b = false
|
||||
llama_3_2_11b_vision = false
|
||||
llama_3_2_90b_vision = false
|
||||
llama_3_3_70b = false
|
||||
grok_2 = false
|
||||
grok_2_vision = false
|
||||
grok_beta = false
|
||||
minimax_models_refactored = false
|
||||
|
||||
[minimax_refactor_stats]
|
||||
# Filled in Phase 4
|
||||
lines_before = 0
|
||||
lines_after = 0
|
||||
tests_passing = 0
|
||||
tests_failing = 0
|
||||
Reference in New Issue
Block a user