diff --git a/conductor/tracks/qwen_llama_grok_followup_20260611/TODO.md b/conductor/tracks/qwen_llama_grok_followup_20260611/TODO.md index f7077054..150ed684 100644 --- a/conductor/tracks/qwen_llama_grok_followup_20260611/TODO.md +++ b/conductor/tracks/qwen_llama_grok_followup_20260611/TODO.md @@ -23,7 +23,7 @@ This is a TODO list for setting up the follow-up track. The Tier 2 Tech Lead wil - [ ] Read current tool-loop patterns in `_send_minimax` (231 → 75 lines after refactor) and `_send_anthropic/_send_gemini/_send_gemini_cli/_send_deepseek` (inline loops) - [ ] Design `run_with_tool_loop(client, request, capabilities, *, pre_tool_callback, qa_callback, patch_callback, base_dir, vendor_name, history_lock, history, trim_func)` helper - [ ] Write 5 Red tests: no-tool-calls returns immediately, tool-calls dispatch, max-rounds limit, history appending, error-in-tool-call doesn't crash - - [ ] Implement helper in `src/tool_loop.py` + - [ ] Implement helper in `src/ai_client.py` - [ ] Apply to all 8 vendors - [ ] Audit script `scripts/audit_no_inline_tool_loops.py` to enforce the pattern - [ ] Verify all 38+ existing tests still pass diff --git a/conductor/tracks/qwen_llama_grok_followup_20260611/metadata.json b/conductor/tracks/qwen_llama_grok_followup_20260611/metadata.json index e68e57f8..6cd14ccc 100644 --- a/conductor/tracks/qwen_llama_grok_followup_20260611/metadata.json +++ b/conductor/tracks/qwen_llama_grok_followup_20260611/metadata.json @@ -8,10 +8,9 @@ "type": "refactor + feature", "scope": { "new_files": [ - "src/tool_loop.py", - "src/llama_ollama_native.py", - "src/llama_meta_api.py", - "tests/test_tool_loop.py", + "tests/test_ai_client_tool_loop.py", + "tests/test_ai_client_llama_ollama_native.py", + "tests/test_ai_client_llama_meta_api.py", "scripts/audit_no_inline_tool_loops.py", "scripts/audit_providers_source_of_truth.py" ], @@ -50,14 +49,14 @@ "2026-06-11: User wants ai_client.py further codepath consolidation; new files need review." 
], "verification_criteria": [ - "src/tool_loop.py:run_with_tool_loop handles no-tool-calls, dispatches tool calls, respects max-rounds, appends to history, doesn't crash on tool error", + "src/ai_client.py:run_with_tool_loop handles no-tool-calls, dispatches tool calls, respects max-rounds, appends to history, doesn't crash on tool error", "All 8 vendors (_send_minimax, _send_qwen, _send_grok, _send_llama, _send_anthropic, _send_gemini, _send_gemini_cli, _send_deepseek) use run_with_tool_loop", "scripts/audit_no_inline_tool_loops.py passes (no inline tool loops in any _send_)", "PROVIDERS is no longer declared in src/models.py", "scripts/audit_providers_source_of_truth.py passes", "All 9 UX adaptations from parent spec §6 are applied to src/gui_2.py (1 from parent Phase 5 + 8 from this track's Phase 3)", - "src/llama_ollama_native.py: native Ollama adapter replaces OpenAI-compatible for Ollama backend (or used by default)", - "src/llama_meta_api.py: Meta Llama API adapter; new 4th backend", + "src/ai_client.py:ollama_chat is the native Ollama adapter; Ollama backend routes to it when base_url is localhost/127.0.0.1 (replaces OpenAI-compatible)", + "src/ai_client.py:meta_llama_chat is the Meta Llama API adapter; new 4th Llama backend (DEFER if https://llama.developer.meta.com/docs/overview still returns 400)", "src/vendor_capabilities.py: 12 new v2 fields added (local, reasoning, structured_output, code_execution, web_search, x_search, file_search, mcp_support, audio, video, grounding, computer_use)", "All vendor registry entries updated with the new fields", "Anthropic matrix entries populated (caching, extended_thinking, pdf, computer_use)", @@ -74,6 +73,6 @@ "parent_spec": "conductor/tracks/qwen_llama_grok_integration_20260606/spec.md", "ai_client_guide": "docs/guide_ai_client.md", "models_guide": "docs/guide_models.md", - "follow_up_audit_report": "docs/reports/qwen_llama_grok_followup_20260611.md (TBD; will be created in Phase 5)" + "follow_up_audit_report": 
"docs/reports/qwen_llama_grok_followup_audit_20260611.md (already exists; written 2026-06-11 at end of parent track Phase 6)" } } diff --git a/conductor/tracks/qwen_llama_grok_followup_20260611/plan.md b/conductor/tracks/qwen_llama_grok_followup_20260611/plan.md index 04318863..42ae5688 100644 --- a/conductor/tracks/qwen_llama_grok_followup_20260611/plan.md +++ b/conductor/tracks/qwen_llama_grok_followup_20260611/plan.md @@ -583,7 +583,7 @@ git commit --allow-empty -m "conductor(checkpoint): Phase 1 - run_with_tool_loop SHA=$(git log -1 --format="%H") git notes add -m "Phase 1 checkpoint: tool loop lift -run_with_tool_loop helper in src/tool_loop.py wraps send_openai_compatible +run_with_tool_loop helper in src/ai_client.py wraps send_openai_compatible with the tool-call loop. Applied to all 8 vendors: - _send_minimax (was inline, now uses helper) - _send_qwen (was single-shot, now has loop) @@ -1176,9 +1176,9 @@ git commit -m "test(llama_ollama_native): add red tests for native /api/chat ada --- -## Task 4.3: Implement `src/llama_ollama_native.py` + wire into `_send_llama` +## Task 4.3: Add `ollama_chat` + `_send_llama_native` to `src/ai_client.py`; route Ollama backend to native -**Files:** Create `src/llama_ollama_native.py`; modify `src/ai_client.py` (route Ollama to native) +**Files:** Modify `src/ai_client.py` (add `ollama_chat`, `_send_llama_native`; route Ollama to native) - [ ] **Step 1: Create the file** @@ -1266,7 +1266,7 @@ Expected: 3 passed. 
- [ ] **Step 5: Commit** ```bash -git add src/llama_ollama_native.py src/ai_client.py +git add src/ai_client.py git commit -m "feat(llama_ollama_native): add native /api/chat adapter; route Ollama backend to it" ``` @@ -1353,7 +1353,7 @@ SHA=$(git log -1 --format="%H") git notes add -m "Phase 4 checkpoint: local-first + matrix v2 - 12 v2 fields added to VendorCapabilities -- Native Ollama adapter (src/llama_ollama_native.py); Ollama backend +- Native Ollama adapter (in `src/ai_client.py`); Ollama backend now uses /api/chat (think, images) instead of /v1/chat/completions - 22 existing registry entries updated with the new v2 fields - GUI: 'Local Model' badge for local backends diff --git a/conductor/tracks/qwen_llama_grok_followup_20260611/spec.md b/conductor/tracks/qwen_llama_grok_followup_20260611/spec.md index 07f9b556..c092b400 100644 --- a/conductor/tracks/qwen_llama_grok_followup_20260611/spec.md +++ b/conductor/tracks/qwen_llama_grok_followup_20260611/spec.md @@ -29,10 +29,47 @@ The parent track `qwen_llama_grok_integration_20260606` (status: 50/79 tasks don ### Non-Goals (this track) -- Not changing the matrix schema (the 7 v1 fields are good; v2 is additive) -- Not changing the shared `send_openai_compatible` helper (it works; the tool loop is separate) -- Not changing the `vendor_capabilities.py` lookup pattern (it works; registry is the source of truth) -- Not adding new vendors (the parent track added Qwen/Grok/Llama; this track only consolidates what's there) +- **Not** changing the matrix schema beyond the 7 v1 + 12 v2 = 19 fields (no further fields in this track) +- **Not** changing the shared `send_openai_compatible` helper (it works; the tool loop is separate) +- **Not** changing the `vendor_capabilities.py` lookup pattern (it works; registry is the source of truth) +- **Not** adding new vendors (the parent track added Qwen/Grok/Llama; this track only consolidates what's there) +- **Not** cleaning up the existing sprawl (the 3 stray `src/` files 
`vendor_capabilities.py`, `openai_compatible.py`, `qwen_adapter.py` — see Deferred Work below) +- **Not** refactoring `src/ai_client.py` to a smaller line count (it's 2784 lines and the user said large files are fine) +- **Not** lifting history management into a `VendorHistory` class (out of scope; the existing per-vendor pattern works) +- **Not** lifting reasoning content extraction into a shared helper (out of scope; the per-vendor extraction is short) +- **Not** lifting error classification into a per-HTTP-code helper (out of scope; the per-vendor classifiers are short) + +### Deferred Work (separate tracks; out of scope for this one) + +The user explicitly stated (2026-06-11): "I know I have to setup audit tracks and refactor tracks down the line to prune and cleanup the codebase but I also know thats not feasible while just trying to get you todo the right thing for this new way of handling vendors or models." + +Three follow-up tracks are documented as DEFERRED (not in scope for this track): + +1. **`namespace_cleanup_20260611`** — Audit the codebase for file sprawl. Specifically: + - Move `src/vendor_capabilities.py` content into `src/ai_client.py` (the file is in scope to MODIFY for the v2 fields in this track, but moving it as a whole is the cleanup track's job) + - Move `src/openai_compatible.py` content into `src/ai_client.py` + - Move `src/qwen_adapter.py` content into `src/ai_client.py` + - Audit OTHER modules for similar sprawl: `src/imgui_scopes.py`, `src/markdown_helper.py`, `src/markdown_table.py`, `src/io_pool.py`, `src/external_editor.py`, `src/performance_monitor.py`, `src/session_logger.py`, etc. Some may legitimately be sub-systems that should be namespace-isolated; others may be helpers that should fold into a parent. + +2. 
**`ai_client_codepath_consolidation_20260611`** — Reduce `src/ai_client.py` line count from 2784 by: + - Lifting history management into a `VendorHistory` class (each vendor has its own lock + history list; the per-vendor boilerplate is ~30 lines × 8 vendors = 240 lines of duplication) + - Lifting reasoning content extraction into a shared helper + - Lifting error classification into a per-HTTP-code helper + - Lifting the per-vendor client init into a uniform pattern + - The line count reduction is estimated at 30-40% (~1000 lines saved) + - **Note:** the user explicitly said large files are FINE, so this codepath consolidation is about REDUCING DUPLICATION, not about reducing file size. The file can stay large; we just want less repetition. + +3. **`mcp_architecture_refactor_20260606`** (already specced) — Splits `src/mcp_client.py` (2,205 lines) into 6 sub-MCPs (`mcp_file_io.py`, `mcp_python.py`, `mcp_c.py`, `mcp_cpp.py`, `mcp_web.py`, `mcp_analysis.py`). This is the OPPOSITE direction of the user's preference (the user wants things in one file, not split). **Note:** this track is already specced in the parent tracks.md; whether to actually execute it (vs. abort it) is a separate decision. The user may want to abort this track. + +### Naming Convention Reference (HARD RULE, per `AGENTS.md`) + +New `src/*.py` files may only be created on the user's explicit request. If you find yourself about to create one, **ASK FIRST** — don't just create it. Defaults: +- Helpers and sub-systems go in the parent module +- E.g., AI-client-specific code goes in `src/ai_client.py`; MCP-client code goes in `src/mcp_client.py` +- Even if the parent file is already 3K+ lines, the helper still goes there +- The only new files this project ever creates (per typical track) are: `scripts/audit_*.py`, `tests/test_*.py`, and `docs/*.md` + +See `AGENTS.md` "File Size and Naming Convention" for the full rule. 
This rule was added 2026-06-11 after the user called out the LLM training data bias against large files. --- @@ -40,6 +77,8 @@ The parent track `qwen_llama_grok_integration_20260606` (status: 50/79 tasks don ### A.1 Tool Loop Lift +**Naming convention (HARD RULE, per `AGENTS.md`):** `run_with_tool_loop` lives IN `src/ai_client.py`, not in a new `src/tool_loop.py`. New `src/*.py` files may only be created on the user's explicit request. The only new files in this track are: `scripts/audit_*.py`, `tests/test_*.py`, and `docs/*.md`. See `AGENTS.md` "File Size and Naming Convention" for the full rule. + Today: ```python # in _send_minimax (only): @@ -54,9 +93,9 @@ for _round in range(MAX_TOOL_ROUNDS + 2): # in _send_anthropic, _send_gemini, _send_gemini_cli, _send_deepseek: inline loop (4-way duplication) ``` -After: +After (all in `src/ai_client.py`): ```python -# new src/tool_loop.py: +# added near _execute_tool_calls_concurrently at src/ai_client.py:754 def run_with_tool_loop( client, request, capabilities, *, pre_tool_callback, qa_callback, patch_callback, @@ -76,12 +115,14 @@ response = run_with_tool_loop( base_dir=base_dir, vendor_name="<vendor>", history_lock=_<vendor>_history_lock, history=_<vendor>_history, - trim_func=_(vendor)_trim_history, + trim_func=_<vendor>_trim_history, ) ``` The helper takes history management as injected parameters (each vendor has its own lock and history list). The tool dispatch (`_execute_tool_calls_concurrently`) takes a `vendor_name` string. +**Audit enforcement:** the new `scripts/audit_no_inline_tool_loops.py` fails if any `_send_<vendor>()` has an inline `for _round_idx in range(MAX_TOOL_ROUNDS` pattern. Note that the "Today" snippet above loops with `_round`, not `_round_idx`, so the check must match the loop regardless of the loop-variable name. + ### A.2 PROVIDERS Move Today: @@ -115,16 +156,20 @@ if not caps.: ### B.1 Local-First Architecture -- Add `local_backend: bool` to `VendorCapabilities` (default False) +**Per user feedback (2026-06-11):** "I want to put more emphasis and supporting local models and separating local model vending vis online/cloud vendors of models." 
Local models must be first-class, not "one of 3 backends." + +- Add `local: bool` to `VendorCapabilities` (default False) - Set True for Llama (when base_url is localhost/127.0.0.1) -- Native Ollama adapter: `src/llama_ollama_native.py` (separate from openai_compatible) -- Meta Llama API adapter: `src/llama_meta_api.py` (verify docs first) -- GUI: new "Local Model" badge in the AI Settings panel -- Cost panel: 4th state "Local (no cost)" distinct from "Free (local)" and "—" +- **Native Ollama adapter (in `src/ai_client.py`, NOT a new file):** `ollama_chat()` function lives alongside the existing `_send_llama`. The Ollama backend routes to native `/api/chat` (with `think`, `images` array) instead of OpenAI-compatible `/v1/chat/completions`. Native is the DEFAULT for localhost. +- **Meta Llama API as 4th backend (in `src/ai_client.py`):** `meta_llama_chat()` function. **Prerequisite:** verify the URL `https://llama.developer.meta.com/docs/overview` is reachable; it returned 400 in the parent's session. If unreachable on track start, DEFER the Meta backend to a separate follow-up; the native Ollama + 3 existing backends still ship. +- **GUI: "Local Model" badge** in the AI Settings panel when `caps.local` is True +- **Cost panel: 4th state "Local (no cost)"** distinct from "Free (local)" and "—" (this replaces adaptation 8's "Free (local)" wording from the parent Phase 5; that wording was acceptable, but the follow-up's v2 matrix adds an explicit `local` field that lets the UI be cleaner) + +**Naming convention (HARD RULE):** `ollama_chat()` and `meta_llama_chat()` live in `src/ai_client.py` (NOT new `src/llama_ollama_native.py` and `src/llama_meta_api.py`). Per `AGENTS.md` "File Size and Naming Convention" — new top-level `src/*.py` files require explicit user request. 
### B.2 Matrix Expansion (v2) -Add to `VendorCapabilities`: +Add to `VendorCapabilities` (the 12 v2 fields): - `local: bool` (B.1) - `reasoning: bool` (xAI `reasoning_effort`, Anthropic extended thinking, Ollama `think`) - `structured_output: bool` (response_format / format) @@ -140,6 +185,20 @@ Add to `VendorCapabilities`: Each new field is a registry update + a UI adaptation. The matrix schema grows; the GUI filters based on the matrix. +**UI adaptations for v2 fields** (one per field, in `src/gui_2.py`): +- `reasoning` → "Reasoning" toggle (controls `reasoning_effort` for xAI, etc.) +- `structured_output` → "JSON output" toggle +- `code_execution` → "Code execution" panel (when True) +- `web_search`, `x_search` → Search tool UI +- `file_search` → File search panel +- `mcp_support` → MCP integration toggle +- `audio` → Audio attachment button (replaces the absent-but-deferred audio_input) +- `video` → Video attachment button +- `grounding` → "Grounding" toggle +- `computer_use` → "Computer Use" toggle + +Most of these UI adaptations are small (5-10 line additions per field). They can ship in a batch commit per field, or one big commit at the end of Phase 4. + ### C.1 Anthropic / Gemini / DeepSeek Migration Per the deferred follow-up track `anthropic_gemini_deepseek_capability_matrix_20260606` (parent spec §13.1.A). 
The capability matrix entries for these vendors can be populated: @@ -155,7 +214,7 @@ The implementations (`_send_anthropic`, `_send_gemini`, `_send_deepseek`) keep t ### Phase 1: Tool Loop Lift (1-2 weeks) - T1.1: Write red tests for `run_with_tool_loop` (5 tests covering: no tool calls returns immediately, tool calls dispatch, max rounds limit, history appending, error in tool call doesn't crash) -- T1.2: Implement `src/tool_loop.py` with `run_with_tool_loop` +- T1.2: Implement `run_with_tool_loop` in `src/ai_client.py` (NOT a new file; per the naming convention HARD RULE) - T1.3: Apply to `_send_minimax` (replace inline loop) - T1.4: Apply to `_send_qwen`, `_send_grok`, `_send_llama` (add the missing loop) - T1.5: Apply to `_send_anthropic`, `_send_gemini`, `_send_gemini_cli`, `_send_deepseek` (consolidate) @@ -181,8 +240,8 @@ The implementations (`_send_anthropic`, `_send_gemini`, `_send_deepseek`) keep t ### Phase 4: Local-First + Matrix Expansion (1-2 weeks) - T4.1: Add `local: bool` to VendorCapabilities; update registry for Llama -- T4.2: Native Ollama adapter (`src/llama_ollama_native.py`); replace OpenAI-compatible for Ollama backend -- T4.3: Meta Llama API adapter (`src/llama_meta_api.py`); add as 4th Llama backend +- T4.2: Native Ollama adapter (in `src/ai_client.py` as `ollama_chat` + `_send_llama_native`); replace OpenAI-compatible for Ollama backend +- T4.3: Meta Llama API adapter (in `src/ai_client.py` as `meta_llama_chat`); add as 4th Llama backend (DEFER if URL still 400) - T4.4: GUI: "Local Model" badge - T4.5: Add v2 fields (local, reasoning, structured_output, code_execution, web_search, x_search, file_search, mcp_support, audio, video, grounding, computer_use) - T4.6: Update all vendor registry entries with the new fields @@ -216,9 +275,9 @@ The implementations (`_send_anthropic`, `_send_gemini`, `_send_deepseek`) keep t ## Open Questions -1. 
**`src/ai_client_providers.py` vs `src/ai_client.py`?** Should PROVIDERS go in a new file (clearer separation) or stay in the main ai_client module (less file proliferation)? -2. **Meta Llama API spec verification:** The 400 error on the docs URL last session — is it back up? If not, defer the Meta backend. -3. **Local model as separate UI mode?** Should the GUI have a "Local / Cloud / All" filter on the provider dropdown, or just show the local badge per-vendor? +1. **Meta Llama API spec verification:** The 400 error on `https://llama.developer.meta.com/docs/overview` last session. Re-verify on Phase 4 start. If still 400, **defer the Meta backend** to a separate follow-up; the native Ollama + 3 existing backends still ship. +2. **Local model as separate UI mode?** Should the GUI have a "Local / Cloud / All" filter on the provider dropdown, or just show the local badge per-vendor? Default: per-vendor badge (Phase 4 minimum). The filter is a future-track enhancement. +3. **PROVIDERS location:** **RESOLVED (2026-06-11):** `src/ai_client.py` (NOT a new `src/ai_client_providers.py`). The PROVIDERS list is small (8 entries); creating a new file for a single constant is over-engineering. The vendor list is logically part of the AI client. 
--- diff --git a/conductor/tracks/qwen_llama_grok_followup_20260611/state.toml b/conductor/tracks/qwen_llama_grok_followup_20260611/state.toml index cf1473e4..d9a5996c 100644 --- a/conductor/tracks/qwen_llama_grok_followup_20260611/state.toml +++ b/conductor/tracks/qwen_llama_grok_followup_20260611/state.toml @@ -24,7 +24,7 @@ phase_5 = { status = "pending", checkpoint_sha = "", name = "Anthropic/Gemini/De t1_1 = { status = "pending", commit_sha = "", description = "Read tool-loop patterns in _send_minimax + the 4 inline-loop vendors" } t1_2 = { status = "pending", commit_sha = "", description = "Design run_with_tool_loop helper signature" } t1_3 = { status = "pending", commit_sha = "", description = "Red: 5 tests for run_with_tool_loop in tests/test_tool_loop.py" } -t1_4 = { status = "pending", commit_sha = "", description = "Green: implement run_with_tool_loop in src/tool_loop.py" } +t1_4 = { status = "pending", commit_sha = "", description = "Green: implement run_with_tool_loop in src/ai_client.py" } t1_5 = { status = "pending", commit_sha = "", description = "Apply to _send_minimax (replace inline loop)" } t1_6 = { status = "pending", commit_sha = "", description = "Apply to _send_qwen + _send_grok + _send_llama (add missing loop)" } t1_7 = { status = "pending", commit_sha = "", description = "Apply to _send_anthropic + _send_gemini + _send_gemini_cli + _send_deepseek (consolidate inline)" } @@ -48,8 +48,8 @@ t3_8 = { status = "pending", commit_sha = "", description = "Adaptation 9: cost t3_9 = { status = "pending", commit_sha = "", description = "Phase 3 checkpoint + git note" } # Phase 4: Local-first + matrix v2 t4_1 = { status = "pending", commit_sha = "", description = "Add local: bool to VendorCapabilities" } -t4_2 = { status = "pending", commit_sha = "", description = "Native Ollama adapter src/llama_ollama_native.py" } -t4_3 = { status = "pending", commit_sha = "", description = "Meta Llama API adapter src/llama_meta_api.py" } +t4_2 = { status = 
"pending", commit_sha = "", description = "Native Ollama adapter (in src/ai_client.py as ollama_chat + _send_llama_native; route Ollama backend to it)" } +t4_3 = { status = "pending", commit_sha = "", description = "Meta Llama API adapter (in src/ai_client.py as meta_llama_chat; new 4th Llama backend; DEFER if URL still 400)" } t4_4 = { status = "pending", commit_sha = "", description = "GUI: 'Local Model' badge" } t4_5 = { status = "pending", commit_sha = "", description = "Add 12 v2 fields to VendorCapabilities" } t4_6 = { status = "pending", commit_sha = "", description = "Update all vendor registry entries" }