diff --git a/conductor/tracks/qwen_llama_grok_followup_20260611/TODO.md b/conductor/tracks/qwen_llama_grok_followup_20260611/TODO.md
index f7077054..150ed684 100644
--- a/conductor/tracks/qwen_llama_grok_followup_20260611/TODO.md
+++ b/conductor/tracks/qwen_llama_grok_followup_20260611/TODO.md
@@ -23,7 +23,7 @@ This is a TODO list for setting up the follow-up track. The Tier 2 Tech Lead wil
    - [ ] Read current tool-loop patterns in `_send_minimax` (231 → 75 lines after refactor) and `_send_anthropic/_send_gemini/_send_gemini_cli/_send_deepseek` (inline loops)
    - [ ] Design `run_with_tool_loop(client, request, capabilities, *, pre_tool_callback, qa_callback, patch_callback, base_dir, vendor_name, history_lock, history, trim_func)` helper
    - [ ] Write 5 Red tests: no-tool-calls returns immediately, tool-calls dispatch, max-rounds limit, history appending, error-in-tool-call doesn't crash
-   - [ ] Implement helper in `src/tool_loop.py`
+   - [ ] Implement helper in `src/ai_client.py`
    - [ ] Apply to all 8 vendors
    - [ ] Audit script `scripts/audit_no_inline_tool_loops.py` to enforce the pattern
    - [ ] Verify all 38+ existing tests still pass
diff --git a/conductor/tracks/qwen_llama_grok_followup_20260611/metadata.json b/conductor/tracks/qwen_llama_grok_followup_20260611/metadata.json
index e68e57f8..6cd14ccc 100644
--- a/conductor/tracks/qwen_llama_grok_followup_20260611/metadata.json
+++ b/conductor/tracks/qwen_llama_grok_followup_20260611/metadata.json
@@ -8,10 +8,9 @@
   "type": "refactor + feature",
   "scope": {
     "new_files": [
-      "src/tool_loop.py",
-      "src/llama_ollama_native.py",
-      "src/llama_meta_api.py",
-      "tests/test_tool_loop.py",
+      "tests/test_ai_client_tool_loop.py",
+      "tests/test_ai_client_llama_ollama_native.py",
+      "tests/test_ai_client_llama_meta_api.py",
       "scripts/audit_no_inline_tool_loops.py",
       "scripts/audit_providers_source_of_truth.py"
     ],
@@ -50,14 +49,14 @@
     "2026-06-11: User wants ai_client.py further codepath consolidation; new files need review."
   ],
   "verification_criteria": [
-    "src/tool_loop.py:run_with_tool_loop handles no-tool-calls, dispatches tool calls, respects max-rounds, appends to history, doesn't crash on tool error",
+    "src/ai_client.py:run_with_tool_loop handles no-tool-calls, dispatches tool calls, respects max-rounds, appends to history, doesn't crash on tool error",
     "All 8 vendors (_send_minimax, _send_qwen, _send_grok, _send_llama, _send_anthropic, _send_gemini, _send_gemini_cli, _send_deepseek) use run_with_tool_loop",
     "scripts/audit_no_inline_tool_loops.py passes (no inline tool loops in any _send_<vendor>)",
     "PROVIDERS is no longer declared in src/models.py",
     "scripts/audit_providers_source_of_truth.py passes",
     "All 9 UX adaptations from parent spec §6 are applied to src/gui_2.py (1 from parent Phase 5 + 8 from this track's Phase 3)",
-    "src/llama_ollama_native.py: native Ollama adapter replaces OpenAI-compatible for Ollama backend (or used by default)",
-    "src/llama_meta_api.py: Meta Llama API adapter; new 4th backend",
+    "src/ai_client.py:ollama_chat is the native Ollama adapter; Ollama backend routes to it when base_url is localhost/127.0.0.1 (replaces OpenAI-compatible)",
+    "src/ai_client.py:meta_llama_chat is the Meta Llama API adapter; new 4th Llama backend (DEFER if https://llama.developer.meta.com/docs/overview still returns 400)",
     "src/vendor_capabilities.py: 12 new v2 fields added (local, reasoning, structured_output, code_execution, web_search, x_search, file_search, mcp_support, audio, video, grounding, computer_use)",
     "All vendor registry entries updated with the new fields",
     "Anthropic matrix entries populated (caching, extended_thinking, pdf, computer_use)",
@@ -74,6 +73,6 @@
     "parent_spec": "conductor/tracks/qwen_llama_grok_integration_20260606/spec.md",
     "ai_client_guide": "docs/guide_ai_client.md",
     "models_guide": "docs/guide_models.md",
-    "follow_up_audit_report": "docs/reports/qwen_llama_grok_followup_20260611.md (TBD; will be created in Phase 5)"
+    "follow_up_audit_report": "docs/reports/qwen_llama_grok_followup_audit_20260611.md (already exists; written 2026-06-11 at end of parent track Phase 6)"
   }
 }
diff --git a/conductor/tracks/qwen_llama_grok_followup_20260611/plan.md b/conductor/tracks/qwen_llama_grok_followup_20260611/plan.md
index 04318863..42ae5688 100644
--- a/conductor/tracks/qwen_llama_grok_followup_20260611/plan.md
+++ b/conductor/tracks/qwen_llama_grok_followup_20260611/plan.md
@@ -583,7 +583,7 @@ git commit --allow-empty -m "conductor(checkpoint): Phase 1 - run_with_tool_loop
 SHA=$(git log -1 --format="%H")
 git notes add -m "Phase 1 checkpoint: tool loop lift
 
-run_with_tool_loop helper in src/tool_loop.py wraps send_openai_compatible
+run_with_tool_loop helper in src/ai_client.py wraps send_openai_compatible
 with the tool-call loop. Applied to all 8 vendors:
 - _send_minimax (was inline, now uses helper)
 - _send_qwen (was single-shot, now has loop)
@@ -1176,9 +1176,9 @@ git commit -m "test(llama_ollama_native): add red tests for native /api/chat ada
 
 ---
 
-## Task 4.3: Implement `src/llama_ollama_native.py` + wire into `_send_llama`
+## Task 4.3: Add `ollama_chat` + `_send_llama_native` to `src/ai_client.py`; route Ollama backend to native
 
-**Files:** Create `src/llama_ollama_native.py`; modify `src/ai_client.py` (route Ollama to native)
+**Files:** Modify `src/ai_client.py` (add `ollama_chat`, `_send_llama_native`; route Ollama to native)
 
 - [ ] **Step 1: Create the file**
 
@@ -1266,7 +1266,7 @@ Expected: 3 passed.
 - [ ] **Step 5: Commit**
 
 ```bash
-git add src/llama_ollama_native.py src/ai_client.py
+git add src/ai_client.py
 git commit -m "feat(llama_ollama_native): add native /api/chat adapter; route Ollama backend to it"
 ```
 
@@ -1353,7 +1353,7 @@ SHA=$(git log -1 --format="%H")
 git notes add -m "Phase 4 checkpoint: local-first + matrix v2
 
 - 12 v2 fields added to VendorCapabilities
-- Native Ollama adapter (src/llama_ollama_native.py); Ollama backend
+- Native Ollama adapter (in src/ai_client.py); Ollama backend
   now uses /api/chat (think, images) instead of /v1/chat/completions
 - 22 existing registry entries updated with the new v2 fields
 - GUI: 'Local Model' badge for local backends
diff --git a/conductor/tracks/qwen_llama_grok_followup_20260611/spec.md b/conductor/tracks/qwen_llama_grok_followup_20260611/spec.md
index 07f9b556..c092b400 100644
--- a/conductor/tracks/qwen_llama_grok_followup_20260611/spec.md
+++ b/conductor/tracks/qwen_llama_grok_followup_20260611/spec.md
@@ -29,10 +29,47 @@ The parent track `qwen_llama_grok_integration_20260606` (status: 50/79 tasks don
 
 ### Non-Goals (this track)
 
-- Not changing the matrix schema (the 7 v1 fields are good; v2 is additive)
-- Not changing the shared `send_openai_compatible` helper (it works; the tool loop is separate)
-- Not changing the `vendor_capabilities.py` lookup pattern (it works; registry is the source of truth)
-- Not adding new vendors (the parent track added Qwen/Grok/Llama; this track only consolidates what's there)
+- **Not** changing the matrix schema beyond the 7 v1 + 12 v2 = 19 fields (no further fields in this track)
+- **Not** changing the shared `send_openai_compatible` helper (it works; the tool loop is separate)
+- **Not** changing the `vendor_capabilities.py` lookup pattern (it works; registry is the source of truth)
+- **Not** adding new vendors (the parent track added Qwen/Grok/Llama; this track only consolidates what's there)
+- **Not** cleaning up the existing sprawl (the 3 stray `src/` files `vendor_capabilities.py`, `openai_compatible.py`, `qwen_adapter.py` — see Deferred Work below)
+- **Not** refactoring `src/ai_client.py` to a smaller line count (it's 2784 lines and the user said large files are fine)
+- **Not** lifting history management into a `VendorHistory` class (out of scope; the existing per-vendor pattern works)
+- **Not** lifting reasoning content extraction into a shared helper (out of scope; the per-vendor extraction is short)
+- **Not** lifting error classification into a per-HTTP-code helper (out of scope; the per-vendor classifiers are short)
+
+### Deferred Work (separate tracks; out of scope for this one)
+
+The user explicitly stated (2026-06-11): "I know I have to setup audit tracks and refactor tracks down the line to prune and cleanup the codebase but I also know thats not feasible while just trying to get you todo the right thing for this new way of handling vendors or models."
+
+Three follow-up tracks are documented as DEFERRED (not in scope for this track):
+
+1. **`namespace_cleanup_20260611`** — Audit the codebase for file sprawl. Specifically:
+   - Move `src/vendor_capabilities.py` content into `src/ai_client.py` (the file is in scope to MODIFY for the v2 fields in this track, but moving it as a whole is the cleanup track's job)
+   - Move `src/openai_compatible.py` content into `src/ai_client.py`
+   - Move `src/qwen_adapter.py` content into `src/ai_client.py`
+   - Audit OTHER modules for similar sprawl: `src/imgui_scopes.py`, `src/markdown_helper.py`, `src/markdown_table.py`, `src/io_pool.py`, `src/external_editor.py`, `src/performance_monitor.py`, `src/session_logger.py`, etc. Some may legitimately be sub-systems that should be namespace-isolated; others may be helpers that should fold into a parent.
+
+2. **`ai_client_codepath_consolidation_20260611`** — Reduce duplication in `src/ai_client.py` (currently 2784 lines) by:
+   - Lifting history management into a `VendorHistory` class (each vendor has its own lock + history list; the per-vendor boilerplate is ~30 lines × 8 vendors = 240 lines of duplication)
+   - Lifting reasoning content extraction into a shared helper
+   - Lifting error classification into a per-HTTP-code helper
+   - Lifting the per-vendor client init into a uniform pattern
+   - The line count reduction is estimated at 30-40% (~1000 lines saved)
+   - **Note:** the user explicitly said large files are FINE, so this codepath consolidation is about REDUCING DUPLICATION, not about reducing file size. The file can stay large; we just want less repetition.
+
+3. **`mcp_architecture_refactor_20260606`** (already specced) — Splits `src/mcp_client.py` (2,205 lines) into 6 sub-MCPs (`mcp_file_io.py`, `mcp_python.py`, `mcp_c.py`, `mcp_cpp.py`, `mcp_web.py`, `mcp_analysis.py`). This is the OPPOSITE direction of the user's preference (the user wants things in one file, not split). **Note:** this track is already specced in the parent tracks.md; whether to actually execute it (vs. abort it) is a separate decision. The user may want to abort this track.
+
+### Naming Convention Reference (HARD RULE, per `AGENTS.md`)
+
+New `src/<thing>.py` files may only be created on the user's explicit request. If you find yourself about to create one, **ASK FIRST** — don't just create it. Defaults:
+- Helpers and sub-systems go in the parent module
+- E.g., AI-client-specific code goes in `src/ai_client.py`; MCP-client code goes in `src/mcp_client.py`
+- Even if the parent file is already 3K+ lines, the helper still goes there
+- The only new files this project ever creates (per typical track) are: `scripts/audit_*.py`, `tests/test_*.py`, and `docs/*.md`
+
+See `AGENTS.md` "File Size and Naming Convention" for the full rule. This rule was added 2026-06-11 after the user called out the LLM training data bias against large files.
 
 ---
 
@@ -40,6 +77,8 @@ The parent track `qwen_llama_grok_integration_20260606` (status: 50/79 tasks don
 
 ### A.1 Tool Loop Lift
 
+**Naming convention (HARD RULE, per `AGENTS.md`):** `run_with_tool_loop` lives IN `src/ai_client.py`, not in a new `src/tool_loop.py`. New `src/<thing>.py` files may only be created on the user's explicit request. The only new files in this track are: `scripts/audit_*.py`, `tests/test_*.py`, and `docs/*.md`. See `AGENTS.md` "File Size and Naming Convention" for the full rule.
+
 Today:
 ```python
 # in _send_minimax (only):
@@ -54,9 +93,9 @@ for _round in range(MAX_TOOL_ROUNDS + 2):
 # in _send_anthropic, _send_gemini, _send_gemini_cli, _send_deepseek: inline loop (4-way duplication)
 ```
 
-After:
+After (all in `src/ai_client.py`):
 ```python
-# new src/tool_loop.py:
+# added near _execute_tool_calls_concurrently at src/ai_client.py:754
 def run_with_tool_loop(
     client, request, capabilities, *,
     pre_tool_callback, qa_callback, patch_callback,
@@ -76,12 +115,14 @@ response = run_with_tool_loop(
     base_dir=base_dir, vendor_name="<vendor>",
     history_lock=_<vendor>_history_lock,
     history=_<vendor>_history,
-    trim_func=_(vendor)_trim_history,
+    trim_func=_<vendor>_trim_history,
 )
 ```
 
 The helper takes history management as injected parameters (each vendor has its own lock and history list). The tool dispatch (`_execute_tool_calls_concurrently`) takes a `vendor_name` string.
 
+**Audit enforcement:** the new `scripts/audit_no_inline_tool_loops.py` fails if any `_send_<vendor>()` contains an inline `for <var> in range(MAX_TOOL_ROUNDS` loop, whatever the loop variable is named (e.g. `_round` or `_round_idx`).
+
 ### A.2 PROVIDERS Move
 
 Today:
@@ -115,16 +156,20 @@ if not caps.<field>:
 
 ### B.1 Local-First Architecture
 
-- Add `local_backend: bool` to `VendorCapabilities` (default False)
+**Per user feedback (2026-06-11):** "I want to put more emphasis and supporting local models and separating local model vending vis online/cloud vendors of models." Local models must be first-class, not "one of 3 backends."
+
+- Add `local: bool` to `VendorCapabilities` (default False)
 - Set True for Llama (when base_url is localhost/127.0.0.1)
-- Native Ollama adapter: `src/llama_ollama_native.py` (separate from openai_compatible)
-- Meta Llama API adapter: `src/llama_meta_api.py` (verify docs first)
-- GUI: new "Local Model" badge in the AI Settings panel
-- Cost panel: 4th state "Local (no cost)" distinct from "Free (local)" and "—"
+- **Native Ollama adapter (in `src/ai_client.py`, NOT a new file):** `ollama_chat()` function lives alongside the existing `_send_llama`. The Ollama backend routes to native `/api/chat` (with `think`, `images` array) instead of OpenAI-compatible `/v1/chat/completions`. Native is the DEFAULT for localhost.
+- **Meta Llama API as 4th backend (in `src/ai_client.py`):** `meta_llama_chat()` function. **Prerequisite:** verify the URL `https://llama.developer.meta.com/docs/overview` is reachable; it returned 400 in the parent's session. If unreachable on track start, DEFER the Meta backend to a separate follow-up; the native Ollama + 3 existing backends still ship.
+- **GUI: "Local Model" badge** in the AI Settings panel when `caps.local` is True
+- **Cost panel: 4th state "Local (no cost)"** distinct from "Free (local)" and "—". This replaces adaptation 8's "Free (local)" wording per the v2 matrix: the original parent Phase 5 wording was acceptable, but the follow-up's v2 matrix adds an explicit `local` field, which lets the UI express this more cleanly.
+
+**Naming convention (HARD RULE):** `ollama_chat()` and `meta_llama_chat()` live in `src/ai_client.py` (NOT new `src/llama_ollama_native.py` and `src/llama_meta_api.py`). Per `AGENTS.md` "File Size and Naming Convention" — new top-level `src/<thing>.py` files require explicit user request.
 
 ### B.2 Matrix Expansion (v2)
 
-Add to `VendorCapabilities`:
+Add to `VendorCapabilities` (the 12 v2 fields):
 - `local: bool` (B.1)
 - `reasoning: bool` (xAI `reasoning_effort`, Anthropic extended thinking, Ollama `think`)
 - `structured_output: bool` (response_format / format)
@@ -140,6 +185,20 @@ Add to `VendorCapabilities`:
 
 Each new field is a registry update + a UI adaptation. The matrix schema grows; the GUI filters based on the matrix.
 
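+**UI adaptations for v2 fields** (in `src/gui_2.py`; one per field, except that `web_search` and `x_search` share the search tool UI, and `local` is covered by the B.1 "Local Model" badge):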
+- `reasoning` → "Reasoning" toggle (controls `reasoning_effort` for xAI, etc.)
+- `structured_output` → "JSON output" toggle
+- `code_execution` → "Code execution" panel (when True)
+- `web_search`, `x_search` → Search tool UI
+- `file_search` → File search panel
+- `mcp_support` → MCP integration toggle
+- `audio` → Audio attachment button (replaces the absent-but-deferred audio_input)
+- `video` → Video attachment button
+- `grounding` → "Grounding" toggle
+- `computer_use` → "Computer Use" toggle
+
+Most of these UI adaptations are small (5-10 line additions per field). They can ship as one commit per field, or as a single batch commit at the end of Phase 4.
+
 ### C.1 Anthropic / Gemini / DeepSeek Migration
 
 Per the deferred follow-up track `anthropic_gemini_deepseek_capability_matrix_20260606` (parent spec §13.1.A). The capability matrix entries for these vendors can be populated:
@@ -155,7 +214,7 @@ The implementations (`_send_anthropic`, `_send_gemini`, `_send_deepseek`) keep t
 
 ### Phase 1: Tool Loop Lift (1-2 weeks)
 - T1.1: Write red tests for `run_with_tool_loop` (5 tests covering: no tool calls returns immediately, tool calls dispatch, max rounds limit, history appending, error in tool call doesn't crash)
-- T1.2: Implement `src/tool_loop.py` with `run_with_tool_loop`
+- T1.2: Implement `run_with_tool_loop` in `src/ai_client.py` (NOT a new file; per the naming convention HARD RULE)
 - T1.3: Apply to `_send_minimax` (replace inline loop)
 - T1.4: Apply to `_send_qwen`, `_send_grok`, `_send_llama` (add the missing loop)
 - T1.5: Apply to `_send_anthropic`, `_send_gemini`, `_send_gemini_cli`, `_send_deepseek` (consolidate)
@@ -181,8 +240,8 @@ The implementations (`_send_anthropic`, `_send_gemini`, `_send_deepseek`) keep t
 
 ### Phase 4: Local-First + Matrix Expansion (1-2 weeks)
 - T4.1: Add `local: bool` to VendorCapabilities; update registry for Llama
-- T4.2: Native Ollama adapter (`src/llama_ollama_native.py`); replace OpenAI-compatible for Ollama backend
-- T4.3: Meta Llama API adapter (`src/llama_meta_api.py`); add as 4th Llama backend
+- T4.2: Native Ollama adapter (in `src/ai_client.py` as `ollama_chat` + `_send_llama_native`); replace OpenAI-compatible for Ollama backend
+- T4.3: Meta Llama API adapter (in `src/ai_client.py` as `meta_llama_chat`); add as 4th Llama backend (DEFER if URL still 400)
 - T4.4: GUI: "Local Model" badge
 - T4.5: Add v2 fields (local, reasoning, structured_output, code_execution, web_search, x_search, file_search, mcp_support, audio, video, grounding, computer_use)
 - T4.6: Update all vendor registry entries with the new fields
@@ -216,9 +275,9 @@ The implementations (`_send_anthropic`, `_send_gemini`, `_send_deepseek`) keep t
 
 ## Open Questions
 
-1. **`src/ai_client_providers.py` vs `src/ai_client.py`?** Should PROVIDERS go in a new file (clearer separation) or stay in the main ai_client module (less file proliferation)?
-2. **Meta Llama API spec verification:** The 400 error on the docs URL last session — is it back up? If not, defer the Meta backend.
-3. **Local model as separate UI mode?** Should the GUI have a "Local / Cloud / All" filter on the provider dropdown, or just show the local badge per-vendor?
+1. **Meta Llama API spec verification:** `https://llama.developer.meta.com/docs/overview` returned a 400 error last session. Re-verify on Phase 4 start. If still 400, **defer the Meta backend** to a separate follow-up; the native Ollama + 3 existing backends still ship.
+2. **Local model as separate UI mode?** Should the GUI have a "Local / Cloud / All" filter on the provider dropdown, or just show the local badge per-vendor? Default: per-vendor badge (Phase 4 minimum). The filter is a future-track enhancement.
+3. **PROVIDERS location:** **RESOLVED (2026-06-11):** `src/ai_client.py` (NOT a new `src/ai_client_providers.py`). The PROVIDERS list is small (8 entries); creating a new file for a single constant is over-engineering. The vendor list is logically part of the AI client.
 
 ---
 
diff --git a/conductor/tracks/qwen_llama_grok_followup_20260611/state.toml b/conductor/tracks/qwen_llama_grok_followup_20260611/state.toml
index cf1473e4..d9a5996c 100644
--- a/conductor/tracks/qwen_llama_grok_followup_20260611/state.toml
+++ b/conductor/tracks/qwen_llama_grok_followup_20260611/state.toml
@@ -24,7 +24,7 @@ phase_5 = { status = "pending", checkpoint_sha = "", name = "Anthropic/Gemini/De
 t1_1 = { status = "pending", commit_sha = "", description = "Read tool-loop patterns in _send_minimax + the 4 inline-loop vendors" }
 t1_2 = { status = "pending", commit_sha = "", description = "Design run_with_tool_loop helper signature" }
 t1_3 = { status = "pending", commit_sha = "", description = "Red: 5 tests for run_with_tool_loop in tests/test_tool_loop.py" }
-t1_4 = { status = "pending", commit_sha = "", description = "Green: implement run_with_tool_loop in src/tool_loop.py" }
+t1_4 = { status = "pending", commit_sha = "", description = "Green: implement run_with_tool_loop in src/ai_client.py" }
 t1_5 = { status = "pending", commit_sha = "", description = "Apply to _send_minimax (replace inline loop)" }
 t1_6 = { status = "pending", commit_sha = "", description = "Apply to _send_qwen + _send_grok + _send_llama (add missing loop)" }
 t1_7 = { status = "pending", commit_sha = "", description = "Apply to _send_anthropic + _send_gemini + _send_gemini_cli + _send_deepseek (consolidate inline)" }
@@ -48,8 +48,8 @@ t3_8 = { status = "pending", commit_sha = "", description = "Adaptation 9: cost
 t3_9 = { status = "pending", commit_sha = "", description = "Phase 3 checkpoint + git note" }
 # Phase 4: Local-first + matrix v2
 t4_1 = { status = "pending", commit_sha = "", description = "Add local: bool to VendorCapabilities" }
-t4_2 = { status = "pending", commit_sha = "", description = "Native Ollama adapter src/llama_ollama_native.py" }
-t4_3 = { status = "pending", commit_sha = "", description = "Meta Llama API adapter src/llama_meta_api.py" }
+t4_2 = { status = "pending", commit_sha = "", description = "Native Ollama adapter (in src/ai_client.py as ollama_chat + _send_llama_native; route Ollama backend to it)" }
+t4_3 = { status = "pending", commit_sha = "", description = "Meta Llama API adapter (in src/ai_client.py as meta_llama_chat; new 4th Llama backend; DEFER if URL still 400)" }
 t4_4 = { status = "pending", commit_sha = "", description = "GUI: 'Local Model' badge" }
 t4_5 = { status = "pending", commit_sha = "", description = "Add 12 v2 fields to VendorCapabilities" }
 t4_6 = { status = "pending", commit_sha = "", description = "Update all vendor registry entries" }