From 77141363bc97c9784d8105754088b82123096847 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Fri, 12 Jun 2026 08:16:08 -0400 Subject: [PATCH] nagent: add v2 and v2.1 review reports - v2 (nagent_review_v2_20260612.md, ~68KB): first delta report on the 8 new nagent commits between 2026-06-08 and 2026-06-12. Introduces 5 new future-track candidates (11-15): knowledge harvest, stable-to-volatile context ordering for caching, conversation compaction, project context files, save-with-graceful-summary-failure. Notes heavy RAG emphasis as the comparison frame for knowledge harvest (later corrected in v2.1). - v2.1 (nagent_review_v2_1_20260612.md, ~59KB): user-driven revision of v2. Five corrections applied: 1. CLAUDE.md -> AGENTS.md swap (Manual Slop has AGENTS.md, not CLAUDE.md) 2. Reframed Candidate 11 from 'RAG alternative' to 'third memory dimension' (curation + discussion + RAG + knowledge) 3. Cache TTL GUI controls added (sub-candidate 12b) per user request 4. RAG integration discipline added (new sub-section 2.10) per user's 'be conservative' rule 5. v2 preserved as draft; v2.1 is non-destructive new file v2.1 also proposes new agent-facing artifacts (canonical DOD file, AGENTS.md update, new ./docs/AGENTS.md) and 8 new styleguides/docs. v2.1 source-citations grounded in 18 nagent source files read in full. - state.toml and metadata.json updated with v2.1 tasks and a v2.1_review block; v1 artifacts preserved per original user instruction. Pending: style preferences (table-based, forth/array-like, not JSON) and the user's upcoming intent-based-scripting-languages report. 
--- .../nagent_review_20260608/metadata.json | 101 +++ .../nagent_review_v2_1_20260612.md | 688 +++++++++++++++ .../nagent_review_v2_20260612.md | 820 ++++++++++++++++++ .../tracks/nagent_review_20260608/state.toml | 40 +- 4 files changed, 1648 insertions(+), 1 deletion(-) create mode 100644 conductor/tracks/nagent_review_20260608/nagent_review_v2_1_20260612.md create mode 100644 conductor/tracks/nagent_review_20260608/nagent_review_v2_20260612.md diff --git a/conductor/tracks/nagent_review_20260608/metadata.json b/conductor/tracks/nagent_review_20260608/metadata.json index 81c9477c..35ed2050 100644 --- a/conductor/tracks/nagent_review_20260608/metadata.json +++ b/conductor/tracks/nagent_review_20260608/metadata.json @@ -36,6 +36,107 @@ "estimated_phases": 0, "spec": "spec.md", "plan": null, + "v2_review": { + "date": "2026-06-12", + "report": "nagent_review_v2_20260612.md", + "nagent_commits_reviewed": [ + "2c3c78b (2026-06-11 03:32:50) Add conversation compaction and restore initial context on load", + "67a3ea5 (2026-06-11 23:09:57) Add knowledge harvest, tag parser, and claude-code provider", + "5e269ca (2026-06-12 00:17:34) Add project context, prompt caching, and conversation direction", + "ee72cb4 (2026-06-11 23:10:12) Rewrite README prompt around a teaching arc and regenerate README" + ], + "nagent_pushed_at_review": "2026-06-12T00:25:52Z", + "nagent_head_at_review": "eb6be32a", + "new_patterns_identified": [ + "Knowledge harvest (nagent-gc) - new Candidate 11, HIGH priority", + "Stable-to-volatile context ordering for prompt caching - new Candidate 12, MEDIUM priority", + "Conversation compaction (--compact) - new Candidate 13, MEDIUM priority", + "Project context files (context.yaml) - new Candidate 14, LOW priority", + "Save-with-graceful-summary-failure - new Candidate 15, TBD pending source read", + "claude-code provider (subscription auth) - existing Gemini CLI analog, no new track", + "Per-file knowledge notes (knowledge/files/{file_id}.md) - bundle 
with Candidate 11", + "Delete-to-turn-off feature flags - design pattern, not a track", + "Delegation reframed as context management (not parallelism) - design pattern, not a track" + ], + "v1_artifacts_staleness": { + "report_md": "9 of 16 sections need updates; new sub-sections on knowledge harvest, compaction, caching, project context, claude-code", + "comparison_table_md": "4 existing rows need updates; 4 new rows needed (knowledge harvest, prompt caching strategy, compaction, per-file notes)", + "decisions_md": "4 existing candidates need updates; 5 new candidates (11-15) needed", + "nagent_takeaways_20260608_md": "6 of 10 takeaways need updates; 3 new takeaways needed", + "spec_md": "Still correct; no change needed", + "state_toml_and_metadata_json": "Updated to reflect v2 review (this file)" + }, + "v1_artifacts_preserved": "All v1 files preserved per user instruction ('don't delete the old report'). v2 is additive.", + "user_signal_recorded_for_v2": "User has not yet seen v2 findings; primary surface is Candidate 11 (knowledge harvest) and verification of Candidate 15 (save-with-graceful-failure).", + "next_steps_recommended": [ + "User review of v2 report", + "If user approves: update v1 decisions.md / comparison_table.md / nagent_takeaways_20260608.md to integrate v2 findings", + "Update agent workflow docs (AGENTS.md, conductor/workflow.md, conductor/product-guidelines.md) with v2 design principles", + "Tier 2 source-read: verify 8 items in v2 report §8 before any new candidate is scoped", + "After integration: consider whether to mark v1 track as completed (preserved in archive/) or leave active for further iterations" + ] + }, + "v2_1_review": { + "date": "2026-06-12", + "report": "nagent_review_v2_1_20260612.md", + "status": "v2.1 is the user-revised version; v2 is preserved as the draft per user instruction", + "user_corrections_applied": [ + "CLAUDE.md → AGENTS.md swap throughout (Manual Slop has AGENTS.md, not CLAUDE.md)", + "Reframed Candidate 11 
from 'RAG alternative' to 'third memory dimension' (curation + discussion + RAG + knowledge); removed heavy RAG emphasis", + "Added new sub-section 2.10 'RAG integration discipline' (conservative RAG wiring; where RAG fits; where it does not)", + "Expanded Candidate 12 with cache TTL GUI controls (sub-candidate 12b) per user's explicit 'how long the caches are available for (gemini has a limit for example)'", + "Preserved v2 as the draft (NON-DESTRUCTIVE write to nagent_review_v2_1_20260612.md)", + "Preserved Readme.md and docs/Readme.md as human-facing; proposed new agent-facing files (AGENTS.md @import update; new ./docs/AGENTS.md) instead" + ], + "nagent_source_reads_in_full": [ + "bin/nagent (2524 lines) — main loop, build_initial_context at 606-745, conversation_cache_boundaries at 970-987, call_llm at 990-1019, compact_conversation at 1975-2019, --save-conversation at 2147, --branch-conversation at 2157, --compact at 2178", + "bin/helpers/nagent_gc_lib.py (~700 lines, 27KB) — the knowledge harvest library", + "bin/helpers/nagent_tags.py — the new explicit tag parser (replaces regex)", + "bin/helpers/nagent_llm.py — 5+1 providers, cache_prefix_blocks, claude-code provider", + "bin/nagent-gc — the GC CLI wrapper", + "prompts/compact-conversation.md — compaction guidance prompt", + "prompts/harvest-conversation.md — strict-JSON harvest prompt", + "context/data-oriented-design.md (13084 bytes) — the canonical DOD reference (Tier 0/1/2, simplification pass, enforceable deliverables)", + "CLAUDE.md (5832 bytes) — the agent-facing rules file with @import pattern" + ], + "new_candidates_proposed": [ + "Candidate 11 REFRAMED (HIGH) — third memory dimension (not RAG alternative)", + "Candidate 12 EXPANDED (MEDIUM) — 12a stable-to-volatile ordering + 12b cache TTL GUI controls", + "Candidate 16 NEW (HIGH) — AGENTS.md @import pattern + canonical DOD file (foundation for other styleguides)" + ], + "new_artifacts_proposed_for_next_turn": { + "new_agent_facing_files": [ + 
"conductor/code_styleguides/data_oriented_design.md (NEW canonical DOD file)", + "AGENTS.md (UPDATE — add @import line)", + "./docs/AGENTS.md (NEW — agent-facing mirror of docs/Readme.md)" + ], + "new_styleguides": [ + "conductor/code_styleguides/agent_memory_dimensions.md", + "conductor/code_styleguides/rag_integration_discipline.md", + "conductor/code_styleguides/cache_friendly_context.md", + "conductor/code_styleguides/knowledge_artifacts.md", + "conductor/code_styleguides/feature_flags.md" + ], + "new_project_docs": [ + "docs/guide_knowledge_curation.md", + "docs/guide_caching_strategy.md", + "docs/guide_agent_memory_dimensions.md" + ], + "updates_to_existing_workflow_docs": [ + "conductor/workflow.md (TDD protocol additions)", + "conductor/product-guidelines.md (memory dimensions section)", + "docs/guide_mma.md (context management framing)", + "docs/guide_ai_client.md (cache TTL section)" + ] + }, + "preserved_files_NOT_modified": [ + "nagent_review_v2_20260612.md (v2 draft, per user instruction)", + "report.md, comparison_table.md, decisions.md, nagent_takeaways_20260608.md (v1 review artifacts)", + "spec.md, state.toml (original), metadata.json (pre-v2.1)", + "Readme.md (project root, human-facing)", + "docs/Readme.md (docs index, human-facing)" + ] + }, "nagent_principles_covered": [ "Durable work, disposable workers", "Text in, text out", diff --git a/conductor/tracks/nagent_review_20260608/nagent_review_v2_1_20260612.md b/conductor/tracks/nagent_review_20260608/nagent_review_v2_1_20260612.md new file mode 100644 index 00000000..85b48fa2 --- /dev/null +++ b/conductor/tracks/nagent_review_20260608/nagent_review_v2_1_20260612.md @@ -0,0 +1,688 @@ +# nagent Review v2.1: User-Revision of v2 + +**Track:** `nagent_review_20260608` +**Date:** 2026-06-12 +**Author:** Tier 1 Orchestrator +**Companion to:** `nagent_review_v2_20260612.md` (the v2 draft, preserved as-is) +**Status:** v2.1 is a user-driven revision of v2; v2 remains the draft, v2.1 is the corrected 
version. +**Read this if:** the v2 framing was off in places, the heavy RAG emphasis was wrong, the CLAUDE.md swap needs to be AGENTS.md, or the new agent-facing docs need to be planned. + +> **Revision note (2026-06-12).** v2 (`nagent_review_v2_20260612.md`, ~68KB) was the first delta report. The user reviewed it and pushed back on five points. v2.1 incorporates all five corrections: +> +> 1. **CLAUDE.md → AGENTS.md swap.** nagent added a `CLAUDE.md`; Manual Slop has `AGENTS.md` (and not `CLAUDE.md`) in active use. All "CLAUDE.md pattern" references in v2 are now "AGENTS.md pattern" in v2.1. +> 2. **Heavy RAG emphasis is wrong.** RAG is opt-in, not mandatory, and the user is conservative about wiring it. Candidate 11 (Knowledge Harvest) should be reframed as a **third memory dimension** that complements Manual Slop's existing **curation** (FileItem + ContextPreset) and **discussion editing** (per-entry A1-A7) — *not* as a RAG replacement. v2 had a 4-paragraph RAG-comparison table that was the wrong shape; v2.1 reframes Candidate 11 around the existing memory landscape and adds a new "RAG integration discipline" sub-section that says *where* RAG fits (and where it does not). +> 3. **No restructuring of `Readme.md` or `docs/Readme.md`.** Those are human-facing docs and stay that way. v2.1 proposes **new agent-facing files** instead: `AGENTS.md` updates (add `@import` pattern) and a **new `./docs/AGENTS.md`** that mirrors the nagent CLAUDE.md model. The human Readme files are not touched. +> 4. **Cache TTL GUI controls.** v2 had no mention of explicit cache TTL per provider. The user wants GUI controls for: (a) which discussions get cached, (b) when to invalidate, (c) how long caches are available (Gemini explicit caching has a default 1-hour TTL; Anthropic ephemeral caching has a 5-minute default). v2.1 adds these as sub-candidates under Candidate 12. +> 5. **Source reads in full.** v2 was based on the README + commit messages. 
v2.1 is based on the full source: `bin/nagent` (2524 lines), `bin/helpers/nagent_gc_lib.py` (the harvest lib), `bin/helpers/nagent_tags.py`, `bin/helpers/nagent_llm.py` (cache_prefix_blocks), `bin/nagent-gc`, the prompts (`prompts/compact-conversation.md`, `prompts/harvest-conversation.md`), the canonical `context/data-oriented-design.md`, and `CLAUDE.md` (for the import pattern). v2.1 has file:line citations throughout. +> +> **Net effect.** v2 is preserved (it's a useful baseline). v2.1 supersedes v2 in substance but does not delete or overwrite v2. v1 is preserved (per the original user instruction). + +--- + +## 0. TL;DR (revised) + +| New in nagent | Manual Slop equivalent (corrected) | Verdict | New future-track candidate? | +|---|---|---|---| +| **Knowledge harvest** (`nagent-gc` → `~/.nagent/knowledge/`) | **THIRD memory dimension**, alongside (a) **curation** memory (`FileItem` + `ContextPreset` + Fuzzy Anchors) and (b) **discussion** memory (`disc_entries` + branching + UISnapshot A1-A7). RAG (`src/rag_engine.py`) is opt-in and not in the comparison. | **GAP (Application).** Knowledge dimension absent; curation and discussion dimensions are present and strong. | **YES** — Candidate 11 (REFRAMED) | +| **Prompt caching with stable-to-volatile context ordering** (`bin/nagent:1013-1014` passes `--cache-prefix-chars`; `bin/helpers/nagent_llm.py:cache_prefix_blocks` splits at offsets and adds `cache_control: {"type": "ephemeral"}`) | `src/ai_client.py:_add_history_cache_breakpoint`, `_send_anthropic` already use `cache_control` | **PARTIAL.** Mechanism present; ordering not enforced. Cache TTL not exposed in GUI. | **YES** — Candidate 12 (EXPANDED with cache TTL GUI controls) | +| **Conversation compaction** (`--compact` with editable `prompts/compact-conversation.md`; root-first resolution) | `src/gui_2.py:4252` `Compress` button → `run_discussion_compression` (summarization, not compaction) | **GAP.** Summarize, not compact. 
| **YES** — Candidate 13 | +| **Project context files** (`context.yaml` at git toplevel) | `manual_slop.toml` per-project (different syntax) | **PARITY-DIFFERENT-MECHANISM.** | Maybe — Candidate 14 | +| **claude-code provider** (5th provider, subscription auth via Claude Agent SDK; `default` model = local config) | `src/ai_client.py:_send_gemini_cli` (parallel pattern: local subprocess auth) | **PARITY.** No new track; provider addition only if user wants. | No | +| **Per-file knowledge notes** (`knowledge/files/{file_id}.md` keyed by inode) | `models.FileItem` has no `notes` field | **GAP.** | Bundle with Candidate 11 | +| **"Delete to turn off" feature flags** (`rm digest.md` → injection off) | `[ai_settings.toml]` toggles, GUI checkboxes | **PARITY-DIFFERENT-MECHANISM.** Design pattern, not a track. | N/A | +| **Save-with-graceful-summary-failure** (summary fails → save still completes with `(summary unavailable)` marker) | `ai_client.run_discussion_compression` failure mode **NEEDS SOURCE READ** | **UNKNOWN.** | Candidate 15 (TBD) | +| **AGENTS.md / `@import` pattern** (nagent's `CLAUDE.md` imports `context/data-oriented-design.md`) | Manual Slop has `AGENTS.md` already; canonical reference file is absent | **GAP.** Need to add the canonical rules file and the `@import` pattern. | Yes (workflow doc update, not a separate track) | +| **Delegation reframed as "context management, not parallelism"** | MMA already does this implicitly | **PARITY (new framing).** Design pattern, not a track. | N/A | +| **Cache TTL exposure** (nagent doesn't expose this — providers do) | Manual Slop has Gemini explicit cache + Anthropic ephemeral cache; no GUI for TTL | **GAP (UX).** | Sub-candidate under 12 | + +**Verdict in one sentence (revised):** v2 nagent's major new pattern is **knowledge harvest**, which fits as a **third memory dimension** alongside Manual Slop's existing curation and discussion memory. 
The caching pattern is **partially present** (mechanism, no ordering discipline, no GUI exposure of TTL). Compaction is **absent** (we have summarize, not compact). The AGENTS.md `@import` pattern is **absent** in Manual Slop and is the foundation for a new canonical rules file. + +--- + +## 1. Source reads in full (new in v2.1) + +The v2 report was based on the README + commit messages. v2.1 is grounded in the actual source: + +| File | Lines | What was learned (key citations) | +|---|---|---| +| `bin/nagent` | 2524 | `build_initial_context` at line 606-745 (the full block order, see §2.4 below); `conversation_cache_boundaries` at line 970-987 (the two stable/volatile boundary points); `call_llm` at line 990-1019 (passes `--cache-prefix-chars` per boundary at line 1013-1014); `compact_conversation` at line 1975-2019 (compaction is implemented as `edit_conversation` with a `compact_guidance` prompt); `--save-conversation` at line 2147; `--branch-conversation` at line 2157; `--compact` at line 2178; the `` and `conversation-name="name"` worker-reuse tags at line 704-706 | +| `bin/helpers/nagent_gc_lib.py` | ~700 (27KB) | The full knowledge harvest pipeline: `ITEM_CATEGORIES = ("facts", "decisions", "tasks_done", "tasks_open", "questions", "playbooks", "files")` (line ~30); `DIGEST_MAX_BYTES = 4 * 1024` (line ~13); `MAX_HARVEST_SOURCE_BYTES = 1024 * 1024` (line 14, the 1MB budget); `HARVEST_MAX_ATTEMPTS = 2` (line 15, retry budget); `merge_harvest` appends bullets with provenance `[from: conversation_name, date]` (line ~285); the file_notes branch: if path resolves to existing file → `knowledge/files/{file_id}.md`; else fall back to `facts.md` (line ~325); `regenerate_digest` orders sections (Open tasks, Open questions, Decisions, Facts, Playbooks) and reverses bullets for newest-first (line ~395); `run_gc` ledger gate (line ~510); "too-large" handling (line ~595); "already harvested" path (line ~570) | +| `bin/helpers/nagent_tags.py` | ~160 (6KB) | The new explicit 
tag parser. Replaces regex parsing. `TagNode` dataclass with `name, attrs, content, self_closing, start, end`. `parse_tag_document` walks whitespace + elements. `find_block_span`, `extract_block`, `replace_first_block`, `remove_first_block` are the public helpers. **The protocol is XML-ish, not XML** — first matching close tag wins; no entity escaping. | +| `bin/helpers/nagent_llm.py` | ~440 (20KB) | 5 providers (openai, anthropic, google, cursor, claude-code); `CREDENTIAL_ENV` is empty for claude-code (local login); `cache_prefix_blocks` at the bottom: splits message at given char offsets (max 3 prefix blocks per Anthropic's 4-breakpoint limit), adds `cache_control: {"type": "ephemeral"}` to each prefix; `_result_with_usage` folds `cache_read_input_tokens + cache_creation_input_tokens` into `input_tokens` so "input_tokens stays 'tokens sent' across providers"; `claude-code` provider uses `Claude Agent SDK`, `model=None` for default, `max_turns=1` for text gen, `max_turns=None` for file-upload mode | +| `bin/nagent-gc` | ~150 (5KB) | The CLI wrapper. `run_gc` is library-callable; CLI defaults to dry-run. `--apply` mutates; `--no-harvest` reclaims without LLM pass; `--max-harvest-bytes N` caps the conversation bytes sent to the LLM this run (deferral pattern). | +| `prompts/compact-conversation.md` | 3237 bytes | The compaction guidance prompt. Key sections: "Core Principle: The agent is not the thing. 
The data is the thing."; "Data-Oriented Rules" (what to keep / what to remove); "Transformation Rules" (replace many shell commands with verified outcomes, etc.); "Preserve Artifact Knowledge" (keep references to root context, per-file conversations, file summaries, repository history summaries); "Preserve Failure Knowledge" (keep failed experiments, rejected designs, dangerous edge cases); "Required Output Structure" (User Intent, Current Objective, Accepted Decisions, Constraints, Durable Knowledge, etc.); "Self Review" (10 yes/no questions) | +| `prompts/harvest-conversation.md` | 1674 bytes | The harvest prompt. Strict JSON output, no prose, no markdown fence. Categories: `facts, decisions, tasks_done, tasks_open, questions, playbooks, files`. Per-category rules ("facts: durable statements about systems... not assumed"). "Empty arrays are valid and expected: most conversations contain nothing durable. Do not invent items to fill categories." | +| `context/data-oriented-design.md` | 13084 bytes | The canonical DOD reference. Defines "Tier 0/1/2" complexity levels (analogous to the 4-tier MMA). The "three default beliefs to reject" (tools are not the platform; design around a model of the world; solution matters more than the data). The "simplification pass" (7 questions: not do this; only once; fewer times; approximate; small lookup; large lookup; small buffer). The "data protocols between systems" section. The "enforceable deliverables (tier 2)" section (batch transform contract, plural/batch path, justification for any pointer-heavy hot path, explicit out-of-range behavior, unresolved design questions as local issue files). The "final self-check" checklist. **This is the file that should be the canonical reference for Manual Slop's DOD principles.** | +| `CLAUDE.md` | 5832 bytes | The agent-facing rules file. Imports the canonical DOD file via `@context/data-oriented-design.md`. 
Has a "What this is" section, "Commands" section, "Architecture" section (the 4 reading levels: bin/nagent, helpers, CLI front-ends, lib), "The structured-tag protocol" section, "Durable state lives under `~/.nagent/`" section, "Write boundaries" section, "Large files" section, "Conventions for changes" section. **This is the pattern Manual Slop should mirror with `AGENTS.md` (project root) + a new `./docs/AGENTS.md` (docs root).** | +| `bin/helpers/nagent_cli.py` | 2642 bytes | `exit_on_description()` (the `--description` self-describing pattern); `collect_bin_tool_descriptions()` (iterates `bin/` and runs `--description` on each); `WaitSpinner` (animated spinner with `enabled` flag for non-TTY). | +| `bin/helpers/nagent_file_split_lib.py` | 15427 bytes | `source_sha256()` (the hash function for split validation); `SPLIT_TYPES` (11 languages); per-language scoring (regex + line counts + brace/JSON/XML depth); the recent O(n²) → O(n) perf fix. | +| `bin/helpers/nagent_file_edit_lib.py` | 5232 bytes | `file_id_for_path(path) -> "{st_dev}:{st_ino}"`; the per-file conversation index; the file-edit conversation file naming. | +| `bin/helpers/nagent_file_patch_lib.py` | 5086 bytes | `validate_index` (the strict hash check); `merge_segments`; `make_unified_patch`; `apply_segment_patches`. | +| `bin/helpers/nagent_file_summarize_lib.py` | 3884 bytes | `SUMMARY_MAX_ATTEMPTS = 2`; `summarize_content` (per-segment LLM call with retry); `combined_summary_from_index`. | +| `prompts/create-readme.md` | 28245 bytes | The README-generation prompt. **Not relevant to v2.1**; it's a workflow tool, not a system pattern. | +| `context.yaml` | 34 bytes | A pointer: `paths: [context/data-oriented-design.md]` | +| `requirements.txt` | 94 bytes | Dependencies: `claude-agent-sdk` (new), plus the standard openai/anthropic/google SDKs | + +**Implication for v1:** v1's report said it read the nagent source in full. 
v2.1 confirms that and adds the explicit file:line citations for the v2 patterns. The v1 spec.md claim "All 11 source files read in full" is now verifiable. + +--- + +## 2. Revised new-pattern analysis + +### 2.1 Knowledge Harvest (REFRAMED — was: "RAG alternative", now: "third memory dimension") + +**The reframing.** v2 framed Candidate 11 as "Manual Slop's RAG is fuzzy + opaque; nagent's knowledge store is exact + editable + provenance-aware." This is the wrong shape. RAG is opt-in, not mandatory, and the user is conservative about wiring it. The correct framing: + +**Manual Slop has two memory dimensions today. The new candidate adds a third.** + +| Dimension | Where it lives | What it stores | How it's edited | How it's queried | +|---|---|---|---|---| +| **Curation memory** (existing) | `FileItem` (path + view_mode + ast_mask + custom_slices) + `ContextPreset` (saved set of FileItems) + `models.py:510-559` | *How to render a file* in the AI's context window. Per-file, per-discussion. | GUI Structural File Editor; project TOML edit | Implicit in `aggregate.py:run` at discussion start | +| **Discussion memory** (existing) | `app.disc_entries` (`gui_2.py:3770-3853` per-entry A1-A7; `gui_2.py:4239-4260` discussion-level B1-B11; `src/history.py:8-63` UISnapshot A1-C5) | *What was said* in the conversation. Per-discussion, multi-turn. | GUI `[Edit]` mode per entry; `[Branch]` button; `UISnapshot` undo/redo | `aggregate.py:build_markdown` renders as the LLM's prior context | +| **RAG memory** (opt-in) | `src/rag_engine.py` (ChromaDB vector store) | *Semantic fingerprints* of indexed files. Cross-file, cross-discussion. 
| None (vector store is opaque) | `RAGEngine.search()` at LLM call time | +| **Knowledge memory** (proposed) | `~/.manual_slop/knowledge/{facts,decisions,questions,playbooks}.md` + `knowledge/files/{file_id}.md` + `knowledge/digest.md` + `knowledge/ledger.json` | *Durable learnings* harvested from past sessions: facts, decisions, tasks, questions, playbooks, per-file notes. Cross-discussion, provenance-aware. | Plain markdown edit (user can correct any "fact") | Bounded digest injected as a stable prefix at discussion start | + +**The harvest pattern in detail (from the source).** The `prompts/harvest-conversation.md` template produces strict JSON: + +```json +{ + "facts": [{"statement": "...", "detail": "..."}], + "decisions": [{"statement": "...", "detail": "..."}], + "tasks_done": [{"statement": "...", "detail": "..."}], + "tasks_open": [{"statement": "...", "detail": "..."}], + "questions": [{"statement": "...", "detail": "..."}], + "playbooks": [{"name": "...", "steps": "..."}], + "files": [{"path": "...", "note": "..."}] +} +``` + +Each bullet is appended to a category file with provenance: `f"{text} [from: {conversation_name}, {date}]"`. The `files` category is special: if the path resolves to an existing file, the note goes to `knowledge/files/{file_id}.md` (keyed by inode, like nagent's per-file conversations). If the path doesn't resolve, the note falls back to `facts.md` (the note survives, just loses the per-file binding). + +The digest (`regenerate_digest`, `bin/helpers/nagent_gc_lib.py:395+`) is a bounded (4KB max) regeneration from the category files. Sections in fixed order: Open tasks, Open questions, Decisions, Facts, Playbooks. **Newest first** within each section (because the category files are append-only). Truncation is explicit: "(truncated; see the category files for the rest)". + +The **ledger** (`bin/helpers/nagent_gc_lib.py:130+`) gates deletion on a sha256-of-content entry. Two conversations with identical content share a ledger entry. 
Identical content never pays the LLM twice. The `run_gc` "already harvested" branch reclaims without re-distilling: `if existing.get("status") == "harvested": reclaimed += size; path.unlink()`. + +**What "delete to turn off" means.** `regenerate_digest` deletes `digest.md` when there are no sections (`if not sections: if target.is_file(): target.unlink()`). The injection path in `build_initial_context` checks for the digest file's existence: `if knowledge_digest:` (line 677). Delete the file → no injection. The "feature flag" is the file's presence. + +**The user's specific instruction:** "I rather reframe manual slops current state with 'knowledge harvest' or curation to what is done with context composition relative to files & media, and direct discussion entry editing. I can expose more explicit controls in the future for handling discussion caching and what not.. also expose how long the caches are available for (gemini has a limit for example)." + +**The applied reframing:** +- "Knowledge harvest or curation" — the new dimension is "knowledge" in the sense of *durable, user-editable, provenance-aware learnings*, not curation (which is already well-covered by FileItem/ContextPreset). +- "what is done with context composition relative to files & media" — the existing curation memory is the FileItem/ContextPreset story. Don't compete with it; complement it. +- "direct discussion entry editing" — the existing discussion memory is the A1-A7/B1-B11/C1-C5 story. Don't compete with it; complement it. +- "expose more explicit controls in the future for handling discussion caching" — see §2.2 below (expanded Candidate 12). +- "how long the caches are available for (gemini has a limit for example)" — see §2.2 below (cache TTL GUI controls). +- **"we just make targeted wiring of rag usage across features where it may be beneficial but we should be conservative"** — see §2.10 below (RAG integration discipline). 
+ +**Verdict (revised).** **GAP (Application)** for the third memory dimension. The first two dimensions (curation, discussion) are well-covered. RAG is opt-in. Knowledge memory is the missing third dimension (the fourth memory type only if opt-in RAG is counted). + +**Domain tag:** Both. The user-facing knowledge store is Application; the harvest/regen cycle is Application-orchestrated but could be Meta-Tooling-triggered. + +**Effort:** Large (3-5 phases). See Candidate 11 in §6. + +**Recommended priority:** **HIGH** (unchanged from v2). + +--- + +### 2.2 Prompt Caching + Cache TTL GUI Controls (EXPANDED Candidate 12) + +**The two-part scope.** v2 had only "stable-to-volatile context ordering." v2.1 adds **cache TTL GUI controls** as a second sub-candidate. + +**Part A: Stable-to-volatile context ordering (same as v2).** The mechanism is grounded in the source. + +`bin/nagent:970-987` (`conversation_cache_boundaries`): +```python +def conversation_cache_boundaries(text: str) -> list[int]: + """Character offsets ending the stable prefixes of a conversation file. 
+ Two boundaries when the file starts with an initial-context block: the + start of the volatile Instance section (shared byte-for-byte across + conversations of the same mode and root) and the end of the context block + (stable across every turn of this conversation).""" + span = find_block_span(text, INITIAL_CONTEXT_BLOCK) + if span is None or span[0] != 0: + return [] + boundaries: list[int] = [] + volatile_at = text.find("\nInstance:", span[0], span[1]) + if volatile_at > 0: + boundaries.append(volatile_at) + if span[1] < len(text): + boundaries.append(span[1]) + return boundaries +``` + +`bin/nagent:1013-1014` (`call_llm`): +```python +for boundary in conversation_cache_boundaries(conversation_text): + command.extend(["--cache-prefix-chars", str(boundary)]) +``` + +`bin/helpers/nagent_llm.py` (`cache_prefix_blocks`): +```python +def cache_prefix_blocks(message: str, cache_boundaries: list[int] | None): + """Split a message into content blocks at the given character offsets, + marking each prefix block with cache_control so providers that cache on + block boundaries can reuse stable prefixes. At most 3 prefix blocks + (provider limit is 4 breakpoints per request).""" +``` + +This is the *implementation* of "stable-to-volatile ordering as data, not convention." The boundaries are *computed from the file* (the `\nInstance:` marker), not hard-coded. The `--cache-prefix-chars` flow is CLI-visible. The Anthropic-specific path wraps the message in `content` blocks with `cache_control: {"type": "ephemeral"}`. + +`bin/nagent:606-745` (`build_initial_context`) shows the actual block order — see §2.4 below. + +**Part B: Cache TTL GUI controls (NEW in v2.1).** The user said: "I can expose more explicit controls in the future for handling discussion caching and what not.. also expose how long the caches are available for (gemini has a limit for example)." 
+ +The Manual Slop current state: +- `src/ai_client.py:_send_gemini` has explicit caching (`get_gemini_cache_stats` is exported per the summary; `_GEMINI_CACHE_TTL` is a constant). +- `src/ai_client.py:_send_anthropic` has ephemeral caching via `cache_control` blocks. +- The GUI has no exposure of either: no per-discussion cache toggle, no TTL display, no cache hit rate, no "this discussion is currently cached" indicator. + +The user's specific ask: +1. **"Handling discussion caching"** — per-discussion decisions: cache this discussion? When to invalidate? Show the cache state in the GUI. +2. **"How long the caches are available for"** — TTL exposure. Gemini explicit cache default is 1 hour (configurable in API call). Anthropic ephemeral cache default is 5 minutes (per-request, not configurable). Google has its own model. +3. **"Targeted wiring of rag usage across features where it may be beneficial but we should be conservative"** — see §2.10. + +**The proposed GUI surface** (sub-candidate 12b): +- A "Caching" tab in Operations Hub (parallel to the planned Vendor State tab per Phase 8 UI Polish). +- Per-provider cache configuration: TTL override, model eligibility, default mode (ephemeral vs explicit vs none). +- Per-discussion cache state: which discussion is currently cached, when the cache was created, when it expires. +- Cache hit rate per provider (aggregated from `_send_anthropic` usage metadata which has `cache_read_input_tokens` and `cache_creation_input_tokens`). + +**Verdict (revised).** **PARTIAL** for ordering; **GAP (UX)** for cache TTL exposure. + +**Domain tag:** Application (UX + AI client internals). + +**Effort:** +- Part A (ordering): Small (1-2 phases) IF the existing cache_control calls are mostly correct. +- Part B (TTL GUI): Medium (2-3 phases) — new panel, AI client telemetry, per-discussion state tracking. + +**Recommended priority:** **MEDIUM** for Part A, **MEDIUM** for Part B (the user explicitly wants this). 
+ +--- + +### 2.3 Conversation Compaction (unchanged from v2) + +**Verdict:** **GAP (Application).** Manual Slop has `run_discussion_compression` (summarization). Compaction is the rewrite-in-place variant. + +**The source-grounded pattern.** `bin/nagent:1975-2019` (`compact_conversation`) is implemented as `edit_conversation` driven by the `compact_guidance` prompt. The compaction prompt (`prompts/compact-conversation.md`) defines: +- "Core Principle: The agent is not the thing. The data is the thing." +- "Data-Oriented Rules": keep decisions, user requirements, constraints, discovered invariants, successful/failed experiments; remove repeated reasoning, repeated shell output, duplicated summaries, obsolete hypotheses, intermediate exploration, dead conversations. +- "Transformation Rules": replace many shell commands with verified outcomes; replace long investigations with "conclusion + evidence"; merge duplicate investigations. +- "Preserve Artifact Knowledge": keep references to root context, per-file conversations, file summaries, repository history summaries, historical coupling, split indexes, patch artifacts. **Prefer references over duplication.** +- "Preserve Failure Knowledge": keep failed experiments, rejected designs, dangerous edge cases, corrected assumptions. +- "Required Output Structure": User Intent, Current Objective, Accepted Decisions, Constraints, Durable Knowledge (Global, Artifact Local, Repository History, Historical Coupling), Verified Facts, Important Failed Attempts, Open Questions, TODO, Minimal Context Needed To Continue. +- "Self Review": 10 yes/no questions, including "Can another worker continue immediately?", "Would expensive investigation need to be repeated?", "Has chronology been replaced with state?", "Is future capability unchanged or improved?". + +**Domain tag:** Application. + +**Effort:** Small to medium (1-2 phases). + +**Recommended priority:** **MEDIUM** (unchanged from v2). 
+ +--- + +### 2.4 Project Context Files (AGENTS.md swap applied) + +**The user's swap instruction.** "for the 3rd commit, we have an AGENTS.md but not a CLAUDE.md in active use. So lets swap that if posible." + +**What nagent v2 added.** Commit `d86bce8` "Add CLAUDE.md importing the shared data-oriented design rules" (2026-06-11 23:10:12). The CLAUDE.md file is 5832 bytes and imports the canonical `context/data-oriented-design.md` via `@context/data-oriented-design.md`. The same file is injected into every nagent conversation via the repo's `context.yaml`. One source of truth for both harnesses. + +**The pattern Manual Slop should mirror.** Manual Slop already has `AGENTS.md` (the project root, ~5.4KB per the AGENTS.md content I have). The pattern would be: +1. Create a canonical rules file at `conductor/code_styleguides/data_oriented_design.md` (new). +2. Add `@conductor/code_styleguides/data_oriented_design.md` to `AGENTS.md` (existing). +3. Inject the same file via `[agent]` section in `manual_slop.toml` (or equivalent project config) so the Application's RAG / context assembly picks it up. + +**The v2 CLAUDE.md content** (relevant excerpt): +```markdown +# CLAUDE.md +This file provides guidance to Claude Code when working with code in this repository. + +## Operating rules +@context/data-oriented-design.md +The same file is injected into every nagent conversation via the repo's context.yaml — +one source of truth for both harnesses. Edit it there; do not duplicate rules into this file. + +## What this is +**nagent** ("not-an-agent") is a small reference implementation of a data-oriented LLM +workflow loop. The thesis drives every design decision and should drive yours: +**the data is the thing, not the agent.** ... +``` + +**Where the canonical rules file would live for Manual Slop.** `conductor/code_styleguides/data_oriented_design.md` is the right place. 
It's already a directory of canonical styleguides (`chroma_cache.md`, `config_state_owner.md`, `error_handling.md`, `python.md`, `workspace_paths.md`). The DOD file would be the sixth. + +**The "human Readme files" constraint.** The user said: "I don't want to restructure my ./Readme or ./docs/Readme.md to be tailored towards agents. I want to keep those as human interfacing docs, I rather you make an agents readme (which is technically AGENTS.md, and a another for ./docs if necessary)." + +So: +- `Readme.md` (project root) — human-facing, **unchanged**. +- `docs/Readme.md` (docs index) — human-facing, **unchanged**. +- `AGENTS.md` (project root) — **agent-facing**; exists already, gets `@import` update. +- `./docs/AGENTS.md` (NEW) — **agent-facing** mirror of `docs/Readme.md`; the "another for ./docs if necessary" the user mentioned. + +The new `./docs/AGENTS.md` would be the agent-facing equivalent of `docs/Readme.md`. It would import the canonical DOD file, point to the relevant styleguides, and explain which `docs/guide_*.md` files are most useful for which tier (Tier 1/2/3/4 MMA). + +**Verdict (revised).** **PARITY-DIFFERENT-MECHANISM** for the project-context pattern (nagent uses `context.yaml`/markdown; Manual Slop uses TOML). **GAP** for the AGENTS.md `@import` pattern (Manual Slop has `AGENTS.md` but no canonical rules file to import). + +**Domain tag:** Both (the file is in the repo; the consumer is the agent harness *and* the Application's RAG injection). + +**Effort:** Small (1 phase). Write the canonical file, update `AGENTS.md`, create `./docs/AGENTS.md`, inject via `manual_slop.toml`. + +**Recommended priority:** **HIGH** — the foundation for the new agent-facing docs. Without this, the other styleguides (knowledge, caching) lack a home. + +**Cross-references:** See §4 (Proposed new artifacts for the next turn) for the full list. 
+ +--- + +### 2.5 claude-code provider (unchanged from v2) + +**Verdict:** **PARITY** with Manual Slop's `_send_gemini_cli` (the existing local-CLI subscription-auth pattern). No new track; provider addition only if user wants. + +**The source.** `bin/helpers/nagent_llm.py:65-80`: +- `PROVIDERS = ("openai", "anthropic", "google", "gemini", "cursor", "claude-code")` (note: 6 entries, not 5; `gemini` is an alias for `google`) +- `CREDENTIAL_ENV["claude-code"] = ()` (empty tuple — no env var read; uses local Claude Code login) +- `_claude_code_generate` uses `claude_agent_sdk`, `model=None` for "default" mode, `max_turns=1` for text gen +- Tools are disabled for plain text; `nagent-llm-upload` permits only the Read tool so Claude Code can read the file locally + +**Domain tag:** Application. + +**Effort:** Medium (a new provider is ~200-400 lines). + +**Recommended priority:** **LOW** (unchanged from v2). + +--- + +### 2.6 Per-file knowledge notes (unchanged from v2) + +**Verdict:** **GAP.** FileItem has no notes field. + +**The source.** `bin/helpers/nagent_gc_lib.py:merge_harvest` "files" branch: +- If the path resolves to an existing file → `_append_bullets(file_knowledge_path(root, file_id), f"# {target.resolve()}", [f"{note} {provenance}"])` where `file_id = file_id_for_path(target)` (the st_dev:st_ino) +- If the path doesn't resolve → fall back to `_append_bullets(knowledge / "facts.md", "# Facts", [f"{prefix}{note} {provenance}"])` where `prefix = f"{path_text}: "` — the note survives as a fact, just loses the per-file binding + +This is the per-file memory dimension that v1 §6 (Per-File Memory) didn't fully capture. nagent's per-file memory is: (a) per-file conversation (v1 §6) + (b) per-file knowledge notes (v2 addition). The combination is the *complete* per-file memory. + +**Domain tag:** Application. Bundle with Candidate 11. + +**Effort:** Small (1 phase) — add `notes: str = ""` to `FileItem`, GUI text area, `aggregate.py:run` integration. 
+ +**Recommended priority:** **LOW** (bundle with Candidate 11). + +--- + +### 2.7 "Delete to turn off" feature flags (unchanged from v2) + +**Verdict:** **PARITY-DIFFERENT-MECHANISM.** Design pattern, not a track. Worth a styleguide. + +**The source.** `bin/helpers/nagent_gc_lib.py:regenerate_digest` (line ~395): `if not sections: if target.is_file(): target.unlink() return None`. The `build_initial_context` injection check at line 677: `if knowledge_digest:`. Delete the file → no injection. The file is the switch. + +**Domain tag:** Both. Design pattern. + +**Effort:** N/A (styleguide, not a track). + +**Recommended priority:** **LOW.** Styleguide update, not a track. + +**Cross-references:** Add to `conductor/code_styleguides/feature_flags.md` (new styleguide) or as a section in `data_oriented_design.md`. + +--- + +### 2.8 Save-with-graceful-summary-failure (unchanged from v2) + +**Verdict:** **UNKNOWN** without reading the source. v2.1 still has this as Candidate 15 (TBD). + +**The nagent source.** `bin/nagent:2150-2180` (`--save-conversation` and `--branch-conversation`) shows the save path. The harvest path (`bin/helpers/nagent_gc_lib.py:harvest_conversation`) handles failures gracefully: `except (OSError, RuntimeError, ValueError, UnicodeDecodeError) as exc: failures.append(...); entries[sha] = {"status": "harvest-failed", ...}; emit(...)` — the artifact is *kept* (not deleted) on failure, the failure is recorded in the ledger, the user sees it. + +**What Manual Slop needs to verify.** `src/ai_client.py:run_discussion_compression` — does it raise on LLM failure (destructive) or fall back to the original (graceful)? Per the v1 takeaways, the v1 reviewer noted this is a concern but didn't verify the source. + +**Recommended priority:** **TBD** until the source is read. + +--- + +### 2.9 Delegation reframed as "context management, not parallelism" (unchanged from v2) + +**Verdict:** **PARITY (new framing).** Design pattern. 
+ +**The source.** `bin/nagent:715-731` (inside `build_initial_context`, the "Context management" section): "Hand off when noisy: if this conversation is mostly stale tool output, distill goal/state/decisions into a sub-conversation prompt, delegate the rest, and tell your caller about the handoff. Never rewrite your own conversation file while running." + +**The MMA analog.** `src/multi_agent_conductor.py` already does this implicitly. The new framing is for documentation, not implementation. + +**Recommended priority:** **LOW.** Update `docs/guide_mma.md` with the new framing. + +--- + +### 2.10 RAG integration discipline (NEW in v2.1) + +**The user's instruction.** "the rag introduces the vector db fuzz which is not required, its something the user can opt into so at worst case we just make targeted wiring of rag usage across features where it may be beneficial but we should be conservative." + +**The current RAG surface in Manual Slop.** Per the v1 review: +- `src/rag_engine.py:1-384` — the RAG engine, 384 lines, ChromaDB-backed +- `tests/test_rag_engine.py` — RAG tests +- `tests/test_rag_phase4_stress.py` — phase 4 stress tests +- `tests/test_rag_collection_dim_mismatch_recreates_collection` + `test_rag_collection_dim_match_preserves_collection` — recent dim-mismatch regression tests (commit `16412ad5`) +- GUI: a RAG section in the AI Settings (likely in `gui_2.py`; would need verification) + +**Where RAG fits well (the user's "beneficial" cases):** +- Semantic search across a large codebase when the user asks "where does X happen?" 
+- Concept-level discovery ("how does the execution clutch work?") where keyword search misses +- Cross-file pattern matching that grep can't do + +**Where RAG does NOT fit (the user's "conservative" warning):** +- Per-file curation (FileItem + ContextPreset is the right tool; RAG would be redundant) +- Per-discussion context (the discussion memory is already structured; RAG would be lossy) +- Knowledge harvest (the *third* memory dimension; RAG is a different shape) +- Per-file knowledge notes (the harvest pattern; RAG would be the wrong granularity) + +**The proposed discipline.** A new `conductor/code_styleguides/rag_integration_discipline.md` (or a section in `data_oriented_design.md`) that codifies: +1. RAG is opt-in. Default-off in new projects. +2. RAG complements, never replaces, the other memory dimensions. +3. RAG results must be displayed with provenance (which file, which chunk). +4. RAG never mutates state (no auto-injection, no auto-update). +5. RAG integration is feature-gated: a feature must explicitly request RAG in its scope. +6. RAG's failure mode is graceful: a failed search returns empty, never crashes the request. + +**Verdict (new).** **GAP (documentation).** The discipline is implicit; not codified. + +**Domain tag:** Both. + +**Effort:** Small (1 phase) — write the styleguide. + +**Recommended priority:** **MEDIUM** — without the discipline, future RAG integration will be ad-hoc. + +--- + +## 3. The 13-step "Build Your Own" list (unchanged from v2) + +v1's 12-step list is now 13. New step 10: "Harvest dead conversations into a knowledge store; inject a bounded digest." The implication for v1's Appendix A is unchanged from v2: bump from 12 to 13. + +--- + +## 4. Proposed new artifacts (for the next turn) + +The user said: "I definitely want to make new docs that related ot the ones v1 originally made but not override them. I definitely also want to update the workflwo docs." + +The v2.1 proposals. 
**All new files; none override v1 artifacts or human Readmes.** + +### 4.1 New agent-facing files (the AGENTS.md family) + +| File | Type | Source for content | Why | +|---|---|---|---| +| `conductor/code_styleguides/data_oriented_design.md` | NEW | Cloned from nagent's `context/data-oriented-design.md` (13084 bytes), adapted to Manual Slop | The canonical DOD reference. Imported by `AGENTS.md` and injected via `manual_slop.toml`. | +| `AGENTS.md` (existing, ~5.4KB) | **UPDATE** | Add `@conductor/code_styleguides/data_oriented_design.md` to top | Per nagent's `CLAUDE.md` pattern: import the canonical rules file; one source of truth for both harnesses | +| `./docs/AGENTS.md` | NEW | Mirror of nagent's `CLAUDE.md` content: what is Manual Slop; the 4 memory dimensions; the caching strategy; the tier-scoped docs (which `docs/guide_*.md` is for which MMA tier) | The agent-facing mirror of `docs/Readme.md` (which stays human-facing) | +| `./docs/agents/` (optional) | NEW directory | For per-agent-type doc collections if `./docs/AGENTS.md` grows | "another for ./docs if necessary" per the user | + +### 4.2 New styleguides (the manual's style guides) + +| File | Type | Source | Why | +|---|---|---|---| +| `conductor/code_styleguides/agent_memory_dimensions.md` | NEW | Combines v1 §3, §6 + v2.1 §2.1 | Codifies the 4 memory dimensions (curation, discussion, RAG, knowledge) and the rules for when to use each | +| `conductor/code_styleguides/rag_integration_discipline.md` | NEW | Per §2.10 above | Codifies the RAG opt-in default, provenance requirement, and the no-mutation rule | +| `conductor/code_styleguides/cache_friendly_context.md` | NEW | Per §2.2 above | Codifies the stable-to-volatile context ordering, the cache TTL GUI contract, and the per-discussion caching decision | +| `conductor/code_styleguides/knowledge_artifacts.md` | NEW | Per §2.1 above | Codifies the knowledge harvest pattern: category files, provenance, sha256 ledger, digest regeneration, "delete to turn 
off" | +| `conductor/code_styleguides/feature_flags.md` | NEW | Per §2.7 above | The "delete to turn off" pattern codified as a general feature-flag convention | + +### 4.3 New project docs (the docs/ guides) + +| File | Type | Source | Why | +|---|---|---|---| +| `docs/guide_knowledge_curation.md` | NEW | Per v2.1 §2.1; complements v1's `docs/guide_context_curation.md` | "The third memory dimension" — what the knowledge store is, how it differs from curation/discussion/RAG, how to write to it, how to query it | +| `docs/guide_caching_strategy.md` | NEW | Per v2.1 §2.2; complements `docs/guide_ai_client.md` | "Caching across providers" — the stable-to-volatile pattern, the cache TTL GUI, the per-discussion caching decision | +| `docs/guide_agent_memory_dimensions.md` | NEW | Cross-cutting: curation + discussion + RAG + knowledge | Maps each Manual Slop feature to the memory dimension(s) it uses; useful for new-feature scoping | + +### 4.4 Updates to existing workflow docs + +| File | Type | What changes | Why | +|---|---|---|---| +| `AGENTS.md` | UPDATE | Add `@import` for canonical DOD file | Per nagent's pattern | +| `conductor/workflow.md` | UPDATE | Add TDD protocol sections for: cache TTL control, knowledge harvest, conversation compaction, RAG discipline | The workflow should reflect the new patterns | +| `conductor/product-guidelines.md` | UPDATE | Add a "memory dimensions" section that codifies the 4 dimensions and the rules for which to use when | The product guidelines should reflect the v2.1 framing | +| `docs/guide_mma.md` | UPDATE | Use the new "delegation is context management" framing in the Token Firewalling section | Per §2.9 | +| `docs/guide_ai_client.md` | UPDATE | Add the cache TTL section, the per-discussion caching decision, the cache health panel | Per §2.2 | + +### 4.5 v1 artifacts (preserved, NOT updated in this turn) + +| File | Status | Why | +|---|---|---| +| `conductor/tracks/nagent_review_20260608/report.md` | **Preserved** | v1's 
14-section deep-dive. The structure is still correct. v2.1's findings are *additions*, not contradictions. | +| `conductor/tracks/nagent_review_20260608/comparison_table.md` | **Preserved** | Same. | +| `conductor/tracks/nagent_review_20260608/decisions.md` | **Preserved** | The 10 v1 candidates are still real candidates. v2 and v2.1 together add 6 new ones (Candidates 11-16). | +| `conductor/tracks/nagent_review_20260608/nagent_takeaways_20260608.md` | **Preserved** | The 10 v1 takeaways are still grounded. v2.1 adds 3 more. | +| `conductor/tracks/nagent_review_20260608/nagent_review_v2_20260612.md` | **Preserved** (v2 draft) | Per the user's instruction "I want to keep this v2 draft" | +| `conductor/tracks/nagent_review_20260608/nagent_review_v2_1_20260612.md` | **This file** (v2.1) | The user-revised version | +| `Readme.md` (project root) | **Preserved** | Human-facing, unchanged | +| `docs/Readme.md` | **Preserved** | Human-facing, unchanged | + +--- + +## 5. v2.1's revised future-track candidate list (16 candidates, up from 10) + +### Candidate 11: Knowledge Memory (third memory dimension) — REFRAMED, HIGH + +**The reframing.** Manual Slop has two strong memory dimensions today (curation, discussion). RAG is opt-in. The new candidate adds a third dimension (knowledge memory) — user-editable, provenance-aware, durable learnings from past sessions. Not a RAG alternative. Not a curation replacement. Not a discussion replacement. A *complement*. 
+ +**What it would do.** A new `src/knowledge_store.py`: +- `KnowledgeStore` class with `add_bullet(category, text, provenance)`, `get_digest(budget_chars=4096)`, `regenerate_digest()`, `delete_digest()` (turn-off switch), `edit_category(category, bullets)` (user-editable) +- `KnowledgeHarvester` class with `harvest_conversation(discussion) -> Result[list[KnowledgeBullet], ErrorInfo]` (LLM call against an editable `prompts/harvest-conversation.md`) +- A new `src/harvest_cli.py` (or GUI panel) that does the dry-run → apply cycle, like `nagent-gc` +- A bounded `{knowledge}` block injected into `aggregate.py:run` initial context — the *stable* position (cache-friendly, per the v2.1 cache ordering) +- A "Knowledge" panel in the GUI (parallel to the Logs Management panel) for browsing, editing, pruning +- Per-file knowledge notes in `~/.manual_slop/knowledge/files/{file_id}.md` (parallel to `FileItem.notes` extension) + +**Per-file knowledge notes (sub-candidate 11.1).** Add `notes: str = ""` to `FileItem` (one-line schema change). The harvest "files" category writes per-file notes keyed by inode. The Structural File Editor gets a "Notes" text area. + +**Where it lives.** Application. + +**Depends on.** `data_oriented_error_handling_20260606` (the `Result`/`ErrorInfo` pattern for the harvest LLM call's return type). + +**Effort.** **Large.** 3-5 phases: (1) KnowledgeStore + digest regeneration, (2) KnowledgeHarvester + harvest-conversation prompt, (3) GUI panel + file picker, (4) aggregate.py integration + cache-position verification, (5) per-file notes + FileItem extension. ~500-800 lines + tests. + +**Recommended priority.** **HIGH** (re-rank from v1; unchanged from v2). + +**Cross-references:** +- v1 §6 (Per-File Memory) — adds a *knowledge* dimension alongside the *curation* dimension. +- `docs/guide_context_curation.md` (existing) — the related-but-different story. 
+- `docs/guide_rag.md` (existing) — the opt-in RAG dimension (distinct from the knowledge dimension this candidate adds); v2.1's "RAG integration discipline" styleguide codifies when RAG fits. +- `data_oriented_error_handling_20260606` — the harvest LLM call benefits from `Result[list[KnowledgeBullet], ErrorInfo]`. + +--- + +### Candidate 12: Caching Strategy (EXPANDED with cache TTL GUI) — MEDIUM + +**The expansion.** v2 had only "stable-to-volatile context ordering." v2.1 adds **cache TTL GUI controls** as a sub-candidate, per the user's explicit ask. + +**Part A: Stable-to-volatile context ordering (sub-candidate 12a).** +- A refactor of `src/ai_client.py:_get_combined_system_prompt` and the Anthropic-specific call site to enforce stable-to-volatile ordering +- **Stable layers** (in order, identical across turns of the same mode): + 1. Role instructions (model + provider) + 2. Tag protocol / tool protocol / function-calling schema + 3. Discovered tool descriptions + 4. System prompt (the user's chosen preset) + 5. Persona profile (if any) + 6. Project context (per `manual_slop.toml` — Candidate 14) + 7. Knowledge digest (if Candidate 11 is built) +- **Volatile layers** (per-turn, not cached): + 8. Instance facts (current discussion, current file items) + 9. Tool-call results from prior turns + 10. The user message +- The boundaries between stable and volatile are passed to Anthropic as `cache_control` breakpoints (mirroring nagent's `cache_prefix_blocks`) + +**Part B: Cache TTL GUI controls (sub-candidate 12b).** +- A "Caching" tab in Operations Hub (parallel to the planned Vendor State tab) +- Per-provider cache configuration: TTL override, model eligibility, default mode (ephemeral vs explicit vs none) +- Per-discussion cache state: which discussion is currently cached, when the cache was created, when it expires +- Cache hit rate per provider (aggregated from `cache_read_input_tokens` + `cache_creation_input_tokens` usage metadata) + +**Where it lives.** Application. + +**Depends on.** None directly. 
Could leverage `qwen_llama_grok_followup_20260611`'s `send_openai_compatible()` helper for the Anthropic-specific call site. + +**Effort.** Small for 12a (1-2 phases). Medium for 12b (2-3 phases). + +**Recommended priority.** **MEDIUM** (unchanged from v2). The user explicitly wants the GUI controls, so 12b is part of this candidate. + +**Cross-references:** +- `docs/guide_ai_client.md` §"Anthropic ephemeral + Gemini explicit caching" — the existing pattern. +- v1 §5 (The Loop) — the loop's append/parse/act structure is the volatility; the cache lives in the stable prefix. + +--- + +### Candidate 13: Conversation Compaction — MEDIUM (unchanged from v2) + +**Where it lives.** Application. The Compress button in the GUI is currently summarization; Compaction is a separate "Compact" button next to it. + +**Effort.** Small to medium (1-2 phases). + +**Recommended priority.** **MEDIUM** (unchanged from v2). + +**Cross-references:** +- v1 §3 (Editable State) — the "compaction guidance is user-editable" pattern parallels nagent's `prompts/compact-conversation.md`. +- v1 §15.2 (Provider-specific history in process globals) — compaction might be a stepping stone to the Stateless LLMClient refactor (Candidate 3): if the conversation is compacted to a known shape, the projection of `disc_entries` to provider history becomes trivial. + +--- + +### Candidate 14: Project Context File — LOW (unchanged from v2) + +**Where it lives.** Application. `aggregate.py:run` is the consumer. + +**Effort.** Small (1 phase). ~100 lines + a documentation note. + +**Recommended priority.** **LOW** (unchanged from v2). + +--- + +### Candidate 15: Save-with-Graceful-Summary-Failure — TBD (unchanged from v2) + +**Where it lives.** Application. + +**Effort.** **Small** (1 phase) IF the current behavior is "raise on failure." Trivial (just a test) IF the current behavior is "fall back to original." 
+ +**Recommended priority.** **TBD** — MEDIUM if the current behavior is destructive (it would be a latent bug). LOW if not. Verification first. + +--- + +### NEW Candidate 16: AGENTS.md `@import` Pattern + Canonical DOD File — HIGH + +**The user's swap instruction.** nagent added a `CLAUDE.md` that imports `context/data-oriented-design.md` via `@import`. Manual Slop has `AGENTS.md` (existing) but no canonical rules file to import. The candidate: + +1. Create `conductor/code_styleguides/data_oriented_design.md` (cloned/adapted from nagent's `context/data-oriented-design.md`) +2. Update `AGENTS.md` to add `@conductor/code_styleguides/data_oriented_design.md` at the top +3. Create `./docs/AGENTS.md` (the agent-facing mirror of `docs/Readme.md`) +4. Add the same canonical file to `manual_slop.toml` `[agent.context_files]` (or equivalent) so the Application's RAG / context assembly picks it up + +**Why this is HIGH priority.** Without the canonical file: +- The new styleguides (knowledge, caching, RAG discipline) lack a home +- The AGENTS.md file is just a thin pointer to itself (no actual rules) +- The agent-facing docs mirror the human-facing docs by *importing* the same rules + +**Where it lives.** Both (project root, `docs/`, `conductor/code_styleguides/`). + +**Effort.** Small to medium (1-2 phases). The canonical file is mostly an adaptation of nagent's; the `@import` line is one line; the `docs/AGENTS.md` is parallel to the existing `docs/Readme.md`. + +**Recommended priority.** **HIGH** (re-rank from LOW in v2, because the user's AGENTS.md swap is explicit and foundational). + +**Cross-references:** +- `CLAUDE.md` in nagent (the pattern source) +- `context/data-oriented-design.md` in nagent (the canonical content) +- `AGENTS.md` in Manual Slop (the existing file that gets the `@import` line) +- `docs/Readme.md` in Manual Slop (the human-facing file that stays human-facing; `./docs/AGENTS.md` is the agent mirror) + +--- + +## 6. 
v2.1's revised comparison table (5 new rows, 4 updates) + +| # | nagent pattern | Manual Slop equivalent (v2.1) | Verdict (v2.1) | Domain | New candidate | +|---|---|---|---|---|---| +| 1-5, 10, 12-14 | (existing v1 rows, mostly unchanged) | ... | ... | ... | ... | +| **3 (editable state)** | `--compact`, `--branch-conversation`, editable compaction prompt | Manual Slop has Take/branching + per-entry edit + UISnapshot; **has summarization, not compaction** | PARITY (DIFFERENT FOCUS) on editing; GAP on compaction | APP | 13 | +| **6 (per-file memory)** | per-file conversation + per-file knowledge notes | `FileItem` + `ContextPreset` (curation) + Fuzzy Anchors; **no `notes` field; no per-file knowledge** | PARITY (DIFFERENT KIND) on curation; GAP on notes | APP | 11 (notes sub-task) | +| **9 (sub-conversations)** | `` worker reuse; "delegation is context management" | MMA worker pool (subprocess) + 1:1 gap; new framing in docs | PARITY for MMA; GAP for 1:1; design pattern update | APP | 1 (unchanged) | +| **NEW: knowledge harvest** | `nagent-gc` → `~/.nagent/knowledge/` with provenance + sha256 ledger + digest | **THIRD memory dimension** alongside curation + discussion; RAG is opt-in and not the comparison | **GAP (Application)** | APP | **11** | +| **NEW: prompt caching strategy** | `bin/nagent:970-1014` computes boundaries; `nagent_llm.py:cache_prefix_blocks` injects `cache_control` | Manual Slop has the mechanism (`_add_history_cache_breakpoint`); ordering not enforced; no cache TTL GUI | **PARTIAL (mechanism); GAP (UX)** | APP | **12a + 12b** | +| **NEW: conversation compaction** | `--compact` with editable `prompts/compact-conversation.md` | Manual Slop has `run_discussion_compression` (summarize, not compact) | **GAP (Application)** | APP | **13** | +| **NEW: project context files** | `context.yaml` at git toplevel, install → project → root | `manual_slop.toml` per-project (TOML, different syntax) | **PARITY (DIFFERENT MECHANISM)** | APP | 14 | +| **NEW: 
AGENTS.md `@import` pattern** | nagent `CLAUDE.md` → `context/data-oriented-design.md` | Manual Slop has `AGENTS.md` but no canonical rules file; `./docs/AGENTS.md` not created | **GAP (Application)** | BOTH | **16** | +| **NEW: cache TTL exposure** | (nagent doesn't expose TTL; providers do) | Manual Slop has Anthropic ephemeral + Gemini explicit; no GUI for TTL or per-discussion cache | **GAP (UX)** | APP | **12b** | +| **NEW: RAG integration discipline** | (n/a — nagent has no RAG) | Manual Slop's RAG is opt-in; no codified discipline for when/where to wire it | **GAP (documentation)** | BOTH | New styleguide | + +--- + +## 7. Staleness in v1 (revisited) + +The v2 staleness map (§4 in v2) is still mostly correct. v2.1 adds: + +- **§6 (Per-File Memory) of v1 is now MORE relevant.** The per-file knowledge notes pattern is a *new* dimension that complements v1's per-file curation. The v1 review said Manual Slop is "STRONGER in curation dimension" — true, but the *knowledge notes* dimension is absent. v2.1's Candidate 11 (with sub-task 11.1) addresses this. + +- **§3 (Conversations Are Editable State) of v1 needs a new sub-section on compaction** (per v2.1 Candidate 13). + +- **§7 (Repository History) of v1 is the analog of the new knowledge harvest.** Both are "preserve and project durable inputs." v2.1 makes this cross-reference explicit. + +- **The new pattern table in v2.1's §6 is the flat reference** for the comparison. The v1 `comparison_table.md` is still correct; v2.1's 5 new rows are *additions* to that table. + +--- + +## 8. 
What v2.1 changes vs v2 (the delta) + +| Section in v2 | What v2.1 changes | +|---|---| +| §0 (TL;DR) | Added "reframed" labels; re-ordered rows; added cache TTL row; added RAG discipline row | +| §1 (8 new commits) | **PROMOTED** to §1 (unchanged) | +| §2.1 (Knowledge Harvest) | **REFRAMED** from "RAG alternative" to "third memory dimension"; added detailed comparison of 4 memory dimensions; added source-level citations | +| §2.2 (Prompt Caching) | **EXPANDED** to include cache TTL GUI controls (new sub-candidate 12b) | +| §2.3 (Compaction) | Added source-level citations (compaction prompt's self-review checklist, the artifact-knowledge-preservation rules) | +| §2.4 (Project Context) | **SWAPPED** CLAUDE.md → AGENTS.md; added the `docs/AGENTS.md` (new) proposal | +| §2.5-§2.9 (other patterns) | Mostly unchanged; added source-level citations | +| §2.10 (RAG integration discipline) | **NEW** — per the user's "conservative" instruction | +| §3 (13-step list) | Unchanged | +| §4 (v1 staleness) | Updated; added "RAG is not the comparison" note | +| §5 (What's still correct) | Unchanged | +| §6 (future-track candidates) | **EXPANDED** from 11 to 16 (added 12b, 16); reframed 11 | +| §7 (impact on existing candidates) | Updated to reflect new priority re-rankings | +| §8 (verification needs) | Updated; some items now have source-level evidence (e.g., Candidate 12 is now grounded in `bin/nagent:970-1014`) | +| §9 (recommended next steps) | Updated to reflect the new docs/workflow update plan (§4 above) | +| **NEW §10: source reads in full** | Lists the 18 source files I read in full with key file:line citations | +| **NEW §4 (in v2.1): proposed new artifacts** | The full list of new files for the next turn (5 new docs + 1 new styleguide dir + AGENTS.md update + workflow doc updates); explicit "do not touch" list for v1 artifacts and human Readmes | +| §10 (v2 references) | Renumbered to §11; updated | + +--- + +## 9. Recommended next steps (revised) + +1. 
**You review v2.1** and confirm Candidate 16 (AGENTS.md `@import` + canonical DOD file) is the right place to start. This is the foundation; the other styleguides (knowledge, caching, RAG) all need the canonical file. + +2. **I create the canonical DOD file** at `conductor/code_styleguides/data_oriented_design.md` (cloned from nagent's `context/data-oriented-design.md`, adapted to Manual Slop's context). This is a 1-2 hour task; no code changes. + +3. **I update `AGENTS.md`** to add the `@conductor/code_styleguides/data_oriented_design.md` line at the top, plus a "what this is" section that mirrors the nagent CLAUDE.md content but for Manual Slop. 1-2 hours. + +4. **I create `./docs/AGENTS.md`** as the agent-facing mirror of `docs/Readme.md` (the human-facing docs index stays human-facing). The new file explains: which `docs/guide_*.md` is for which MMA tier; the 4 memory dimensions; the caching strategy; the styleguide index. 1-2 hours. + +5. **I write the 5 new styleguides** at `conductor/code_styleguides/`: + - `agent_memory_dimensions.md` + - `rag_integration_discipline.md` + - `cache_friendly_context.md` + - `knowledge_artifacts.md` + - `feature_flags.md` (or fold into `data_oriented_design.md`) + +6. **I write the 3 new project docs** at `docs/`: + - `guide_knowledge_curation.md` + - `guide_caching_strategy.md` + - `guide_agent_memory_dimensions.md` + +7. **I update the existing workflow docs**: + - `conductor/workflow.md` (add TDD protocol for the new patterns) + - `conductor/product-guidelines.md` (add the memory dimensions section) + - `docs/guide_mma.md` (use the new "context management" framing) + - `docs/guide_ai_client.md` (add cache TTL section) + +8. **After integration**, I update `conductor/tracks.md` to reflect the new artifacts and the v2.1 framing. The v1 track stays as-is (preserved). + +9. **Verification of Candidate 15 (save-with-graceful-summary-failure)** can be done in parallel by reading `src/ai_client.py:run_discussion_compression`. 
Cheap source-read; high potential value. + +--- + +## 10. v2.1 references + +- **nagent source:** https://github.com/macton/nagent (at commit `eb6be32a`, 2026-06-12 00:25:50 UTC) +- **nagent v2 README:** https://github.com/macton/nagent/blob/main/README.md +- **nagent v2 commits:** `2c3c78b` (compaction), `67a3ea5` (knowledge harvest + tag parser + claude-code), `d86bce8` (CLAUDE.md), `ee72cb4` (README rewrite), `5e269ca` (project context + prompt caching + conversation direction) +- **v2 review (draft, preserved):** `conductor/tracks/nagent_review_20260608/nagent_review_v2_20260612.md` (~68KB) +- **v1 review (preserved):** `conductor/tracks/nagent_review_20260608/report.md` + `comparison_table.md` + `decisions.md` + `nagent_takeaways_20260608.md` +- **nagent source files read in full for v2.1:** `bin/nagent`, `bin/helpers/nagent_gc_lib.py`, `bin/helpers/nagent_tags.py`, `bin/helpers/nagent_llm.py`, `bin/helpers/nagent_cli.py`, `bin/helpers/nagent_file_edit_lib.py`, `bin/helpers/nagent_file_split_lib.py`, `bin/helpers/nagent_file_patch_lib.py`, `bin/helpers/nagent_file_summarize_lib.py`, `bin/nagent-gc`, `bin/nagent-llm-text`, `prompts/compact-conversation.md`, `prompts/harvest-conversation.md`, `context/data-oriented-design.md`, `CLAUDE.md`, `context.yaml`, `requirements.txt`, `config.example.json` + +End of v2.1 report. 
diff --git a/conductor/tracks/nagent_review_20260608/nagent_review_v2_20260612.md b/conductor/tracks/nagent_review_20260608/nagent_review_v2_20260612.md new file mode 100644 index 00000000..ef74b982 --- /dev/null +++ b/conductor/tracks/nagent_review_20260608/nagent_review_v2_20260612.md @@ -0,0 +1,820 @@ +# nagent Review v2: 2026-06-08 → 2026-06-12 Updates + +**Track:** `nagent_review_20260608` +**Date:** 2026-06-12 +**Author:** Tier 1 Orchestrator +**Companion to:** `report.md` (v1, 2026-06-08), `comparison_table.md`, `decisions.md`, `nagent_takeaways_20260608.md` +**Purpose:** Document what's new in Mike Acton's nagent repo since the v1 review and identify what in the v1 artifacts needs updating (without deleting them). + +> **Reading note.** v1 reviewed nagent at commit `28a6a87c` ("Fix conversation delegation and token accounting," 2026-06-08 06:41:39 UTC). v2 reviews it at commit `eb6be32a` ("Remove resolved issue files," 2026-06-12 00:25:50 UTC). That's **8 commits and 4 days of work**. The README has been completely restructured (14 sections → 7 Parts with 14 numbered sections, but reorganized into a teaching arc). A new major pattern (**knowledge harvest**) has been added. A new provider (**claude-code**) has been added. New sub-commands (`--compact`, `--branch-conversation`) are now first-class. Prompt caching is now explicit. The 12-step Build Your Own list is now 13 steps. +> +> This is a **focused delta report**, not a full re-review. Sections of the v1 report that haven't changed materially are noted in §5 ("What is still correct in v1"). Sections that need updates are noted in §4 ("Staleness in v1"). + +--- + +## 0. TL;DR + +| New in nagent | Manual Slop equivalent | Verdict | New future-track candidate? | +|---|---|---|---| +| **Knowledge harvest** (`nagent-gc` → `~/.nagent/knowledge/` with provenance, sha256 ledger, bounded digest) | `src/rag_engine.py` (ChromaDB, no provenance, not user-editable) | **GAP (Application). 
Manual Slop's RAG is fuzzy + opaque; nagent's knowledge store is exact + editable + provenance-aware.** | **YES** — Candidate 11 | +| **Prompt caching with stable-to-volatile context ordering** (`--cache-prefix-chars` to `nagent-llm-text`, anthropic splits at offsets) | `src/ai_client.py:_add_history_cache_breakpoint`, `_send_anthropic` already uses `cache_control` blocks | **PARTIAL (Application). Caching is in place; stable-to-volatile context ORDERING is not enforced.** | **YES** — Candidate 12 | +| **Conversation compaction** (`--compact` with `prompts/compact-conversation.md` editable guidance) | `src/gui_2.py:4252` `Compress` button → `app_controller._handle_compress_discussion:3357` → `ai_client.run_discussion_compression` | **GAP (Application). Manual Slop has summarization, not behavior-preserving compaction.** | **YES** — Candidate 13 | +| **Project context files** (`context.yaml`/`context.md` at git toplevel, injected install → project → root) | `manual_slop.toml` per-project + `paths.py` per-project overrides | **PARITY (DIFFERENT MECHANISM). Manual Slop uses TOML; nagent uses markdown/YAML. Same intent, different syntax.** | **MAYBE** — Candidate 14 (if user wants markdown) | +| **claude-code provider** (subscription auth via Claude Agent SDK, `default` model = local config) | `src/ai_client.py:_send_gemini_cli` (similar pattern: local CLI auth) | **PARITY.** Same pattern (local-subprocess/subscription auth) as Gemini CLI. | No — already covered by MiniMax follow-up | +| **Per-file knowledge notes** (`knowledge/files/{file_id}.md`, mirrored from harvest) | `models.FileItem` (no notes field) | **GAP (Application). FileItem has 9 fields; no free-form notes per file.** | Bundle with Candidate 11 | +| **"Delete to turn off" feature flags** (`delete digest.md` → injection stops) | `[ai_settings.toml]` toggles, GUI checkboxes | **PARITY (DIFFERENT MECHANISM).** Manual Slop uses config; nagent uses file presence. 
| No — design pattern note, not a track | +| **Save-with-graceful-summary-failure** (summary LLM fails → save still completes, `(summary unavailable)` marker) | `ai_client.run_discussion_compression` — behavior on LLM failure unknown without source read | **UNKNOWN.** Needs source read. | Maybe — bundle with Candidate 13 | +| **Delegation reframed as "context management, not parallelism"** | `src/multi_agent_conductor.py` (already does this implicitly via subprocess + Context Amnesia) | **PARITY (NEW FRAMING).** | No — design pattern note | + +**Verdict in one sentence:** The v2 nagent changes add **one major new pattern (knowledge harvest)** that competes with Manual Slop's RAG, **three smaller patterns (prompt-cache ordering, conversation compaction, per-file notes)** that have direct Manual Slop equivalents or gaps, and **one structural change (claude-code provider)** that mirrors the existing Gemini CLI pattern. The original 14-section deep-dive is *still mostly correct* — the changes are additions, not contradictions. The most actionable update is adding the knowledge-harvest pattern to the future-track candidate list. + +--- + +## 1. The 8 new commits (chronological) + +From `https://api.github.com/repos/macton/nagent/commits?per_page=30`, the 8 new commits since v1: + +| # | Date (UTC) | Commit | Subject | +|---|---|---|---| +| 1 | 2026-06-11 03:32:50 | `2c3c78b` | Add conversation compaction and restore initial context on load. | +| 2 | 2026-06-11 23:09:57 | `67a3ea5` | Add knowledge harvest, tag parser, and claude-code provider. | +| 3 | 2026-06-11 23:10:12 | `d86bce8` | Add CLAUDE.md importing the shared data-oriented design rules. | +| 4 | 2026-06-11 23:10:12 | `ee72cb4` | Rewrite README prompt around a teaching arc and regenerate README. | +| 5 | 2026-06-12 00:17:34 | `0b9d1a2` | Ignore scratch files. | +| 6 | 2026-06-12 00:17:34 | `5e269ca` | Add project context, prompt caching, and conversation direction. 
| +| 7 | 2026-06-12 00:17:34 | `99e1270` | Regenerate README for project context, caching, and conversation direction. | +| 8 | 2026-06-12 00:25:50 | `eb6be32` | Remove resolved issue files. | + +The 4 substantive commits are #1, #2, #6, and #4. Commits #3, #5, #7, #8 are companion/cleanup work. + +### 1.1 The 4 substantive commits (long-form messages) + +**Commit `2c3c78b` — Add conversation compaction and restore initial context on load** (2026-06-11 03:32:50) +> Introduce `--compact` with compaction guidance, preserve initial_context through edit flows, and ensure loaded conversations regain protocol preamble when missing. +> Co-authored-by: Cursor + +**Commit `67a3ea5` — Add knowledge harvest, tag parser, and claude-code provider** (2026-06-11 23:09:57) — **the big one** +> - `nagent-gc`: classify dead artifacts; harvest facts/decisions/tasks/questions/playbooks into `~/.nagent/knowledge/` with provenance and a sha256 ledger gate; inject a bounded digest into initial context; dry-run by default (design: issues/gc-knowledge-harvest.md) +> - `nagent_tags.py`: explicit parser for the tag protocol replacing regex parsing; block helpers remove re.sub escape hazards +> - claude-code provider via the Claude Agent SDK using the local Claude Code login; omitted model or "default" means Claude Code's configured model +> - Install context: load context.yaml/context.md from the nagent folder before root context; ship `context/data-oriented-design.md` via repo context.yaml +> - Fix re.sub escape corruption in `refresh_initial_context`, O(n^2) splitter scoring (13.6s → 0.008s on a 100KB cpp file), binary reads crashing the loop, pid drift between nagent and nagent-file-edit, and write-path expanduser mismatch +> - Save-conversation indexes the copy even when the summary LLM fails; fresh conversations build initial context once; compact prompt resolves root-first; edit/compact roll up child token stats; gc progress spinner and per-item status lines + +**Commit `5e269ca` — Add 
project context, prompt caching, and conversation direction** (2026-06-12 00:17:34) — **the second big one** +> - Initial context restructured stable-to-volatile: role instructions and the tag protocol (with inline per-tag guidance) lead; instance facts and environment trail, so request prefixes stay byte-identical across conversations of the same mode +> - Protocol rules stated outright: raw bodies, first-close-wins, nothing outside tags, the loop contract (results appended, never fabricate), and errors-as-data +> - New conversations-as-data block directs the model to reuse named workers (`conversation-file="name"`), resume saved conversations, author worker briefings under /tmp, and hand off to a fresh sub-conversation when its own context grows noisy +> - Project context: a `context.yaml`/`context.md` at the git toplevel of the working directory is injected between install and root context, deduplicated when the project is the install or root directory +> - Provider prompt caching: `call_llm` passes stable prefix boundaries via `--cache-prefix-chars`; the anthropic provider splits the message into cache_control blocks at those offsets; cached prompt tokens fold back into reported input counts (issues/provider-prompt-caching.md) + +**Commit `ee72cb4` — Rewrite README prompt around a teaching arc and regenerate README** (2026-06-11 23:10:12) +> The prompt now organizes the README as a progression: build it, rename it, own the data, exploit the files, name the principles, the data structures that fall out (neighborhoods, context and large files, per-file conversations), and the framework comparison. Coverage updated for knowledge harvest, install context, the shared tag parser, compaction/branching, and the claude-code provider. README regenerated from the revised prompt. + +**Implication of the README restructuring.** The v1 report mapped nagent's 14 sections to Manual Slop features in a 1:1 fashion. 
v2 nagent's README re-frames the same 14 patterns as a *teaching arc* (build → rename → own → exploit → name → apply → compare). The new organization emphasizes **consequences** of the data-oriented stance (harvest, compaction, project context, prompt caching) over the original 14-as-a-list framing. The substance is mostly the same; the *framing* has shifted toward "what files buy you." + +--- + +## 2. The new patterns (deep-dive) + +### 2.1 Knowledge harvest (`nagent-gc`) — THE major new pattern + +**nagent's claim.** Dead conversations accumulate, and deleting them loses what was learned. Therefore: distill, then delete — and feed the distillate back in. This is the strongest version of the "files create opportunities" argument. Session state that other tools discard becomes compounding, user-editable knowledge. + +**nagent's implementation** (from the README's new §8, "Harvest Knowledge, Reclaim Space"): + +1. `nagent-gc` scans the nagent root and classifies every artifact: + - **Live conversations** (still in use) + - **User-kept saves** (named, kept explicitly) + - **Prunable** (stale splits, dead index entries) + - **Harvest candidates** (conversation archives, delegated sub-conversations, per-file conversations whose target file is gone) + - **Unknown** is *kept, never deleted* +2. For each harvest candidate, an LLM pass driven by the user-editable `prompts/harvest-conversation.md` extracts: + - **Facts** + - **Decisions** + - **Completed tasks** + - **Open tasks** + - **Open questions** + - **Playbooks** +3. Output goes to category files under `~/.nagent/knowledge/`. **Every bullet carries provenance** (`[from: conversation, date]`). +4. Notes tied to a specific file mirror into `knowledge/files/{file_id}.md`. +5. **Deletion is gated** on a sha256 entry in `knowledge/ledger.json` proving the harvest happened. +6. **Identical content never pays the LLM twice** (the sha256 ledger gate). +7. 
A bounded `digest.md` (open tasks and questions first, newest first) regenerates from the category files — never from raw conversations, so the user's edits to the category files propagate. +8. The digest is injected into every conversation's initial context as a `{knowledge}` block. +9. **Delete `digest.md` → injection turns off. That is the whole switch.** +10. **Dry run is the default** and prints the classification table plus the estimated harvest cost in tokens before anyone pays it. + +**The CLI surface** (from the README's "Common Commands"): +```bash +nagent-gc # dry run: classify, estimate cost +nagent-gc --apply # harvest into ~/.nagent/knowledge/, reclaim +nagent-gc --apply --no-harvest # reclaim only, no LLM pass +``` + +**The key design properties** (the things that make this pattern a Manual Slop candidate): + +| Property | How it works | Why it matters | +|---|---|---| +| **Provenance** | Every bullet has `[from: conversation, date]` | Auditable, traceable, user can verify | +| **User-editable** | The category files are plain markdown, not a vector store | User can correct wrong "facts" before any model sees them | +| **Bounded digest** | The `digest.md` is byte-capped before injection | Caching-friendly (stable prefix); context-budget-friendly | +| **Delete to turn off** | `rm digest.md` → no injection | Zero-config opt-out; the file is the switch | +| **sha256 ledger gate** | Deletion requires proof of harvest | Lossless: you cannot delete a conversation that hasn't been distilled | +| **Dry run default** | `nagent-gc` without `--apply` does nothing destructive | Safe by default | +| **Per-file mirror** | Notes about a specific file go to `knowledge/files/{file_id}.md` | Per-file memory becomes first-class (extends §13 of v1) | +| **Digest regenerates from category files, not raw** | Edits to category files propagate to digest on next regen | The "knowledge" is a layer, not a snapshot | + +**Manual Slop's current state** (RAG, the closest existing 
pattern): + +| Aspect | `src/rag_engine.py` (Manual Slop) | `nagent-gc` (knowledge harvest) | +|---|---|---| +| Storage | ChromaDB (vector store) | `~/.nagent/knowledge/*.md` (markdown files) | +| Provenance | Path + chunk (no conversation-of-origin) | `[from: conversation, date]` per bullet | +| User-editable | No (the vector store is opaque) | Yes (markdown is a text file) | +| Opt-out | GUI toggle (`rag_enabled` setting) | `rm digest.md` | +| Cost control | Embedding + vector storage | sha256 ledger gate; identical content is free | +| Update mechanism | Re-index on mtime change | Manual edit + regenerate digest | +| Deduplication | No explicit dedup | sha256 ledger prevents re-harvest of identical content | +| Auditability | Low (vector similarity, no source-link UI) | High (every bullet has provenance) | +| Per-file notes | None (FileItem has no `notes` field) | `knowledge/files/{file_id}.md` | +| Pattern: "knowledge" is a *layer*, not a *snapshot* | No (each indexing is fresh) | Yes (digest regenerates from category files) | + +**Verdict.** **GAP (Application).** Manual Slop's RAG is *useful* but is the wrong shape for "what did we learn from past sessions that we want to inject as stable knowledge." RAG is for *semantic retrieval at query time*; the knowledge harvest is for *durable, auditable, user-editable knowledge* that gets injected as a stable prefix. The two are *complementary*, not substitutable, but Manual Slop has only the first. + +**Domain tag:** Both (Application for the user-facing knowledge store; Meta-Tooling for the harvest/regen cycle that external agents could trigger). + +**Effort:** Large (3-5 phases). The RAG engine is 384 lines; the knowledge store would be ~200-400 lines + the harvest/regen CLI + a new GUI panel. The audit/integrity story is non-trivial. + +**Recommended priority:** **HIGH (re-rank from v1).** This is the single most important new pattern in v2 nagent. 
The user has not yet seen it; surfacing it as a Candidate 11 in `decisions.md` is the v2 report's primary actionable output. + +**Cross-references:** +- v1 §6 (Per-File Memory) — the per-file knowledge notes (`knowledge/files/{file_id}.md`) are a *new dimension* of per-file memory that v1 didn't capture. Could be bundled with Candidate 11. +- v1 §7 (Repository History) — the knowledge harvest covers conversations; the git history covers code. Both are "durable, explicit inputs" — same pattern, different data. +- `data_oriented_error_handling_20260606` — the knowledge harvest is data-oriented in the Fleury sense (no control flow; the LLM extraction is a transformation over files). + +--- + +### 2.2 Prompt caching with stable-to-volatile context ordering + +**nagent's claim.** Context windows are a budget, but cache hit rate is the multiplier. The initial context's *ordering* determines cache effectiveness: stable prefix + volatile suffix means providers that cache on block boundaries (Anthropic) can reuse the shared context across conversations of the same mode. + +**nagent's implementation** (from the commit message of `5e269ca` and the new README's §1, §2, §3): +- `build_initial_context()` assembles context in **stable-to-volatile order**: role instructions and tag protocol first, context-management rules, discovered tool descriptions, install context, project context, root context, knowledge digest, and instance facts and environment *last*. +- "Stable-to-volatile on purpose: request prefixes stay byte-identical across conversations of the same mode." +- `call_llm` passes stable prefix boundaries via `--cache-prefix-chars` to `nagent-llm-text`. +- The Anthropic provider splits the message into `cache_control` blocks at those offsets. +- **Cached prompt tokens fold back into reported input counts** ("accounting still means 'tokens sent'"). 
+ +**The "stable-to-volatile" ordering pattern in detail.** The context is layered like an onion: + +``` +[stable] role instructions +[stable] tag protocol (with inline per-tag guidance) +[stable] context-management and write rules +[stable] discovered tool descriptions +[stable] install context (nagent's own context.yaml) +[stable] project context (repo's context.yaml) +[stable] root context (~/.nagent/context.yaml) +[stable] knowledge digest (regenerates on gc, but is stable within a gc cycle) +[volatile] instance facts +[volatile] environment +[volatile] conversation history (changes every turn) +``` + +The eight `[stable]` layers are stable across conversations of the same mode (same persona, same provider, same model = same conversation mode). The volatile suffix is per-conversation. Anthropic's `cache_control` breakpoints are placed at the boundary between stable and volatile, so the entire stable prefix is cached. + +**Manual Slop's current state** (per `src/ai_client.py:2883` summary): + +| Aspect | Manual Slop | nagent v2 | +|---|---|---| +| `cache_control` use | `_add_history_cache_breakpoint`, `_strip_cache_controls`, `_build_chunked_context_blocks` exist | `--cache-prefix-chars` + `cache_control` blocks | +| Stable prefix optimization | NOT explicit (the history-breakpoint approach is *temporal*, not *spatial*) | Explicit (stable layers first, volatile last) | +| Cached token accounting | Likely not folded back into input count | Folded back: "accounting still means 'tokens sent'" | +| System prompt + tool description | These go early in `_send_anthropic` calls but order is not formally enforced | Formally enforced by `build_initial_context` | + +**Verdict.** **PARTIAL (Application).** Manual Slop has the cache control *mechanism* (Anthropic ephemeral caching, Gemini explicit caching per `docs/guide_ai_client.md`) but does not have the stable-to-volatile *ordering discipline* that nagent makes explicit. 
The cost: cache hit rate depends on whether the *first N tokens* are stable across turns, which is currently incidental rather than designed. + +**The Anthropic detail** (worth a deep read): `src/ai_client.py` has `_ANTHROPIC_CHUNK_SIZE`, `_ANTHROPIC_MAX_PROMPT_TOKENS`, `_build_chunked_context_blocks`. These suggest Manual Slop's anthropic path *does* chunk content into multiple blocks (likely for prompt-size management), but whether those chunks align with stable/volatile boundaries — and whether the `cache_control` markers are at the *right* offsets — needs source verification. + +**Domain tag:** Application (the AI client is the Application's main AI entry point). + +**Effort:** Small (1-2 phases) IF the current cache_control calls are mostly correct. Medium (2-3 phases) if the ordering needs refactoring across the 5 providers. + +**Recommended priority:** **MEDIUM.** Real cost savings; depends on whether current Manual Slop usage actually achieves good cache hit rate (would need a measurement pass first). + +**Cross-references:** +- `docs/guide_ai_client.md` §"Anthropic ephemeral + Gemini explicit caching" — the existing pattern. +- v1 report §5 (The Loop) — the loop's "append, call, parse, act, repeat" pattern is the same shape as a cache-stable prefix + volatile suffix; the loop is the volatility. +- `qwen_llama_grok_followup_20260611` — added the `send_openai_compatible()` helper, which is the right shape for a provider-agnostic cache_control injection point. + +--- + +### 2.3 Conversation compaction (`--compact`) + +**nagent's claim.** Summarization loses detail. Compaction rewrites the conversation against user-editable guidance, *preserving* the relevant content. Different tool, different purpose. + +**nagent's implementation** (from the commit `2c3c78b` and the README's new §6): +- `--compact` is `--edit-conversation` driven by the user-editable `prompts/compact-conversation.md`. 
+- The compaction prompt is **user-editable** at the install level (`~/.nagent/prompts/compact-conversation.md` overrides the shipped version, root-first resolution). +- "Edit/compact roll up child token stats" — the compaction output preserves the recursive token rollup from sub-conversations (so the child runs are still auditable in the parent's accounting). +- Compaction preserves `initial_context` through edit flows — so the stable prefix stays stable. +- "Loaded conversations regain protocol preamble when missing" — the load path re-injects the preamble if it's been stripped. + +**The distinction from summarization:** +- **Summarize** = produce a short description of the conversation (one-way; lossy) +- **Compact** = rewrite the conversation to be shorter, *preserving* the same shape and intent (lossy on word count, lossless on structure) +- nagent's `--compact` is closer to "edit the conversation file to be smaller while keeping the reasoning intact." + +**Manual Slop's current state** (`src/gui_2.py:4252` `Compress` button): +> "Compress" button → `app_controller._handle_compress_discussion:3357` → `ai_client.run_discussion_compression` + +The button calls `ai_client.run_discussion_compression(disc_text)` and replaces the discussion with the LLM's compressed version. This is **summarization, not compaction** — it's a one-shot LLM call that produces a shorter text, but the shape is "a single string from the LLM" rather than "a rewritten conversation that preserves the structure." + +**Verdict.** **GAP (Application).** Manual Slop has summarization; it does not have behavior-preserving compaction. + +**The "behavior-preserving" distinction matters** because: +- A summary loses the *back-and-forth* shape of the discussion. The LLM sees one long string instead of turn-by-turn messages. +- A compaction would re-render the conversation with shorter turns, preserving the multi-turn structure that the LLM's chat completion API expects. 
+- A summary can be regenerated; a compaction cannot (it is the conversation). + +**Domain tag:** Application. The Compress button is in the GUI; the underlying call is in the AI client. + +**Effort:** Small (1 phase) if the existing `run_discussion_compression` is restructured to produce a compacted multi-turn shape. Medium (2 phases) if a user-editable prompt is added (parallel to nagent's user-editable `prompts/compact-conversation.md`). + +**Recommended priority:** **MEDIUM.** Worth doing; not as urgent as Candidate 11 (knowledge harvest). + +**Cross-references:** +- v1 takeaways §6 (Visible retry on protocol failure) — the "self-correction entry as a System role" pattern could be combined with compaction (compaction adds a "compaction marker" entry). +- v1 §3 (Editable State) — the editable conversation guidance is a v2 nagent pattern that v1 didn't capture. + +--- + +### 2.4 Project context files + +**nagent's claim.** Per-project context travels with the repo. When you `cd` into a project, nagent picks up the project's `context.yaml`/`context.md` automatically. Different projects can have different "personality" without forking the nagent install. + +**nagent's implementation** (from the README's new §6): +- `load_root_context()` reads `~/.nagent/context.yaml` or `context.md`; YAML can be a list or `{ "paths": [...] }`; nested `context.yaml` files expand recursively. +- **Install context** from the nagent folder itself (the parent of `bin/`) — this repository ships `context.yaml` pointing at `context/data-oriented-design.md`, the operating rules every conversation starts with. +- **Project context**: when nagent runs inside a git repository, a `context.yaml`/`context.md` at that repository's toplevel is included — per-project instructions that travel with the repo. +- **Injection order: install → project → root.** "The more personal context can override the more general; when the project toplevel *is* the install or root directory (e.g. 
running nagent from its own checkout), the file is included once, not twice." + +**Manual Slop's current state** (per `docs/paths.py` and `src/paths.py`): +- Per-project `manual_slop.toml` is the source of truth (per the *Comprehensive Path Mapping & Tooling* track). +- `paths.py` supports `[conductor].dir` override for project-specific conductor paths. +- Project context is *configuration* (TOML), not *operating rules* (markdown). + +**Verdict.** **PARITY (DIFFERENT MECHANISM).** Manual Slop has project-scoped configuration (TOML); nagent has project-scoped operating rules (markdown/YAML). Same intent, different syntax and different scope: +- nagent's `context.yaml` injects *prompt text* (operating rules, persona directives, knowledge) +- Manual Slop's `manual_slop.toml` injects *config* (paths, presets, hooks) + +**The gap** is that Manual Slop doesn't have a project-level prompt-injection mechanism. If the user wants a project's `manual_slop_context.md` to add "always be terse; prefer 200-line responses; focus on file X" — there is no current way to do that without editing the system prompt preset. + +**Domain tag:** Both (the file format is per-project; the renderer is in the App's prompt assembly). + +**Effort:** Small (1 phase) — a new `[context_files]` section in `manual_slop.toml` (or a `manual_slop_context.md` file at the project toplevel) read by `aggregate.py:run` at discussion start. + +**Recommended priority:** **LOW (but easy).** Could be done in a few hours as a follow-on to Candidate 12 (caching) since both touch `aggregate.py:run` ordering. + +**Cross-references:** +- v1 §1 (Durable Work) — the "data is the thing" philosophy says project context should be a file, not a GUI setting. nagent v2 makes this explicit. + +--- + +### 2.5 claude-code provider (5th provider, subscription auth) + +**nagent's claim.** A user with a Claude Code subscription should be able to use that subscription in nagent, not require a separate API key. 
The "claude-code" provider is a thin wrapper around the Claude Agent SDK that delegates auth to the local Claude Code install. + +**nagent's implementation** (from the README's new "Setup" section): +- Providers: `openai`, `anthropic`, `google`, `cursor`, **`claude-code`**. +- The `claude-code` provider runs prompts through the locally installed Claude Code via the Claude Agent SDK, so authentication is whatever Claude Code is logged in as (subscription or API key). +- The `default` model — same as omitting `--model` — means Claude Code's own configured model; any Claude model id or alias (`sonnet`, `opus`, `haiku`) overrides it. +- Tools are disabled for plain text generation; `nagent-llm-upload` permits only the Read tool so Claude Code can read the file locally. + +**The provider table now reads:** +| Provider | Default model | Credential | +|---|---|---| +| `openai` | `gpt-5.5` | `OPENAI_API_KEY` | +| `anthropic` | `claude-sonnet-4-6` | `ANTHROPIC_API_KEY` | +| `google` | `gemini-2.5-flash` | `GOOGLE_API_KEY` or `GEMINI_API_KEY` | +| `cursor` | `composer-2.5` | `CURSOR_API_KEY` | +| `claude-code` | `default` | **None** — uses local Claude Code login | + +**Manual Slop's current state** (per `src/ai_client.py:2883`): +- Providers in `ai_client.py`: `_send_anthropic`, `_send_gemini`, `_send_gemini_cli`, `_send_deepseek`, `_send_grok`, `_send_minimax`, `_send_qwen`, `_send_llama`, `_send_llama_native`. That's **9 send paths** for 8 providers (some have native vs shared-helper variants, e.g. `_send_llama` / `_send_llama_native`). +- The Gemini CLI path (`_send_gemini_cli`) is the **direct analog** of nagent's claude-code provider: it uses a local subprocess (the `gemini` CLI) with whatever auth the user has on their local install. + +**Verdict.** **PARITY.** Manual Slop already has the local-CLI subscription-auth pattern (Gemini CLI). The pattern nagent is adding for Claude Code is the same shape. No new Manual Slop work needed for this *pattern*; the question is whether to add a Claude Code provider specifically. 
That would be a new provider addition, not a new *pattern*. + +**Domain tag:** Application. The provider list lives in the AI client. + +**Effort:** Medium (a new provider is ~200-400 lines: SDK setup, message adapter, error classification, tool loop integration). nagent's `claude-code` is small because nagent has no GUI; Manual Slop's would be larger because of the multi-provider abstraction layer. + +**Recommended priority:** **LOW.** Not a track; a provider addition that fits into a future "more providers" follow-up if the user wants Claude Code integration. + +**Cross-references:** +- v1 §2 (Text In, Text Out) — the nagent llm-text primitive now has 5 providers; Manual Slop's `send()` has 8. Same shape. +- `qwen_llama_grok_followup_20260611` — the OpenAI-compatible shared helper makes adding a new OpenAI-compatible provider easy (Ollama, Grok, etc.). Claude Code's SDK is *not* OpenAI-compatible, so it would need a new adapter, not just a new entry in the helper. + +--- + +### 2.6 Per-file knowledge notes (`knowledge/files/{file_id}.md`) + +**nagent's claim.** When you know things about a specific file, those notes should live next to the file's identity (inode), not next to a conversation or a session. Then, the next time the file is in scope, the notes come back automatically. + +**nagent's implementation** (from the README's new §8): +- "Notes tied to a specific file mirror into `knowledge/files/{file_id}.md`." +- This is the "harvest" output's per-file projection. +- File identity is `st_dev:st_ino` (per v1 §6) — stable across renames. +- The notes are part of the per-file "neighborhood" alongside the file history, current summary, and co-edited files (per v1 §8). + +**Manual Slop's current state:** +- `src/models.py:510` `FileItem` schema has 9 fields: `path`, `auto_aggregate`, `force_full`, `view_mode`, `selected`, `ast_signatures`, `ast_definitions`, `ast_mask`, `custom_slices`. 
**No `notes` field.** +- `ContextPreset` is a saved set of `FileItem`s — also no notes propagation. +- The closest existing pattern is `custom_slices[].annotation` (free-form text per slice) and `ast_mask[].comment` (free-form per-symbol), but these are tied to structural slices/masks, not to a "what I learned about this file" note. + +**Verdict.** **GAP (Application).** FileItem has no notes field; the per-file knowledge dimension is absent. + +**Domain tag:** Application. + +**Effort:** Small (1 phase) — add `notes: str = ""` to `FileItem`, add a "Notes" text area to the Structural File Editor (`src/gui_2.py:render_structural_file_editor`), add a `notes_section` to the file-edit initial context (in `aggregate.py:run`). + +**Recommended priority:** **LOW** (small but niche) — bundle with Candidate 11 (knowledge harvest) as a sub-task. + +**Cross-references:** +- v1 §6 (Per-File Memory) — the FileItem + ContextPreset pair is Manual Slop's *curation* per-file memory; the notes field would add a *knowledge* per-file memory. Same identity, different dimension. +- v1 §8 (Neighborhoods) — nagent's "neighborhood" is `{file-history} + {file-summary} + {per-file knowledge} + {co-edited files}`. Manual Slop has parts of this; per-file knowledge notes is the missing piece. + +--- + +### 2.7 "Delete to turn off" feature flags + +**nagent's claim.** Feature flags should be data, not config. If a feature is gated by the presence of a file, the user can turn it off by deleting the file. No GUI toggle, no env var, no `config.toml` edit. Just `rm`. + +**nagent's implementation** (from the README's new §8): +- "A bounded digest.md (open tasks and questions first, newest first) regenerates from the category files — never from raw conversations, so your edits to the category files propagate — and is injected into every conversation's initial context as a {knowledge} block. **Delete digest.md and injection turns off. 
That is the whole switch.**" + +**The pattern generalized:** A feature is on iff a file exists. The file is the config. The user can flip the switch with `rm` and `touch`. + +**Manual Slop's current state:** +- `[ai_settings.toml]` toggles: `rag_enabled`, `auto_aggregate`, `force_full`, etc. +- GUI checkboxes for many of the same. +- Per-project `manual_slop.toml` settings. + +**Verdict.** **PARITY (DIFFERENT MECHANISM).** Manual Slop uses config files + GUI checkboxes; nagent uses file presence. Both are valid. The nagent pattern is more discoverable in the file tree (you can `ls ~/.nagent/knowledge/` and see "oh, digest.md is here, so the knowledge injection is on"); the Manual Slop pattern is more discoverable in the GUI. + +**Domain tag:** Both (this is a design pattern, not a feature). + +**Effort:** N/A (this is a design pattern, not a track). Worth noting in `conductor/product-guidelines.md` §"AI-Optimized Compact Style" or as a new styleguide. + +**Recommended priority:** **LOW** (design pattern note). + +**Cross-references:** +- The "Live State Inspector" candidate from v1 takeaways §1 (State visibility) — combining the inspector with a "feature presence map" would surface which features are on/off based on file presence. + +--- + +### 2.8 Save-with-graceful-summary-failure + +**nagent's claim.** A save operation should not fail because a non-essential post-step (like an LLM-generated summary) failed. Degrade gracefully: save the artifact, mark the missing piece visibly. + +**nagent's implementation** (from the commit `67a3ea5`): +> "Save-conversation indexes the copy even when the summary LLM fails; fresh conversations build initial context once; compact prompt resolves root-first; edit/compact roll up child token stats" + +And from the README's §6: +> "`--save-conversation NAME` copies the conversation and records it, with an LLM-generated summary, in a saved-conversations index. 
**If the summary fails (no credentials, provider down), the save still completes — the index gets a visible '(summary unavailable)' marker instead of losing the entry.**" + +**The pattern.** Critical operation completes; non-critical post-step is best-effort. The marker (`(summary unavailable)`) is visible and explicit. The user can re-run the summary later if they want. + +**Manual Slop's current state:** +- `ai_client.run_discussion_compression(disc_text)` is the equivalent of `--summarize` in nagent. +- The Compress button in the GUI calls this; on LLM failure, the discussion is *not* replaced (presumably — needs source verification). +- The current behavior on LLM failure is unknown without reading the source. + +**Verdict.** **UNKNOWN** without reading the source. If Manual Slop's `run_discussion_compression` raises on LLM failure, that's a gap (a failed Compress should not destroy the original). If it returns the original on failure, parity. + +**Domain tag:** Application. + +**Effort:** Small (1 phase) IF the current behavior is "raise on failure." Trivial (just a test) IF the current behavior is "fall back to original." + +**Recommended priority:** **MEDIUM** (or maybe HIGH if the current behavior is destructive). Needs verification. + +**Cross-references:** +- `data_oriented_error_handling_20260606` — the "errors are just cases" framework means `run_discussion_compression` should return a `Result[str, ErrorInfo]`, not raise. If the current code raises, that's a pre-existing bug that this v2 finding surfaces. + +--- + +### 2.9 Delegation reframed as "context management, not parallelism" + +**nagent's claim.** "Delegation is context management before it is parallelism." The reason to spawn a sub-conversation is to keep the parent's context clean. The fact that the child runs concurrently (sometimes) is incidental. 
+ +**nagent's implementation** (from the README's new §12): +> "And hand off when noisy: when its own conversation is mostly stale tool output, distill goal, state, and decisions into a fresh sub-conversation and delegate the rest — compaction semantics through the one mechanism the model already has, without racing the live file." + +The reframing table: +| Long-lived agent abstractions | Disposable workers | +|---|---| +| Identity is central | Output artifact is central | +| Shared context gets noisy | Child context is isolated | +| Parent absorbs all exploration | Parent gets a concise result | +| Delegation implies personality | Delegation is context management | + +**Manual Slop's current state:** +- `src/multi_agent_conductor.py:run_worker_lifecycle` already does this implicitly: each MMA Tier 3 worker is a fresh subprocess with Context Amnesia. +- The "disposable worker" pattern is already the MMA pattern. +- The reframing as "context management" is *new phrasing*, not new behavior. + +**Verdict.** **PARITY (NEW FRAMING).** Manual Slop's MMA already does the right thing; the new framing is a *philosophical* addition. Useful for the next design discussion about *why* MMA has subprocesses, not whether it should. + +**Domain tag:** Both (philosophical; the design pattern is documented, not implemented). + +**Effort:** N/A (design pattern note). + +**Recommended priority:** **LOW** (documentation update, not a track). + +**Cross-references:** +- v1 §9 (Sub-conversations) — the user's explicit want for 1:1 sub-conversations. +- `docs/guide_mma.md` §"Token Firewalling" — the existing framing is "firewall the token budget," not "manage the parent's context." Both are true; the latter is the better framing for the SubConversationRunner design. + +--- + +## 3. The 13-step "Build Your Own" list — what changed + +The v1 report's Appendix A cited the 12-step Build Your Own list. 
v2 nagent's list has **13 steps** (the new step is **#10: Harvest dead conversations into a knowledge store; inject a bounded digest**). + +| # | v1 (12 steps) | v2 (13 steps) | Change | +|---|---|---|---| +| 1 | `generate_text(file) -> str` | Same | Unchanged | +| 2 | Growing conversation document | Same | Unchanged | +| 3 | Initial context that states the contract | Same | Unchanged | +| 4 | Output format and a small strict parser | Same | Unchanged | +| 5 | Handlers that append results back into state | Same | Unchanged | +| 6 | Loop after actions | Same | Unchanged | +| 7 | Visible retry on malformed output | Same | Unchanged | +| 8 | Save/load/branch/edit/compact for conversation maintenance | Same | Unchanged (`compact` is new; `branch` is new) | +| 9 | Repository history → context blocks | Same | Unchanged | +| 10 | (n/a) | **Harvest dead conversations into a knowledge store; inject a bounded digest** | **NEW** | +| 11 | Per-artifact memory with stable ids and bounded write authority | Same | Unchanged | +| 12 | Split/index/patch for large files | Same | Unchanged | +| 13 | Child loops for delegation | Same | Unchanged | + +**The implication for v1's Appendix A**: it's now stale by one step. The 12-step list should become 13. The new step 10 is the most important Manual Slop candidate (Candidate 11). + +--- + +## 4. Staleness in v1 artifacts + +This is the actionable section. For each v1 artifact, what needs updating? 
+ +### 4.1 `report.md` (v1) — staleness map + +| Section | Staleness | Action needed | +|---|---|---| +| §1 (Durable Work) | Partially stale (no mention of knowledge harvest or stable-to-volatile ordering) | Append a paragraph on knowledge harvest as a "what files buy you" consequence | +| §2 (Text In, Text Out) | Mostly correct; one detail: 5 providers now (was 4) | Note the 5th provider (claude-code) in the comparison | +| §3 (Conversations Are Editable State) | Stale — no mention of `--compact`, `--branch-conversation`, or compaction guidance | Add a new sub-section on compaction as distinct from summarization | +| §4 (Visible Output Protocol) | Still correct, but nagent has a new `nagent_tags.py` explicit parser | Note the parser refactor (was regex; now explicit) | +| §5 (The Loop) | Still correct | None | +| §6 (Per-File Memory) | Stale — no mention of per-file knowledge notes (`knowledge/files/{file_id}.md`) | Add a paragraph on per-file notes as a *new* dimension | +| §7 (Repository History) | Still correct | None | +| §8 (Neighborhoods) | Stale — no mention of the new "everything else files buy you" expansion in §9 of v2 README | Add cross-reference to the new §9 | +| §9 (Sub-Conversations) | Stale — no mention of the new "delegation as context management" reframing or the new `conversation-file="name"` worker reuse pattern | Add a paragraph on the reframing and the worker-reuse pattern | +| §10 (Controlled Writes) | Still correct | None | +| §11 (Large Files) | Still correct (the splitter O(n²) → O(n) fix is performance, not semantic) | Note the perf fix in passing | +| §12 (Tool Discovery) | Still correct | None | +| §13 (Differences from Frameworks) | Stale — the v2 README reframes this as "Own the Inputs" (Part VII §14) | Update title to "Own the Inputs" and note the new framing | +| §14 (Build Your Own) | Stale — 12 steps is now 13 | Bump to 13, add the knowledge harvest step | +| §15 (The 6 Pitfalls) | Stale — no mention of "no knowledge harvest" or
"no stable-to-volatile ordering" or "no behavior-preserving compaction" | Add 3 new pitfalls (or replace 3 of the 6 with the new ones) | +| Appendix A (Cross-reference table) | Stale — `bin/nagent-llm-text` now has 5 providers, not 4 | Update provider count | +| Appendix B (Citations) | Stale — missing the v2 commit SHAs and the new `nagent-gc`, `nagent_tags.py`, `context/data-oriented-design.md` files | Update citations | + +**Net:** 9 of the 17 rows (15 sections + 2 appendices) are stale, and 4 more (§1, §2, §4, §11) need small additions; only §5, §7, §10, and §12 need no change. The v1 report is not *wrong* — the 14-section structure is still correct — but it's missing the v2 additions. A v3 (or v1-revised) report would add ~150-200 lines covering the new patterns. + +### 4.2 `comparison_table.md` (v1) — staleness map + +| Row | Staleness | Action needed | +|---|---|---| +| §3 (Editable State) | Stale — no mention of `--compact` or `--branch-conversation` | Add column for "compaction" | +| §6 (Per-File Memory) | Stale — no mention of per-file notes | Add column for "notes" | +| §8 (Neighborhoods) | Still correct | None | +| §9 (Sub-Conversations) | Stale — no mention of worker-reuse or compaction-via-handoff | Add column for "context management" | +| §11 (Large Files) | Mostly correct (the perf fix is a detail) | Add a row about prompt caching (nagent's `--cache-prefix-chars`) | +| §12 (Tool Discovery) | Still correct | None | +| §14 (Build Your Own) | Stale (12 steps → 13) | Update step count | +| **NEW ROW** | n/a | Add row for "Knowledge Harvest" (nagent §8) — Manual Slop verdict: GAP (RAG is not the same shape) | +| **NEW ROW** | n/a | Add row for "Prompt Caching Strategy" (nagent §1) — Manual Slop verdict: PARTIAL (mechanism present, ordering not enforced) | +| **NEW ROW** | n/a | Add row for "Compaction vs Summarization" (nagent `--compact`) — Manual Slop verdict: GAP (Compress button is summarize, not compact) | +| **NEW ROW** | n/a | Add row for "Per-File Knowledge Notes" (nagent `knowledge/files/{file_id}.md`) — Manual Slop verdict: GAP (FileItem has no notes field) | +
+**Net:** 4 existing rows need updates, 4 new rows needed. The flat table grows from 14 rows to 18 rows. + +### 4.3 `decisions.md` (v1) — staleness map + +| Candidate | Staleness | Action needed | +|---|---|---| +| 1 (SubConversationRunner) | Mostly correct, but v2 adds: return value should include knowledge updates, not just string artifact | Update return-type description to include `knowledge_updates: list[KnowledgeBullet]` | +| 2 (RAG pre-staging) | Stale — knowledge harvest might be a *better* answer to the same problem (the user wants "I prep before I run"; nagent's harvest is the same intent, but for already-completed runs, not pre-runs) | Add a paragraph noting the overlap and recommending Candidate 11 (knowledge harvest) as the primary answer, with Candidate 2 (RAG pre-staging) as a special case for *upcoming* runs | +| 3 (Stateless LLMClient) | Stale — must now support stable-to-volatile context ordering as a design constraint | Add design constraint: the LLMClient constructor takes a "context builder" that emits layers in stable-to-volatile order | +| 4 (Intent DSL) | Still correct | None | +| 5 (Self-describing tools) | Still correct | None | +| 6 (git_history) | Still correct | None | +| 7 (Per-file conversation log) | Stale — per-file knowledge notes (Candidate 11) might be a better answer (the user has the notes dimension, not the conversation-log dimension) | Re-rank: knowledge notes are simpler and more durable; conversation log is heavier | +| 8 (coedited_files) | Still correct | None | +| 9 (split/patch lib) | Still correct (and the O(n²) → O(n) perf fix in nagent is a hint that this is worth doing if/when needed) | Note the perf fix | +| 10 (raw-transcript persistence) | Still correct | None | +| **NEW 11** | n/a | **Knowledge Harvest** (per §2.1 above) — HIGH priority | +| **NEW 12** | n/a | **Stable-to-Volatile Context Ordering for Caching** (per §2.2 above) — MEDIUM priority | +| **NEW 13** | n/a | **Conversation Compaction** (per §2.3 above) 
— MEDIUM priority | +| **NEW 14** | n/a | **Project Context File** (per §2.4 above) — LOW priority (small but easy) | +| **NEW 15** | n/a | **Save-with-Graceful-Summary-Failure** (per §2.8 above) — needs source verification; possibly HIGH if current behavior is destructive | + +**Net:** 4 existing candidates need updates; 5 new candidates needed. The decisions list grows from 10 to 15. + +### 4.4 `nagent_takeaways_20260608.md` (v1) — staleness map + +| Takeaway | Staleness | Action needed | +|---|---|---| +| §1 (State visibility) | Still correct | None | +| §2 (Readable conversation log) | Stale — nagent v2's `--compact` is a third option (rewrite to preserve structure) | Add a paragraph on compaction as option C | +| §3 (Sub-agents for 1:1) | Stale — should mention the v2 reframing ("delegation is context management") and the new `conversation-file="name"` worker reuse | Update the design constraint section | +| §4 (File identity) | Still correct | None | +| §5 (One loop, one file) | Stale — the v2 stable-to-volatile ordering is the "one loop" insight refined | Add a paragraph on the ordering insight | +| §6 (Visible retry) | Still correct | None | +| §7 (Prompts vs function calls) | Stale — nagent v2 added a stricter explicit parser (`nagent_tags.py`) | Note the parser refactor | +| §8 (Self-describing tools) | Still correct | None | +| §9 (Edit the input, not the output) | Stale — the v2 compaction is "rewrite the input to be smaller while preserving intent" | Add a paragraph on compaction as "edit the input" | +| §10 (Sub-agent return type) | Stale — the v2 example shows the return type is ``, with no knowledge update | Update the return type to include knowledge updates | +| **NEW** | n/a | **Knowledge Harvest** (10-15 lines, per §2.1) — HIGH priority actionable | +| **NEW** | n/a | **Stable-to-Volatile Context Ordering** (10-15 lines, per §2.2) — MEDIUM priority actionable | +| **NEW** | n/a | **Conversation Compaction** (10-15 lines, per §2.3) — MEDIUM 
priority actionable | + +**Net:** 6 of 10 takeaways need updates; 3 new takeaways needed. The takeaways doc grows from 10 to 13. + +--- + +## 5. What is still correct in v1 + +For completeness — what's *not* stale: + +- **All 14 v1 sections are still structurally correct.** The 14 patterns (durable work, text-in-text-out, editable state, visible protocol, the loop, per-file memory, repo history, neighborhoods, sub-conversations, controlled writes, large files, tool discovery, differences from frameworks, build your own) are *all still there* in the v2 README. The v2 README re-organizes them into 7 Parts with a teaching-arc structure, but the substance is the same. +- **The 6 v1 pitfalls are still real.** None of them have been "solved" by the v2 changes. The 3 new pitfalls in v2 (no knowledge harvest, no stable-to-volatile ordering, no behavior-preserving compaction) are *additions*, not corrections. +- **The 4 Application features (per file:line) that v1 said are strong are still strong.** FileItem + ContextPreset + Fuzzy Anchors + UISnapshot have not been deprecated or replaced. +- **The Application vs Meta-Tooling distinction is still load-bearing.** v2 nagent is still a Meta-Tooling reference; the Application's choices (provider-native function calling, GUI, long-lived state) are still the right ones for the Application domain. +- **The 10 future-track candidates are all still real candidates.** 4 of them need updates; none are obsoleted. +- **The "Application is intentionally not nagent" claim is still true.** v2 nagent's new features (knowledge harvest, compaction, prompt caching) are *more* reason to keep the Application's choices — these patterns would add complexity that the Application doesn't need at its current scale. + +--- + +## 6. New future-track candidates (formal proposals) + +### Candidate 11: Knowledge Harvest & Store (HIGH priority) + +**User signal:** Not yet surfaced (the v1 review didn't see this; v2 surfaces it for the first time). 
+ +**Why it matters.** RAG is the wrong shape for "what did we learn from past sessions that we want to inject as stable knowledge." RAG is for *semantic retrieval at query time*; knowledge harvest is for *durable, auditable, user-editable knowledge* injected as a stable prefix. Manual Slop's RAG (`src/rag_engine.py:1-384`) and nagent's knowledge harvest (nagent `nagent-gc` + `~/.nagent/knowledge/`) solve different problems: +- RAG: "given a query, find similar chunks in the corpus" (vector similarity, fuzzy, opaque) +- Knowledge harvest: "given a corpus, distill durable facts into a user-editable store with provenance" (markdown files, exact, auditable) + +**What it would do.** A new `src/knowledge_store.py` module + companion `src/knowledge_harvest.py`: +- `KnowledgeStore` class with `add_bullet(category, text, provenance)`, `get_digest(budget_chars)`, `regenerate_digest()`, `delete_digest()` (turn-off switch) +- `KnowledgeHarvester` class with `harvest_conversation(discussion) -> list[KnowledgeBullet]` (LLM call against an editable `prompts/harvest-conversation.md`) +- A `src/harvest_cli.py` (or GUI panel) that does the dry-run → apply cycle, like `nagent-gc` +- A bounded `{knowledge}` block injected into `aggregate.py:run` initial context (the *stable* position — it's cache-friendly) +- A "Knowledge" panel in the GUI (similar to the Logs Management panel) for browsing, editing, pruning +- Per-file knowledge notes in `~/.manual_slop/knowledge/files/{file_id}.md` (parallel to `FileItem.notes` Candidate 11.1) + +**Where it lives.** Application. The knowledge store is user-editable; the harvest is an in-process LLM call. + +**Depends on.** `data_oriented_error_handling_20260606` (the `Result`/`ErrorInfo` pattern for the harvest LLM call's return type). 
+ +**Effort.** **Large.** 3-5 phases: (1) KnowledgeStore + digest regeneration, (2) KnowledgeHarvester + harvest-conversation prompt, (3) GUI panel + file picker, (4) aggregate.py integration + cache-position verification, (5) per-file notes + FileItem extension. ~500-800 lines + tests. + +**Recommended priority.** **HIGH** — re-ranks above Candidates 4, 6, 7, 8, 9, 10. The user has not yet seen this; surfacing it as the v2 report's primary output is the right next step. + +**Cross-references:** +- v1 §6 (Per-File Memory) — adds a *knowledge* dimension alongside the *curation* dimension. +- v1 §7 (Repository History) — git history is a *kind* of "preserved work" that nagent now has a second instance of (knowledge harvest) to complement. +- `data_oriented_error_handling_20260606` — the harvest LLM call is the first use case that benefits from a `Result[list[KnowledgeBullet], ErrorInfo]` return type. + +--- + +### Candidate 12: Stable-to-Volatile Context Ordering for Caching (MEDIUM priority) + +**User signal:** Not yet surfaced. Anthropic caching is in place (per `docs/guide_ai_client.md`); the *ordering discipline* is not. + +**Why it matters.** Anthropic's `cache_control` markers work on *block boundaries*. The cost benefit comes from the stable prefix being *byte-identical* across turns. If the order of context layers changes per turn (e.g., per-discussion system prompt, per-discussion tool list, per-turn diff), the cache hit rate drops. + +**What it would do.** A refactor of `src/ai_client.py:_get_combined_system_prompt` and the Anthropic-specific call site to enforce stable-to-volatile ordering: +- **Stable layers** (in order, identical across turns of the same mode): + 1. Role instructions (the model + provider) + 2. Tag protocol / tool protocol / function-calling schema + 3. Discovered tool descriptions + 4. System prompt (the user's chosen preset) + 5. Persona profile (if any) + 6. Project context (per `manual_slop.toml` — Candidate 14) + 7.
Knowledge digest (if Candidate 11 is built) +- **Volatile layers** (per-turn, not cached): + 8. Instance facts (current discussion, current file items) + 9. Tool-call results from prior turns + 10. The user message + +**Where it lives.** Application. The `ai_client.py` refactor. + +**Depends on.** None directly. Could leverage `qwen_llama_grok_followup_20260611`'s `send_openai_compatible()` helper for the Anthropic-specific call site. + +**Effort.** **Small to medium.** 1-2 phases if the existing `_build_chunked_context_blocks` already does the right thing (it might, just not formally). 2-3 phases if the chunks need to be re-positioned. + +**Recommended priority.** **MEDIUM.** Real cost savings on Anthropic-heavy usage. Should be preceded by a measurement pass: log the cache hit rate before and after, so the win is quantified. + +**Cross-references:** +- `docs/guide_ai_client.md` §"Anthropic ephemeral + Gemini explicit caching" — the existing pattern. +- v1 §5 (The Loop) — the loop's append/parse/act structure is the volatility; the cache lives in the stable prefix. + +--- + +### Candidate 13: Conversation Compaction (MEDIUM priority) + +**User signal:** Not yet surfaced. The Compress button is summarization; the user might prefer compaction for "I want this conversation shorter but still multi-turn." + +**Why it matters.** Summarization loses the multi-turn shape that the LLM's chat completion API expects. Compaction rewrites the conversation in place, preserving the structure but reducing word count. 
+ +**What it would do.** A new `ai_client.run_discussion_compaction(disc_text, prompt_path="~/.manual_slop/prompts/compact-discussion.md")` that: +- Reads an editable compaction prompt (root-first: `~/.manual_slop/prompts/compact-discussion.md` overrides the shipped version) +- Calls the LLM to produce a compacted multi-turn rendering of the conversation +- Validates the output: must be a `list[dict]` with the same role/content/collapsed shape as the input +- Falls back to the original on parse failure (graceful, per nagent v2's save pattern) + +A new `gui_2.py` button "Compact" (next to the existing "Compress") that calls this instead of `run_discussion_compression`. + +**Where it lives.** Application. + +**Depends on.** None. + +**Effort.** **Small to medium.** 1-2 phases. The existing `run_discussion_compression` is a starting template. + +**Recommended priority.** **MEDIUM.** Worth doing; not as urgent as Candidate 11 or 12. + +**Cross-references:** +- v1 §3 (Editable State) — the "compaction guidance is user-editable" pattern parallels nagent v2's `prompts/compact-conversation.md`. +- v1 §15.2 (Provider-specific history in process globals) — compaction might be a stepping stone to the Stateless LLMClient refactor (Candidate 3): if the conversation is compacted to a known shape, the projection of `disc_entries` to provider history becomes trivial. + +--- + +### Candidate 14: Project Context File (LOW priority, but small) + +**User signal:** Not yet surfaced. + +**Why it matters.** `manual_slop.toml` is project config; it's the right shape for paths, presets, and hooks. But it is not the right shape for "operating rules that travel with the repo" (e.g., "always be terse; prefer 200-line responses; focus on file X"). A `manual_slop_context.md` at the project toplevel would inject as a `{project-context}` block in the initial context, just before the volatile layers. 
+ +**What it would do.** A new `[context_files]` section in `manual_slop.toml` (or a top-level `manual_slop_context.md` file) read by `aggregate.py:run` at discussion start. + +**Where it lives.** Application. `aggregate.py:run` is the consumer. + +**Depends on.** None. + +**Effort.** **Small.** 1 phase. ~100 lines + a documentation note. + +**Recommended priority.** **LOW** (small but niche). Could be done as a small follow-on to Candidate 12 (both touch `aggregate.py:run` ordering). + +**Cross-references:** +- v1 §1 (Durable Work) — the "data is the thing" philosophy says project context should be a file. +- Candidate 12 (Stable-to-Volatile Ordering) — the project context is a *stable* layer in the new ordering; adding Candidate 14 first makes Candidate 12's design simpler. + +--- + +### Candidate 15: Save-with-Graceful-Summary-Failure (priority TBD, needs source read) + +**User signal:** Not yet surfaced. + +**Why it matters.** nagent v2 makes this an explicit principle: critical operations complete; non-essential post-steps are best-effort. Manual Slop's `run_discussion_compression` is the candidate for verification. + +**What it would do.** (PENDING VERIFICATION) — read `src/ai_client.py:run_discussion_compression` and the `_handle_compress_discussion:3357` path to see if a failed LLM call destroys the original discussion. + +**Where it lives.** Application. + +**Depends on.** None. + +**Effort.** **Small** (1 phase) IF the current behavior is "raise on failure." Trivial (just a test) IF the current behavior is "fall back to original." + +**Recommended priority.** **TBD** — HIGH if the current behavior is destructive (it would be a latent bug). LOW if not. Verification first. + +**Cross-references:** +- `data_oriented_error_handling_20260606` — the `Result` pattern means a failed LLM call returns `Result.error`, not raises. If the current code raises, that's a pre-existing bug. + +--- + +## 7.
Impact on existing future-track candidates + +| Candidate | v1 priority | v2 priority change | Why | +|---|---|---|---| +| 1 (SubConversationRunner) | HIGH | HIGH (unchanged) | The v2 `conversation-file="name"` pattern is a *new* use case for the runner; return type should include `knowledge_updates` | +| 2 (RAG pre-staging) | HIGH | MEDIUM (down) | The knowledge harvest (Candidate 11) is a better answer to the "I prep before I run" intent. RAG pre-staging remains useful for *upcoming* runs (where harvest is post-hoc) but is no longer the primary pattern | +| 3 (Stateless LLMClient) | MEDIUM | MEDIUM (unchanged) | Now must support stable-to-volatile ordering as a design constraint | +| 4 (Intent DSL) | LOW | LOW (unchanged) | No change | +| 5 (Self-describing tools) | LOW | LOW (unchanged) | No change | +| 6 (git_history) | MEDIUM | MEDIUM (unchanged) | No change | +| 7 (Per-file conversation log) | LOW | LOW (down to LOW-bundle) | Per-file knowledge notes (Candidate 11) are a simpler, more durable answer to the same intent. Conversation log is heavier and overlaps with `disc_entries` | +| 8 (coedited_files) | LOW | LOW (unchanged) | No change | +| 9 (split/patch lib) | DEFER | DEFER (unchanged) | No change (nagent v2's O(n²) → O(n) perf fix is a hint, not a trigger) | +| 10 (raw-transcript persistence) | LOW | LOW (unchanged) | No change | +| **NEW 11** | n/a | **HIGH** | The single most important v2 finding | +| **NEW 12** | n/a | MEDIUM | Real cost savings on Anthropic-heavy usage | +| **NEW 13** | n/a | MEDIUM | Worth doing; not urgent | +| **NEW 14** | n/a | LOW | Small but easy | +| **NEW 15** | n/a | TBD | Needs source verification | + +--- + +## 8. Verification needs (what to read before the next review) + +The v2 report's claims are grounded in the new README and commit messages. The following Manual Slop internals were not read in full and would benefit from a Tier 2 Tech Lead source read before any of the new candidates (11-15) are scoped: + +1. 
**`src/aggregate.py:run`** (1-518) — verify the current context-builder ordering. Is it stable-to-volatile? Where do tool descriptions, system prompt, and project context sit relative to the volatile layers? *(Needed for Candidates 12 and 14.)* + +2. **`src/ai_client.py:run_discussion_compression`** — verify the failure mode. Does it raise, return None, or return the original? *(Needed for Candidate 15.)* + +3. **`src/ai_client.py:_send_anthropic`** — verify the cache_control block placement. Are the markers at stable/volatile boundaries, or just at chunk-size boundaries? *(Needed for Candidate 12.)* + +4. **`src/ai_client.py:_get_combined_system_prompt`** (referenced in summary) — verify the order of layers in the system prompt. The v1 report assumed "system prompt + tool list first" but the actual order may differ. *(Needed for Candidate 12.)* + +5. **`src/ai_client.py:run_subagent_summarization`** (referenced in v1) — verify the retry budget. nagent's `SUMMARY_MAX_ATTEMPTS = 2` is a fixed cap; Manual Slop's budget may differ. *(Needed for any future work on summarization.)* + +6. **`src/rag_engine.py:RAGEngine.search`** (1-384) — verify the `mtime` invalidation and the ChromaDB persistence path. The knowledge harvest is *not* a replacement for RAG; the two coexist. Understanding the existing RAG's failure modes is needed to scope Candidate 11 (what does the knowledge harvest *not* need to do because RAG already does it?). + +7. **`src/paths.py`** — verify the per-project override path. The "project context file" pattern (Candidate 14) needs to be resolved at the project level; the existing `paths.py` API is the right shape. *(Needed for Candidate 14.)* + +8. **`src/models.py:FileItem`** (around line 510) — verify the existing schema. Adding a `notes` field is a non-breaking change, but the migration story (existing `manual_slop.toml` files) needs to be defined. 
+ +These reads can be done in a single Tier 2 source-read pass (1-2 hours); they don't require a full track. The output is a revised Candidates 11-15 with concrete file:line references and effort estimates. + +--- + +## 9. Recommended next steps (for the user) + +The user (the product owner) said: "After we'll look into updating upcoming tracks and documentation related to it, along with the agent workflow docs." The v2 report's recommendations, in priority order: + +1. **Surface Candidate 11 (Knowledge Harvest) to the user.** This is the single most important v2 finding. The user has not seen this pattern. If the user wants to pursue it, the v1 `decisions.md` should be updated to add it as Candidate 11 with HIGH priority, bumping the existing list. + +2. **Verify Candidate 15 first (Save-with-Graceful-Summary-Failure).** This is potentially a latent bug; verification is cheap (one source read); if the bug exists, it's the highest-priority fix in the entire v2 report. + +3. **Update v1's `decisions.md` and `comparison_table.md` to reflect the 4 stale sections and 5 new candidates.** This is a documentation update, not a code change. The user mentioned "documentation related to it" as the next step after the report. + +4. **Update v1's `nagent_takeaways_20260608.md` to add 3 new actionable patterns** (knowledge harvest, stable-to-volatile ordering, conversation compaction) and update 6 of the 10 existing takeaways with the v2 insights. + +5. **Update v1's `report.md` to add new sub-sections on knowledge harvest, stable-to-volatile ordering, and conversation compaction.** A v1.1 update is appropriate; the v1 file is not deleted. + +6. 
**Update the agent workflow docs** (`AGENTS.md`, `conductor/workflow.md`, `conductor/product-guidelines.md`) to incorporate the v2 patterns as design principles: + - The "knowledge is data" pattern (provenance, user-editable, delete-to-turn-off) → add to `product-guidelines.md` §"AI-Optimized Compact Style" or as a new styleguide `conductor/code_styleguides/knowledge_artifacts.md` + - The stable-to-volatile ordering for caching → add to `conductor/tech-stack.md` §"ai_client" or as a new styleguide `conductor/code_styleguides/cache_friendly_context.md` + - The "compaction vs summarization" distinction → add to `conductor/code_styleguides/llm_workflow.md` (new styleguide) + - The "delegation is context management, not parallelism" framing → update `docs/guide_mma.md` §"Token Firewalling" to use the new framing + +7. **Schedule a Tier 2 source-read pass** to verify the 8 items in §8 above, and produce a revised `decisions.md` with concrete file:line references for Candidates 11-15. + +8. **Consider whether to bump the v1 review's track to "completed" once the v2 follow-up is integrated.** The v1 review's track is currently `active` (per `state.toml`); the v2 report is a natural follow-up. After the user reviews the v2 report and confirms which new candidates to pursue, the v1 review can be marked completed and a new track (or several) can be initialized for the chosen candidates. + +--- + +## 10. 
References (v2-specific) + +- **nagent source:** https://github.com/macton/nagent (at commit `eb6be32a`, 2026-06-12 00:25:50 UTC) +- **nagent v2 README:** https://github.com/macton/nagent/blob/main/README.md (regenerated 2026-06-12) +- **v2 commit log:** https://api.github.com/repos/macton/nagent/commits?per_page=30 (4 substantive commits: `2c3c78b`, `67a3ea5`, `5e269ca`, `ee72cb4`) +- **v1 review artifacts (preserved, not deleted):** + - `report.md` — v1 14-section deep-dive + - `comparison_table.md` — v1 flat reference + - `decisions.md` — v1 10 future-track candidates + - `nagent_takeaways_20260608.md` — v1 10 actionable patterns + - `spec.md` — v1 track wrapper + - `state.toml` — v1 track state + - `metadata.json` — v1 track metadata + +## Appendix A. Cross-reference: v1 sections → v2 README sections + +| v1 section | v2 location (Part + §) | New content in v2? | +|---|---|---| +| 1. Durable Work | Part II §5 (You Did Not Build an Agent) | Reframed (no new content) | +| 2. Text In, Text Out | Part I §1 | claude-code provider added | +| 3. Editable State | Part III §6 | --compact, --branch-conversation, user-editable compaction prompt | +| 4. Visible Protocol | Part I §2 | nagent_tags.py explicit parser (was regex) | +| 5. The Loop | Part I §3 | Caching integration; stable-to-volatile ordering | +| 6. Per-File Memory | Part VI §13 | Per-file knowledge notes (knowledge/files/{file_id}.md) | +| 7. Repository History | Part IV §7 | Unchanged | +| 8. Neighborhoods | Part VI §11 | Per-file knowledge notes joined to neighborhood | +| 9. Sub-Conversations | Part VI §12 | "Delegation is context management, not parallelism"; `conversation-file="name"` worker reuse | +| 10. Controlled Writes | Part I §3 (in-loop) | Unchanged | +| 11. Large Files | Part VI §12 | O(n²) → O(n) perf fix in splitter | +| 12. Tool Discovery | Part I §4 | nagent-gc added to tool list | +| 13. Differences from Frameworks | Part VII §14 | Reframed as "Own the Inputs" | +| 14. 
Build Your Own | (end of README) | 12 → 13 steps (knowledge harvest added) | +| **NEW** | Part IV §8 | **Harvest Knowledge, Reclaim Space** (the big new pattern) | +| **NEW** | Part IV §9 | **Everything Else Files Buy You** (case-study enumeration) | +| **NEW** | Part V §10 | **Data-Oriented Design** (formal name for the principles) | +| **NEW** | Setup | claude-code provider documented | + +**Net:** 3 new parts (IV, V, VI explicit in v2; was a flat 14 in v1), 4 new numbered sections (8, 9, 10, and the new 11/12/13 expansions), 13-step Build Your Own (was 12). + +--- + +End of v2 report. diff --git a/conductor/tracks/nagent_review_20260608/state.toml b/conductor/tracks/nagent_review_20260608/state.toml index c6ee5c4d..b8cdef28 100644 --- a/conductor/tracks/nagent_review_20260608/state.toml +++ b/conductor/tracks/nagent_review_20260608/state.toml @@ -7,7 +7,7 @@ track_id = "nagent_review_20260608" name = "nagent Review (Mike Acton's data-oriented LLM agent reference)" status = "active" current_phase = 0 # 0 = pre-completion; this track produces no code phases -last_updated = "2026-06-08" +last_updated = "2026-06-12" [user_corrections_log] # Corrections applied to the first draft based on direct user feedback during review @@ -39,6 +39,44 @@ t_write_07 = { status = "pending", commit_sha = "", description = "Add entry t_write_08 = { status = "pending", commit_sha = "", description = "Human review of report.md + nagent_takeaways_20260608.md (final)" } t_archive = { status = "pending", commit_sha = "", description = "Move track to conductor/tracks/archive/ when follow-up tracks are specced (or sooner if no value remains)" } +# v2 review (2026-06-12): 8 new nagent commits since v1; README restructured; knowledge harvest is the major new pattern +t_v2_review_01 = { status = "completed", commit_sha = "", description = "v2 review: enumerate 8 new nagent commits between 2026-06-08 and 2026-06-12" } +t_v2_review_02 = { status = "completed", commit_sha = "", description = "v2 
review: document knowledge harvest (nagent-gc) pattern + per-file notes" } +t_v2_review_03 = { status = "completed", commit_sha = "", description = "v2 review: document stable-to-volatile context ordering for prompt caching" } +t_v2_review_04 = { status = "completed", commit_sha = "", description = "v2 review: document conversation compaction (--compact) vs summarization" } +t_v2_review_05 = { status = "completed", commit_sha = "", description = "v2 review: document project context files, claude-code provider, save-with-graceful-failure" } +t_v2_review_06 = { status = "completed", commit_sha = "", description = "v2 review: map staleness in v1 report.md / comparison_table.md / decisions.md / nagent_takeaways_20260608.md" } +t_v2_review_07 = { status = "completed", commit_sha = "", description = "v2 review: propose 5 new future-track candidates (11-15) with priority and effort" } +t_v2_review_08 = { status = "completed", commit_sha = "", description = "v2 review: write nagent_review_v2_20260612.md (new file; v1 preserved)" } +t_v2_review_pending_01 = { status = "pending", commit_sha = "", description = "Surface Candidate 11 (Knowledge Harvest) to the user as the primary v2 finding" } +t_v2_review_pending_02 = { status = "pending", commit_sha = "", description = "Tier 2 source-read: verify whether Candidate 15 (save-with-graceful-summary-failure) is a latent bug" } +t_v2_review_pending_03 = { status = "pending", commit_sha = "", description = "Update v1 decisions.md to add Candidates 11-15 and refresh re-rankings" } +t_v2_review_pending_04 = { status = "pending", commit_sha = "", description = "Update v1 comparison_table.md to add 4 new rows and 4 row updates" } +t_v2_review_pending_05 = { status = "pending", commit_sha = "", description = "Update v1 nagent_takeaways_20260608.md to add 3 new takeaways and refresh 6 existing" } +t_v2_review_pending_06 = { status = "pending", commit_sha = "", description = "Update agent workflow docs (AGENTS.md, conductor/workflow.md, 
conductor/product-guidelines.md) to incorporate v2 design principles" } +t_v2_review_pending_07 = { status = "pending", commit_sha = "", description = "Tier 2 source-read: verify 8 items in nagent_review_v2_20260612.md §8 before any new candidate is scoped" } + +# v2.1 review (2026-06-12, second user iteration): user corrections to v2 +# v2 file is PRESERVED as the draft; v2.1 is the user-revised version +t_v2_1_review_01 = { status = "completed", commit_sha = "", description = "v2.1: read full nagent source (bin/nagent, nagent_gc_lib.py, nagent_tags.py, nagent_llm.py, nagent_gc CLI, prompts/*.md, context/data-oriented-design.md, CLAUDE.md) — 18 files in full" } +t_v2_1_review_02 = { status = "completed", commit_sha = "", description = "v2.1: reframe Candidate 11 from 'RAG alternative' to 'third memory dimension' (curation + discussion + RAG + knowledge)" } +t_v2_1_review_03 = { status = "completed", commit_sha = "", description = "v2.1: swap CLAUDE.md → AGENTS.md throughout (Manual Slop has AGENTS.md, not CLAUDE.md)" } +t_v2_1_review_04 = { status = "completed", commit_sha = "", description = "v2.1: add cache TTL GUI controls (sub-candidate 12b) — per the user's explicit ask for 'how long the caches are available for (gemini has a limit for example)'" } +t_v2_1_review_05 = { status = "completed", commit_sha = "", description = "v2.1: add new RAG integration discipline sub-section (§2.10) — 'we should be conservative' about wiring RAG; codify when RAG fits (semantic search across large codebases) and when it does not (curation/discussion/knowledge)" } +t_v2_1_review_06 = { status = "completed", commit_sha = "", description = "v2.1: preserve v2 as the draft (NON-DESTRUCTIVE write to nagent_review_v2_1_20260612.md)" } +t_v2_1_review_07 = { status = "completed", commit_sha = "", description = "v2.1: preserve Readme.md and docs/Readme.md as human-facing; propose new agent-facing files instead (AGENTS.md @import update; new ./docs/AGENTS.md)" } +t_v2_1_review_08 = { 
status = "completed", commit_sha = "", description = "v2.1: write nagent_review_v2_1_20260612.md (new file, ~59KB) with the reframe, the swap, the new styleguide list, the new docs list, and the workflow doc update plan" } + +# v2.1 pending (for the next turn) +t_v2_1_review_pending_01 = { status = "pending", commit_sha = "", description = "User review of v2.1 + confirmation of which new artifacts to create in the next turn" } +t_v2_1_review_pending_02 = { status = "pending", commit_sha = "", description = "Create canonical DOD file at conductor/code_styleguides/data_oriented_design.md (cloned/adapted from nagent's context/data-oriented-design.md)" } +t_v2_1_review_pending_03 = { status = "pending", commit_sha = "", description = "Update AGENTS.md to add @conductor/code_styleguides/data_oriented_design.md import + 'what this is' section" } +t_v2_1_review_pending_04 = { status = "pending", commit_sha = "", description = "Create ./docs/AGENTS.md as agent-facing mirror of docs/Readme.md (which stays human-facing)" } +t_v2_1_review_pending_05 = { status = "pending", commit_sha = "", description = "Write 5 new styleguides (agent_memory_dimensions.md, rag_integration_discipline.md, cache_friendly_context.md, knowledge_artifacts.md, feature_flags.md)" } +t_v2_1_review_pending_06 = { status = "pending", commit_sha = "", description = "Write 3 new docs (guide_knowledge_curation.md, guide_caching_strategy.md, guide_agent_memory_dimensions.md)" } +t_v2_1_review_pending_07 = { status = "pending", commit_sha = "", description = "Update existing workflow docs (conductor/workflow.md, conductor/product-guidelines.md, docs/guide_mma.md, docs/guide_ai_client.md) with v2.1 patterns" } +t_v2_1_review_pending_08 = { status = "pending", commit_sha = "", description = "Verify Candidate 15 (save-with-graceful-summary-failure) by reading src/ai_client.py:run_discussion_compression" } + [user_wants_recorded] # User explicitly wants these in priority order (see decisions.md for full detail) 
want_1_sub_conversation_runner = "EXPLICIT: 'I probably want to add that for just 1:1 discussions where I use a sub-agent manually for specific points'"