From f258fc5765d647eb821716bce3d2000fb2ba2857 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sat, 21 Feb 2026 21:42:42 -0500 Subject: [PATCH] progress --- MainContext.md | 15 +++ ai_client.py | 128 ++++++++++++++++------ gui.py | 15 ++- manual_slop.toml | 20 +++- mcp_client.py | 275 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 412 insertions(+), 41 deletions(-) create mode 100644 mcp_client.py diff --git a/MainContext.md b/MainContext.md index 8b1b9ea..23a3845 100644 --- a/MainContext.md +++ b/MainContext.md @@ -28,6 +28,8 @@ Is a local GUI tool for manually curating and sending context to AI APIs. It agg - `theme.py` - palette definitions, font loading, scale, load_from_config/save_to_config - `gemini.py` - legacy standalone Gemini wrapper (not used by the main GUI; superseded by `ai_client.py`) - `file_cache.py` - stub; Anthropic Files API path removed; kept so stale imports don't break +- `mcp_client.py` - MCP-style read-only file tools (read_file, list_directory, search_files, get_file_summary); allowlist enforced against project file_items + base_dirs; dispatched by ai_client tool-use loop for both Anthropic and Gemini +- `summarize.py` - local heuristic summariser (no AI); .py via AST, .toml via regex, .md headings, generic preview; used by mcp_client.get_file_summary and aggregate.build_summary_section - `config.toml` - global-only settings: [ai] provider+model, [theme] palette+font+scale, [projects] paths array + active path - `manual_slop.toml` - per-project file: [project] name+git_dir, [output] namespace+output_dir, [files] base_dir+paths, [screenshots] base_dir+paths, [discussion] roles+active+[discussion.discussions.] git_commit+last_updated+history - `credentials.toml` - gemini api_key, anthropic api_key @@ -179,6 +181,19 @@ Entry layout: index + timestamp + direction + kind + provider/model header row, - `_classify_anthropic_error` and `_classify_gemini_error` inspect exception types and status codes/message bodies to assign the kind - `ui_message()` returns a human-readable label for display in the Response panel +**MCP file tools (mcp_client.py + ai_client.py):** +- Four read-only tools exposed to the AI as native function/tool declarations: `read_file`, `list_directory`, `search_files`, `get_file_summary` +- Access control: `mcp_client.configure(file_items, extra_base_dirs)` is called before each send; builds an allowlist of resolved absolute paths from the project's `file_items` plus the `base_dir`; any path that is not explicitly in the list or not under one of the allowed directories returns `ACCESS DENIED` +- `mcp_client.dispatch(tool_name, tool_input)` is the single dispatch entry point used by both Anthropic and Gemini tool-use loops +- Anthropic: MCP tools appear before `run_powershell` in the tools list (no `cache_control` on them; only `run_powershell` carries `cache_control: ephemeral`) +- Gemini: MCP tools are included in the `FunctionDeclaration` list alongside `run_powershell` +- `get_file_summary` uses `summarize.summarise_file()` — same heuristic used for the initial `` block, so the AI gets the same compact structural view it already knows +- `list_directory` sorts dirs before files; shows name, type, and size +- `search_files` uses `Path.glob()` with the caller-supplied pattern (supports `**/*.py` style) +- `read_file` returns raw UTF-8 text; errors (not found, access denied, decode error) are returned as error strings rather than exceptions, so the AI sees them as tool results +- `summarize.py` heuristics: `.py` → AST imports + ALL_CAPS constants + classes+methods + top-level functions; `.toml` → table headers + top-level keys; `.md` → h1–h3 headings with indentation; all others → line count + first 8 lines preview +- Comms log: MCP tool calls log `OUT/tool_call` with `{"name": ..., "args": {...}}` and `IN/tool_result` with `{"name": ..., "output": ...}`; rendered in the Comms History panel via `_render_payload_tool_call` (shows each arg key/value) and `_render_payload_tool_result` (shows output) + **Known extension points:** - Add more providers by adding a section to `credentials.toml`, a `_list_*` and `_send_*` function in `ai_client.py`, and the provider name to the `PROVIDERS` list in `gui.py` - System prompt support could be added as a field in the project `.toml` and passed in `ai_client.send()` diff --git a/ai_client.py b/ai_client.py index c05ffbd..cc14b0e 100644 --- a/ai_client.py +++ b/ai_client.py @@ -4,6 +4,7 @@ import json import datetime from pathlib import Path import file_cache +import mcp_client _provider: str = "gemini" _model: str = "gemini-2.0-flash" @@ -33,7 +34,7 @@ MAX_TOOL_ROUNDS = 5 _ANTHROPIC_CHUNK_SIZE = 180_000 _ANTHROPIC_SYSTEM = ( - "You are a helpful coding assistant with access to a PowerShell tool. " + "You are a helpful coding assistant with access to a PowerShell tool and MCP file tools (read_file, list_directory, search_files, get_file_summary). " "When asked to create or edit files, prefer targeted edits over full rewrites. " "Always explain what you are doing before invoking the tool.\n\n" "When writing or rewriting large files (especially those containing quotes, backticks, or special characters), " @@ -221,8 +222,16 @@ def _list_anthropic_models() -> list[str]: TOOL_NAME = "run_powershell" -_ANTHROPIC_TOOLS = [ - { +def _build_anthropic_tools() -> list[dict]: + """Build the full Anthropic tools list: run_powershell + MCP file tools.""" + mcp_tools = [] + for spec in mcp_client.MCP_TOOL_SPECS: + mcp_tools.append({ + "name": spec["name"], + "description": spec["description"], + "input_schema": spec["parameters"], + }) + powershell_tool = { "name": TOOL_NAME, "description": ( "Run a PowerShell script within the project base_dir. " @@ -243,34 +252,57 @@ _ANTHROPIC_TOOLS = [ }, "cache_control": {"type": "ephemeral"}, } -] + return mcp_tools + [powershell_tool] + + +_ANTHROPIC_TOOLS = _build_anthropic_tools() def _gemini_tool_declaration(): from google.genai import types - return types.Tool( - function_declarations=[ - types.FunctionDeclaration( - name=TOOL_NAME, - description=( - "Run a PowerShell script within the project base_dir. " - "Use this to create, edit, rename, or delete files and directories. " - "The working directory is set to base_dir automatically. " - "stdout and stderr are returned to you as the result." - ), - parameters=types.Schema( - type=types.Type.OBJECT, - properties={ - "script": types.Schema( - type=types.Type.STRING, - description="The PowerShell script to execute." - ) - }, - required=["script"] - ) + + declarations = [] + + # MCP file tools + for spec in mcp_client.MCP_TOOL_SPECS: + props = {} + for pname, pdef in spec["parameters"].get("properties", {}).items(): + props[pname] = types.Schema( + type=types.Type.STRING, + description=pdef.get("description", ""), ) - ] - ) + declarations.append(types.FunctionDeclaration( + name=spec["name"], + description=spec["description"], + parameters=types.Schema( + type=types.Type.OBJECT, + properties=props, + required=spec["parameters"].get("required", []), + ), + )) + + # PowerShell tool + declarations.append(types.FunctionDeclaration( + name=TOOL_NAME, + description=( + "Run a PowerShell script within the project base_dir. " + "Use this to create, edit, rename, or delete files and directories. " + "The working directory is set to base_dir automatically. " + "stdout and stderr are returned to you as the result." + ), + parameters=types.Schema( + type=types.Type.OBJECT, + properties={ + "script": types.Schema( + type=types.Type.STRING, + description="The PowerShell script to execute." + ) + }, + required=["script"] + ), + )) + + return types.Tool(function_declarations=declarations) def _run_script(script: str, base_dir: str) -> str: @@ -365,6 +397,7 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: try: _ensure_gemini_client() + mcp_client.configure(file_items or [], [base_dir]) if _gemini_chat is None: _gemini_chat = _gemini_client.chats.create( @@ -407,8 +440,20 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: function_responses = [] for fc in tool_calls: - if fc.name == TOOL_NAME: - script = fc.args.get("script", "") + fc_name = fc.name + fc_args = dict(fc.args) + if fc_name in mcp_client.TOOL_NAMES: + _append_comms("OUT", "tool_call", {"name": fc_name, "args": fc_args}) + output = mcp_client.dispatch(fc_name, fc_args) + _append_comms("IN", "tool_result", {"name": fc_name, "output": output}) + function_responses.append( + types.Part.from_function_response( + name=fc_name, + response={"output": output} + ) + ) + elif fc_name == TOOL_NAME: + script = fc_args.get("script", "") _append_comms("OUT", "tool_call", { "name": TOOL_NAME, "script": script, @@ -527,6 +572,7 @@ def _repair_anthropic_history(history: list[dict]): def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str: try: _ensure_anthropic_client() + mcp_client.configure(file_items or [], [base_dir]) context_blocks = _build_chunked_context_blocks(md_content) @@ -557,7 +603,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item "cache_control": {"type": "ephemeral"}, } ], - tools=_ANTHROPIC_TOOLS, + tools=_build_anthropic_tools(), messages=_anthropic_history, ) @@ -600,22 +646,36 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item tool_results = [] for block in response.content: - if getattr(block, "type", None) == "tool_use" and getattr(block, "name", None) == TOOL_NAME: - script = block.input.get("script", "") + if getattr(block, "type", None) != "tool_use": + continue + b_name = getattr(block, "name", None) + b_id = getattr(block, "id", "") + b_input = getattr(block, "input", {}) + if b_name in mcp_client.TOOL_NAMES: + _append_comms("OUT", "tool_call", {"name": b_name, "id": b_id, "args": b_input}) + output = mcp_client.dispatch(b_name, b_input) + _append_comms("IN", "tool_result", {"name": b_name, "id": b_id, "output": output}) + tool_results.append({ + "type": "tool_result", + "tool_use_id": b_id, + "content": output, + }) + elif b_name == TOOL_NAME: + script = b_input.get("script", "") _append_comms("OUT", "tool_call", { "name": TOOL_NAME, - "id": block.id, + "id": b_id, "script": script, }) output = _run_script(script, base_dir) _append_comms("IN", "tool_result", { "name": TOOL_NAME, - "id": block.id, + "id": b_id, "output": output, }) tool_results.append({ "type": "tool_result", - "tool_use_id": block.id, + "tool_use_id": b_id, "content": output, }) diff --git a/gui.py b/gui.py index 7999cca..9cc4e1b 100644 --- a/gui.py +++ b/gui.py @@ -1,4 +1,4 @@ -# gui.py +# gui.py import dearpygui.dearpygui as dpg import tomllib import tomli_w @@ -149,7 +149,16 @@ def _render_payload_tool_call(parent: str, payload: dict): _add_kv_row(parent, "name", payload.get("name", "")) if "id" in payload: _add_kv_row(parent, "id", payload["id"]) - _add_text_field(parent, "script", payload.get("script", "")) + # PowerShell tool uses 'script'; MCP file tools use 'args' dict + if "script" in payload: + _add_text_field(parent, "script", payload.get("script", "")) + elif "args" in payload: + args = payload["args"] + if isinstance(args, dict): + for ak, av in args.items(): + _add_text_field(parent, ak, str(av)) + else: + _add_text_field(parent, "args", str(args)) def _render_payload_tool_result(parent: str, payload: dict): @@ -385,7 +394,7 @@ class App: except Exception: continue - # No valid project file found — migrate from legacy config.toml + # No valid project file found - migrate from legacy config.toml self.project = project_manager.migrate_from_legacy_config(self.config) name = self.project.get("project", {}).get("name", "project") fallback_path = f"{name}.toml" diff --git a/manual_slop.toml b/manual_slop.toml index db7e492..42337ce 100644 --- a/manual_slop.toml +++ b/manual_slop.toml @@ -22,11 +22,16 @@ paths = [ "C:/projects/manual_slop/theme.py", "C:/projects/manual_slop/project_manager.py", "C:/projects/manual_slop/manual_slop.toml", + "C:/projects/manual_slop/mcp_client.py", + "C:/projects/manual_slop/summarize.py", ] [screenshots] base_dir = "C:/Users/Ed/scoop/apps/sharex/current/ShareX/Screenshots/2026-02" -paths = [] +paths = [ + "C:/Users/Ed/scoop/apps/sharex/current/ShareX/Screenshots/2026-02/2026-02-21_21-34-14.png", + "C:/Users/Ed/scoop/apps/sharex/current/ShareX/Screenshots/2026-02/Fork_2026-02-21_21-34-27.png", +] [discussion] roles = [ @@ -59,9 +64,16 @@ history = [ ] [discussion.discussions."Support files or mcp, or proper dynamic context uptake."] -git_commit = "d9a51332efb3354c81876f9a230bcedd632e7fd2" -last_updated = "2026-02-21T21:29:16" -history = [] +git_commit = "59208b27ec31949cb45cae382dddaee645b53b4f" +last_updated = "2026-02-21T21:42:01" +history = [ + "@2026-02-21T21:30:26\nUser:\nLets add support for mcp. Similar to what claude code has, but restricted only to the paths provided. We have a basic summerizer right now but it doesn't work for all code files (we'd need tree-sitter). We need to add support for getting heurstics on what nuances each file has that is valuable for a basic overview before retrieving their content worst case to get a job done.", + "@2026-02-21T21:30:32\nAI:\nThe PowerShell `Set-Content` write corrupted the file — it re-encoded it as Windows-1252, turning the UTF-8 em-dash (0xE2 0x80 0x94) into 0x97. I need to read the file as bytes, find and fix the corruption, apply the three targeted changes, and write back as UTF-8.", + "@2026-02-21T21:32:36\nUser:\nI fixed gui.py not sure what happened there. pelases keep adding mcp support...", + "@2026-02-21T21:34:03\nAI:\nERROR: Error code: 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'prompt is too long: 204369 tokens > 200000 maximum'}, 'request_id': 'req_011CYNKqxGAjRaD8cXEbuGTt'}", + "@2026-02-21T21:39:13\nUser:\nYou got an error for a request (message too long). Continue where you left off I provided screenshots to hopefully help. I restarted the editor it has the last mcp_client.py and I added it to your context.", + "@2026-02-21T21:39:13\nAI:\nERROR: Error code: 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'prompt is too long: 211450 tokens > 200000 maximum'}, 'request_id': 'req_011CYNLEVvnYZCCBugQ1sfQX'}", +] [discussion.discussions."Word Wrap"] git_commit = "" diff --git a/mcp_client.py b/mcp_client.py new file mode 100644 index 0000000..7de1963 --- /dev/null +++ b/mcp_client.py @@ -0,0 +1,275 @@ +# mcp_client.py + +#MCP-style file context tools for manual_slop. + +# Exposes read-only filesystem tools the AI can call to selectively fetch file +# content on demand, instead of having everything inlined into the context block. +# All access is restricted to paths that are either: +# - Explicitly listed in the project's allowed_paths set, OR +# - Contained within an allowed base_dir (must resolve to a subpath of it) + +# Tools exposed: +# read_file(path) - return full UTF-8 content of a file +# list_directory(path) - list entries in a directory (names + type) +# search_files(path, pattern) - glob pattern search within an allowed dir +# get_file_summary(path) - return the summarize.py heuristic summary +# + +from pathlib import Path +import summarize + +# ------------------------------------------------------------------ state + +# Set by configure() before the AI send loop starts. +# allowed_paths : set of resolved absolute Path objects (files or dirs) +# base_dirs : set of resolved absolute Path dirs that act as roots +_allowed_paths: set[Path] = set() +_base_dirs: set[Path] = set() + + +def configure(file_items: list[dict], extra_base_dirs: list[str] | None = None): + """ + Build the allowlist from aggregate file_items. + Called by ai_client before each send so the list reflects the current project. + + file_items : list of dicts from aggregate.build_file_items() + extra_base_dirs : additional directory roots to allow traversal of + """ + global _allowed_paths, _base_dirs + _allowed_paths = set() + _base_dirs = set() + + for item in file_items: + p = item.get("path") + if p is not None: + rp = Path(p).resolve() + _allowed_paths.add(rp) + _base_dirs.add(rp.parent) + + if extra_base_dirs: + for d in extra_base_dirs: + dp = Path(d).resolve() + if dp.is_dir(): + _base_dirs.add(dp) + + +def _is_allowed(path: Path) -> bool: + """ + Return True if `path` is within the allowlist. + A path is allowed if: + - it is explicitly in _allowed_paths, OR + - it is contained within (or equal to) one of the _base_dirs + """ + rp = path.resolve() + if rp in _allowed_paths: + return True + for bd in _base_dirs: + try: + rp.relative_to(bd) + return True + except ValueError: + continue + return False + + +def _resolve_and_check(raw_path: str) -> tuple[Path | None, str]: + """ + Resolve raw_path and verify it passes the allowlist check. + Returns (resolved_path, error_string). error_string is empty on success. + """ + try: + p = Path(raw_path).resolve() + except Exception as e: + return None, f"ERROR: invalid path '{raw_path}': {e}" + if not _is_allowed(p): + return None, ( + f"ACCESS DENIED: '{raw_path}' is not within the allowed paths. " + f"Use list_directory or search_files on an allowed base directory first." + ) + return p, "" + + +# ------------------------------------------------------------------ tool implementations + +def read_file(path: str) -> str: + """Return the UTF-8 content of a file, or an error string.""" + p, err = _resolve_and_check(path) + if err: + return err + if not p.exists(): + return f"ERROR: file not found: {path}" + if not p.is_file(): + return f"ERROR: not a file: {path}" + try: + return p.read_text(encoding="utf-8") + except Exception as e: + return f"ERROR reading '{path}': {e}" + + +def list_directory(path: str) -> str: + """List entries in a directory. Returns a compact text table.""" + p, err = _resolve_and_check(path) + if err: + return err + if not p.exists(): + return f"ERROR: path not found: {path}" + if not p.is_dir(): + return f"ERROR: not a directory: {path}" + try: + entries = sorted(p.iterdir(), key=lambda e: (e.is_file(), e.name.lower())) + lines = [f"Directory: {p}", ""] + for entry in entries: + kind = "file" if entry.is_file() else "dir " + size = f"{entry.stat().st_size:>10,} bytes" if entry.is_file() else "" + lines.append(f" [{kind}] {entry.name:<40} {size}") + lines.append(f" ({len(entries)} entries)") + return "\n".join(lines) + except Exception as e: + return f"ERROR listing '{path}': {e}" + + +def search_files(path: str, pattern: str) -> str: + """ + Search for files matching a glob pattern within path. + pattern examples: '*.py', '**/*.toml', 'src/**/*.rs' + """ + p, err = _resolve_and_check(path) + if err: + return err + if not p.is_dir(): + return f"ERROR: not a directory: {path}" + try: + matches = sorted(p.glob(pattern)) + if not matches: + return f"No files matched '{pattern}' in {path}" + lines = [f"Search '{pattern}' in {p}:", ""] + for m in matches: + rel = m.relative_to(p) + kind = "file" if m.is_file() else "dir " + lines.append(f" [{kind}] {rel}") + lines.append(f" ({len(matches)} match(es))") + return "\n".join(lines) + except Exception as e: + return f"ERROR searching '{path}': {e}" + + +def get_file_summary(path: str) -> str: + """ + Return the heuristic summary for a file (same as the initial context block). + For .py files: imports, classes, methods, functions, constants. + For .toml: table keys. For .md: headings. Others: line count + preview. + """ + p, err = _resolve_and_check(path) + if err: + return err + if not p.exists(): + return f"ERROR: file not found: {path}" + if not p.is_file(): + return f"ERROR: not a file: {path}" + try: + content = p.read_text(encoding="utf-8") + return summarize.summarise_file(p, content) + except Exception as e: + return f"ERROR summarising '{path}': {e}" + + +# ------------------------------------------------------------------ tool dispatch + +TOOL_NAMES = {"read_file", "list_directory", "search_files", "get_file_summary"} + + +def dispatch(tool_name: str, tool_input: dict) -> str: + """ + Dispatch an MCP tool call by name. Returns the result as a string. + """ + if tool_name == "read_file": + return read_file(tool_input.get("path", "")) + if tool_name == "list_directory": + return list_directory(tool_input.get("path", "")) + if tool_name == "search_files": + return search_files(tool_input.get("path", ""), tool_input.get("pattern", "*")) + if tool_name == "get_file_summary": + return get_file_summary(tool_input.get("path", "")) + return f"ERROR: unknown MCP tool '{tool_name}'" + + +# ------------------------------------------------------------------ tool schema helpers +# These are imported by ai_client.py to build provider-specific declarations. + +MCP_TOOL_SPECS = [ + { + "name": "read_file", + "description": ( + "Read the full UTF-8 content of a file within the allowed project paths. " + "Use get_file_summary first to decide whether you need the full content." + ), + "parameters": { + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Absolute or relative path to the file to read.", + } + }, + "required": ["path"], + }, + }, + { + "name": "list_directory", + "description": ( + "List files and subdirectories within an allowed directory. " + "Shows name, type (file/dir), and size. Use this to explore the project structure." + ), + "parameters": { + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Absolute path to the directory to list.", + } + }, + "required": ["path"], + }, + }, + { + "name": "search_files", + "description": ( + "Search for files matching a glob pattern within an allowed directory. " + "Supports recursive patterns like '**/*.py'. " + "Use this to find files by extension or name pattern." + ), + "parameters": { + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Absolute path to the directory to search within.", + }, + "pattern": { + "type": "string", + "description": "Glob pattern, e.g. '*.py', '**/*.toml', 'src/**/*.rs'.", + }, + }, + "required": ["path", "pattern"], + }, + }, + { + "name": "get_file_summary", + "description": ( + "Get a compact heuristic summary of a file without reading its full content. " + "For Python: imports, classes, methods, functions, constants. " + "For TOML: table keys. For Markdown: headings. Others: line count + preview. " + "Use this before read_file to decide if you need the full content." + ), + "parameters": { + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Absolute or relative path to the file to summarise.", + } + }, + "required": ["path"], + }, + }, +]