feat(ai): Harden tool access exclusion across all providers

2026-05-02 13:23:00 -04:00
parent f8cd125c8a
commit 2666a3390d
4 changed files with 75 additions and 6 deletions
@@ -25,7 +25,7 @@ This file tracks all major tracks for the project. Each track has its own detail
 4. [x] **Track: Codebase Audit and Cleanup**
    *Link: [./tracks/codebase_audit_20260308/](./tracks/codebase_audit_20260308/)*

-5. [ ] **Track: Expanded Test Coverage and Stress Testing**
+5. [~] **Track: Expanded Test Coverage and Stress Testing**
   *Link: [./tracks/test_coverage_expansion_20260309/](./tracks/test_coverage_expansion_20260309/)*

 6. [ ] **Track: Beads Mode Integration**
@@ -1,8 +1,8 @@
 # Implementation Plan: Expanded Test Coverage and Stress Testing

 ## Phase 1: Tool Accessibility and State Unit Tests
- [ ] Task: Review current tool registration and disabling logic in `src/mcp_client.py` and `src/api_hooks.py`.
- [ ] Task: Write Tests: Create unit tests in `tests/test_agent_tools_wiring.py` (or similar) to verify turning a tool off removes it from the agent's available tool list.
+- [x] Task: Review current tool registration and disabling logic in `src/mcp_client.py` and `src/api_hooks.py`.
+- [~] Task: Write Tests: Create unit tests in `tests/test_agent_tools_wiring.py` (or similar) to verify turning a tool off removes it from the agent's available tool list.
 - [ ] Task: Implement: If tests fail due to missing logic, update the tool filtering implementation to ensure disabled tools are strictly excluded from the context sent to the provider.
 - [ ] Task: Conductor - User Manual Verification 'Phase 1: Tool Accessibility and State Unit Tests' (Protocol in workflow.md)

@@ -396,7 +396,7 @@ def reset_session() -> None:
 global _anthropic_client, _anthropic_history
 global _deepseek_client, _deepseek_history
 global _minimax_client, _minimax_history
- global _CACHED_ANTHROPIC_TOOLS
+ global _CACHED_ANTHROPIC_TOOLS, _CACHED_DEEPSEEK_TOOLS
 global _gemini_cli_adapter
 if _gemini_client and _gemini_cache:
  try:
@@ -425,6 +425,7 @@ def reset_session() -> None:
 with _minimax_history_lock:
  _minimax_history = []
 _CACHED_ANTHROPIC_TOOLS = None
+ _CACHED_DEEPSEEK_TOOLS = None
 file_cache.reset_client()

 def get_gemini_cache_stats() -> dict[str, Any]:
@@ -501,13 +502,14 @@ _agent_tools: dict[str, bool] = {}

 def set_agent_tools(tools: dict[str, bool]) -> None:
 """Configures which tools are enabled for the AI agent."""
- global _agent_tools, _CACHED_ANTHROPIC_TOOLS
+ global _agent_tools, _CACHED_ANTHROPIC_TOOLS, _CACHED_DEEPSEEK_TOOLS
 _agent_tools = tools
 _CACHED_ANTHROPIC_TOOLS = None
+ _CACHED_DEEPSEEK_TOOLS = None

 def set_tool_preset(preset_name: Optional[str]) -> None:
 """Loads a tool preset and applies it via set_agent_tools."""
- global _agent_tools, _CACHED_ANTHROPIC_TOOLS, _tool_approval_modes, _active_tool_preset
+ global _agent_tools, _CACHED_ANTHROPIC_TOOLS, _CACHED_DEEPSEEK_TOOLS, _tool_approval_modes, _active_tool_preset
 _tool_approval_modes = {}
 if not preset_name or preset_name == "None":
  # Enable all tools if no preset
@@ -534,6 +536,7 @@ def set_tool_preset(preset_name: Optional[str]) -> None:
   sys.stderr.write(f"[ERROR] Failed to set tool preset '{preset_name}': {e}\n")
   sys.stderr.flush()
 _CACHED_ANTHROPIC_TOOLS = None
+ _CACHED_DEEPSEEK_TOOLS = None

 def set_bias_profile(profile_name: Optional[str]) -> None:
 """Sets the active tool bias profile for tuning model behavior."""
@@ -0,0 +1,66 @@
+import pytest
+from src import ai_client
+from src import mcp_client
+
+def test_set_agent_tools_clears_caches():
+ ai_client._CACHED_ANTHROPIC_TOOLS = [{"dummy": "data"}]
+ ai_client._CACHED_DEEPSEEK_TOOLS = [{"dummy": "data"}]
+ # Both caches now hold placeholder data; reconfiguring the tool set must drop them.
+ ai_client.set_agent_tools({"read_file": True})
+ # set_agent_tools is expected to reset each provider cache to None.
+ assert ai_client._CACHED_ANTHROPIC_TOOLS is None
+ assert ai_client._CACHED_DEEPSEEK_TOOLS is None
+
+def test_gemini_tool_declaration_excludes_disabled():
+ # Case 1: a tool explicitly mapped to False must be absent from the Gemini declaration (a falsy result counts as no tools).
+ ai_client.set_agent_tools({"read_file": False})
+ tool = ai_client._gemini_tool_declaration()
+ names = [f.name for f in tool.function_declarations] if tool else []
+ assert "read_file" not in names
+ 
+ # Case 2: disable every name in mcp_client.TOOL_NAMES plus ai_client.TOOL_NAME, re-enable only read_file, then check what is declared.
+ all_tools = {name: False for name in mcp_client.TOOL_NAMES}
+ all_tools[ai_client.TOOL_NAME] = False
+ all_tools["read_file"] = True
+ ai_client.set_agent_tools(all_tools)
+ tool = ai_client._gemini_tool_declaration()
+ names = [f.name for f in tool.function_declarations] if tool else []
+ assert "read_file" in names
+ assert "write_file" not in names
+ assert ai_client.TOOL_NAME not in names
+
+def test_build_anthropic_tools_excludes_disabled():
+ # Case 1: a tool explicitly mapped to False must be absent from the Anthropic tool list (names read from each entry's "name" key).
+ ai_client.set_agent_tools({"read_file": False})
+ tools = ai_client._build_anthropic_tools()
+ names = [t["name"] for t in tools]
+ assert "read_file" not in names
+ 
+ # Case 2: disable every name in mcp_client.TOOL_NAMES plus ai_client.TOOL_NAME, re-enable only read_file, then check what is listed.
+ all_tools = {name: False for name in mcp_client.TOOL_NAMES}
+ all_tools[ai_client.TOOL_NAME] = False
+ all_tools["read_file"] = True
+ ai_client.set_agent_tools(all_tools)
+ tools = ai_client._build_anthropic_tools()
+ names = [t["name"] for t in tools]
+ assert "read_file" in names
+ assert "write_file" not in names
+ assert ai_client.TOOL_NAME not in names
+
+def test_build_deepseek_tools_excludes_disabled():
+ # Case 1: a tool explicitly mapped to False must be absent from the DeepSeek tool list (names read from each entry's ["function"]["name"]).
+ ai_client.set_agent_tools({"read_file": False})
+ tools = ai_client._build_deepseek_tools()
+ names = [t["function"]["name"] for t in tools]
+ assert "read_file" not in names
+ 
+ # Case 2: disable every name in mcp_client.TOOL_NAMES plus ai_client.TOOL_NAME, re-enable only read_file, then check what is listed.
+ all_tools = {name: False for name in mcp_client.TOOL_NAMES}
+ all_tools[ai_client.TOOL_NAME] = False
+ all_tools["read_file"] = True
+ ai_client.set_agent_tools(all_tools)
+ tools = ai_client._build_deepseek_tools()
+ names = [t["function"]["name"] for t in tools]
+ assert "read_file" in names
+ assert "write_file" not in names
+ assert ai_client.TOOL_NAME not in names