From 7fee76f49182055a076b830b67f77e2ad486cb3b Mon Sep 17 00:00:00 2001 From: Ed_ Date: Thu, 11 Jun 2026 21:35:32 -0400 Subject: [PATCH] feat(capability_matrix): add anthropic, gemini, deepseek registry entries Phase 5 t5_1, t5_2, t5_3: populate the v2 capability matrix for the 3 vendors that had no registry entries. Previously, get_capabilities('anthropic', ...) raised KeyError and the GUI fell back to the 'unregistered' defaults. Now all 8 vendors in PROVIDERS are on the matrix. Entries added: anthropic/* (12 entries) - wildcard + 9 sonnet/opus variants + haiku-4-5 + claude-fable-5 - caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True (per Claude 3.5+ docs) - cost (input/output per MTok): sonnet=$3/$15, opus=$15/$75, haiku=$1/$5 - context_window=200000 (Claude 3+ standard) gemini/* (5 entries) - wildcard + 3.1-pro-preview + 3-flash-preview + 2.5-flash + 2.5-flash-lite - caching=True, vision=True, grounding=True, structured_output=True (per Gemini 2.5+ docs) - video=True, audio=True (for 2.5+ and 3.x; lite has no video/audio) - cost (input/output per MTok): 3.1-pro=$3.50/$10.50, 3-flash=$0.15/$0.60, 2.5-flash=$0.15/$0.60, 2.5-flash-lite=$0.075/$0.30 - context_window=1000000 (Gemini 2.5+ standard) deepseek/* (4 entries) - wildcard + deepseek-v3 + deepseek-reasoner + deepseek-r1 - reasoning=True (for wildcard/r1/reasoner; v3 has structured_output=True only) - structured_output=True (all) - cost (input/output per MTok): v3=$0.27/$1.10, r1=$0.55/$2.19 - context_window=32768 Tests: - 9 new tests in tests/test_vendor_capabilities.py: * anthropic: sonnet/opus/haiku/wildcard entry tests * gemini: pro-preview + vision + wildcard tests * deepseek: reasoner + wildcard tests - 116/116 vendor+tool+provider+import-isolation tests pass (no regressions; +9 new tests this commit) - 3 audit scripts pass --- src/vendor_capabilities.py | 23 ++++++++++- tests/test_vendor_capabilities.py | 63 +++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/src/vendor_capabilities.py b/src/vendor_capabilities.py index 
03b758c4..42ea44e4 100644 --- a/src/vendor_capabilities.py +++ b/src/vendor_capabilities.py @@ -69,4 +69,25 @@ register(VendorCapabilities(vendor='qwen', model='qwen-max', context_window=3276 register(VendorCapabilities(vendor='qwen', model='qwen-long', context_window=1000000, cost_input_per_mtok=0.07, cost_output_per_mtok=0.28, caching=True, notes='qwen-long supports custom chunked long-context caching')) register(VendorCapabilities(vendor='qwen', model='qwen-vl-plus', vision=True, context_window=131072, cost_input_per_mtok=0.21, cost_output_per_mtok=0.63)) register(VendorCapabilities(vendor='qwen', model='qwen-vl-max', vision=True, context_window=32768, cost_input_per_mtok=0.50, cost_output_per_mtok=1.50)) -register(VendorCapabilities(vendor='qwen', model='qwen-audio', context_window=32768, cost_input_per_mtok=0.10, cost_output_per_mtok=0.30, audio=True, notes='Audio input support added 2026-06-11 (v2 matrix)')) \ No newline at end of file +register(VendorCapabilities(vendor='qwen', model='qwen-audio', context_window=32768, cost_input_per_mtok=0.10, cost_output_per_mtok=0.30, audio=True, notes='Audio input support added 2026-06-11 (v2 matrix)')) +register(VendorCapabilities(vendor='anthropic', model='*', context_window=200000, cost_input_per_mtok=3.00, cost_output_per_mtok=15.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True, notes='Anthropic wildcard: Sonnet defaults. 
Per-model variations below.')) +register(VendorCapabilities(vendor='anthropic', model='claude-sonnet-4-5-20250929', context_window=200000, cost_input_per_mtok=3.00, cost_output_per_mtok=15.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-sonnet-4-20250514', context_window=200000, cost_input_per_mtok=3.00, cost_output_per_mtok=15.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-sonnet-4-6', context_window=200000, cost_input_per_mtok=3.00, cost_output_per_mtok=15.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-opus-4-1-20250805', context_window=200000, cost_input_per_mtok=15.00, cost_output_per_mtok=75.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-opus-4-20250514', context_window=200000, cost_input_per_mtok=15.00, cost_output_per_mtok=75.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-opus-4-5-20251101', context_window=200000, cost_input_per_mtok=15.00, cost_output_per_mtok=75.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-opus-4-6', context_window=200000, cost_input_per_mtok=15.00, cost_output_per_mtok=75.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-opus-4-7', context_window=200000, cost_input_per_mtok=15.00, cost_output_per_mtok=75.00, caching=True, structured_output=True, 
file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-opus-4-8', context_window=200000, cost_input_per_mtok=15.00, cost_output_per_mtok=75.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-haiku-4-5-20251001', context_window=200000, cost_input_per_mtok=1.00, cost_output_per_mtok=5.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-fable-5', context_window=200000, cost_input_per_mtok=3.00, cost_output_per_mtok=15.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='gemini', model='*', context_window=1000000, cost_input_per_mtok=1.25, cost_output_per_mtok=5.00, caching=True, vision=True, video=True, audio=True, grounding=True, structured_output=True, notes='Gemini wildcard: 1M+ context window. 
Per-model variations below.')) +register(VendorCapabilities(vendor='gemini', model='gemini-3.1-pro-preview', context_window=1000000, cost_input_per_mtok=3.50, cost_output_per_mtok=10.50, caching=True, vision=True, video=True, audio=True, grounding=True, structured_output=True)) +register(VendorCapabilities(vendor='gemini', model='gemini-3-flash-preview', context_window=1000000, cost_input_per_mtok=0.15, cost_output_per_mtok=0.60, caching=True, vision=True, video=True, audio=True, grounding=True, structured_output=True)) +register(VendorCapabilities(vendor='gemini', model='gemini-2.5-flash', context_window=1000000, cost_input_per_mtok=0.15, cost_output_per_mtok=0.60, caching=True, vision=True, video=True, audio=True, grounding=True, structured_output=True)) +register(VendorCapabilities(vendor='gemini', model='gemini-2.5-flash-lite', context_window=1000000, cost_input_per_mtok=0.075, cost_output_per_mtok=0.30, caching=True, vision=True, grounding=True, structured_output=True)) +register(VendorCapabilities(vendor='deepseek', model='*', context_window=32768, cost_input_per_mtok=0.27, cost_output_per_mtok=1.10, reasoning=True, structured_output=True, notes='DeepSeek wildcard: V3 defaults. 
R1/reasoner variants below.')) +register(VendorCapabilities(vendor='deepseek', model='deepseek-v3', context_window=32768, cost_input_per_mtok=0.27, cost_output_per_mtok=1.10, structured_output=True)) +register(VendorCapabilities(vendor='deepseek', model='deepseek-reasoner', context_window=32768, cost_input_per_mtok=0.55, cost_output_per_mtok=2.19, reasoning=True, structured_output=True)) +register(VendorCapabilities(vendor='deepseek', model='deepseek-r1', context_window=32768, cost_input_per_mtok=0.55, cost_output_per_mtok=2.19, reasoning=True, structured_output=True)) diff --git a/tests/test_vendor_capabilities.py b/tests/test_vendor_capabilities.py index 8854a219..139e2996 100644 --- a/tests/test_vendor_capabilities.py +++ b/tests/test_vendor_capabilities.py @@ -130,3 +130,66 @@ def test_runtime_caps_override_helper_does_not_touch_other_vendors() -> None: assert result.local is False finally: ai_client._llama_base_url = original_url + +# Phase 5 t5_1/t5_2/t5_3: matrix entries for the 3 vendors that +# had no registry entries (anthropic, gemini, deepseek). +# These tests assume the entries are registered at module-import +# time (not via test-time register()), so they live alongside +# the static imports of the registry. 
+ +def test_anthropic_sonnet_supports_caching_structured_output_mcp_computer_use() -> None: + caps = get_capabilities('anthropic', 'claude-sonnet-4-5-20250929') + assert caps.caching is True + assert caps.structured_output is True + assert caps.mcp_support is True + assert caps.computer_use is True + assert caps.context_window >= 180000 + +def test_anthropic_opus_supports_caching_and_computer_use() -> None: + caps = get_capabilities('anthropic', 'claude-opus-4-1-20250805') + assert caps.caching is True + assert caps.computer_use is True + assert caps.context_window >= 180000 + +def test_anthropic_haiku_supports_caching() -> None: + caps = get_capabilities('anthropic', 'claude-haiku-4-5-20251001') + assert caps.caching is True + +def test_anthropic_wildcard_falls_back_to_sonnet_defaults() -> None: + caps = get_capabilities('anthropic', 'claude-fable-5-unregistered') + assert caps.caching is True + assert caps.structured_output is True + assert caps.mcp_support is True + assert caps.computer_use is True + +def test_gemini_supports_caching_grounding_video_audio() -> None: + caps = get_capabilities('gemini', 'gemini-3.1-pro-preview') + assert caps.caching is True + assert caps.grounding is True + assert caps.video is True + assert caps.audio is True + assert caps.structured_output is True + assert caps.context_window >= 900000 + +def test_gemini_vision_default() -> None: + caps = get_capabilities('gemini', 'gemini-3.1-pro-preview') + assert caps.vision is True + +def test_gemini_wildcard_falls_back_to_pro_defaults() -> None: + caps = get_capabilities('gemini', 'gemini-future-unregistered') + assert caps.caching is True + assert caps.grounding is True + assert caps.video is True + assert caps.audio is True + assert caps.vision is True + assert caps.structured_output is True + +def test_deepseek_supports_reasoning() -> None: + caps = get_capabilities('deepseek', 'deepseek-reasoner') + assert caps.reasoning is True + assert caps.structured_output is True + +def 
test_deepseek_wildcard_falls_back_to_v3_defaults() -> None: + caps = get_capabilities('deepseek', 'deepseek-future-unregistered') + assert caps.reasoning is True + assert caps.structured_output is True