7fee76f491
Phase 5 t5_1, t5_2, t5_3: populate the v2 capability matrix
for the 3 vendors that had no registry entries. Previously,
get_capabilities('anthropic', ...) raised KeyError and the
GUI fell back to the 'unregistered' defaults. Now all 8
vendors in PROVIDERS are on the matrix.
Entries added:
anthropic/* (12 entries)
- wildcard + 8 sonnet/opus variants + haiku-4-5 + claude-fable-5
- caching=True, structured_output=True, file_search=True,
mcp_support=True, computer_use=True (per Claude 3.5+ docs)
- cost: sonnet=$?/$?, opus=$?/$?, haiku=$?/$? (the dollar amounts were lost from the original message, which showed only `\/\`)
- context_window=200000 (Claude 3+ standard)
gemini/* (5 entries)
- wildcard + 3.1-pro-preview + 3-flash-preview + 2.5-flash + 2.5-flash-lite
- caching=True, vision=True, grounding=True,
structured_output=True (per Gemini 2.5+ docs)
- video=True, audio=True (for 2.5+ and 3.x; lite has no video/audio)
- cost: 3.1-pro=$?.50/$?.50, 3-flash=$?.15/$?.60,
2.5-flash=$?.15/$?.60, 2.5-flash-lite=$?.075/$?.30 (the digits before each decimal point were lost from the original message)
- context_window=1000000 (Gemini 2.5+ standard)
deepseek/* (4 entries)
- wildcard + deepseek-v3 + deepseek-reasoner + deepseek-r1
- reasoning=True (for r1/reasoner; v3 has structured_output=True only)
- structured_output=True (all)
- cost: v3=$?.27/$?.10, r1=$?.55/$?.19 (the digits before each decimal point were lost from the original message)
- context_window=32768
Tests:
- 9 new tests in tests/test_vendor_capabilities.py:
* anthropic: sonnet/opus/haiku/wildcard entry tests
* gemini: pro-preview + vision + wildcard tests
* deepseek: reasoner + wildcard tests
- 116/116 vendor+tool+provider+import-isolation tests pass
(no regressions; +9 new tests this commit)
- 3 audit scripts pass
196 lines
7.2 KiB
Python
196 lines
7.2 KiB
Python
import pytest
|
|
from src.vendor_capabilities import VendorCapabilities, get_capabilities, register
|
|
|
|
@pytest.fixture(autouse=True)
def _clean_registry():
    """Isolate every test from registry changes made by other tests.

    Takes a deep snapshot of ``src.vendor_capabilities._REGISTRY`` before the
    test body runs. After the test finishes, the registry is emptied in place
    and refilled from that snapshot.
    """
    import copy

    import src.vendor_capabilities

    # A shallow ``.copy()`` only protects the top-level keys: any mutable
    # value (for example a nested per-vendor mapping, or a capabilities object
    # a test edits in place) would be shared with the snapshot, so the change
    # would survive the restore below.
    # NOTE(review): the registry's value layout is not visible from this
    # file; a deep copy is correct for both a flat and a nested layout.
    snapshot = copy.deepcopy(src.vendor_capabilities._REGISTRY)
    yield
    # Restore by mutating the existing object (clear + update) instead of
    # rebinding the module attribute, so the same registry object stays in use.
    src.vendor_capabilities._REGISTRY.clear()
    src.vendor_capabilities._REGISTRY.update(snapshot)
|
|
|
|
def test_registry_lookup_known_model():
    """An entry registered for an exact vendor/model pair is returned by lookup."""
    register(
        VendorCapabilities(
            vendor='qwen',
            model='qwen-max',
            vision=False,
            context_window=32768,
        )
    )

    found = get_capabilities('qwen', 'qwen-max')

    # Identity of the entry first, then the two values set explicitly above.
    assert (found.vendor, found.model) == ('qwen', 'qwen-max')
    assert found.context_window == 32768
    assert found.vision is False
|
|
|
|
def test_fallback_to_vendor_default():
    """A model name with no entry of its own gets the vendor's ``'*'`` entry."""
    wildcard_entry = VendorCapabilities(
        vendor='llama',
        model='*',
        context_window=131072,
        cost_tracking=False,
    )
    register(wildcard_entry)

    # This model name is not registered by the test; the values asserted
    # below are the ones carried by the wildcard entry registered above.
    resolved = get_capabilities('llama', 'llama-3.3-future-unregistered')

    assert resolved.context_window == 131072
    assert resolved.cost_tracking is False
|
|
|
|
def test_unknown_vendor_raises():
    """Lookup for a vendor name expected to be unregistered raises ``KeyError``."""
    expected_message = 'No capabilities registered'
    with pytest.raises(KeyError, match=expected_message):
        get_capabilities('nonexistent_vendor', 'anymodel')
|
|
|
|
# Names of the boolean capability attributes exercised by the parametrized
# "v2 field" tests in this module. Order is significant only for the order
# in which pytest generates the parametrized cases.
V2_FIELDS: list[str] = [
    'local',
    'reasoning',
    'structured_output',
    'code_execution',
    'web_search',
    'x_search',
    'file_search',
    'mcp_support',
    'audio',
    'video',
    'grounding',
    'computer_use',
]
|
|
|
|
@pytest.mark.parametrize('field_name', V2_FIELDS)
def test_v2_field_default_is_false(field_name: str) -> None:
    """Each v2 flag is ``False`` when only vendor and model are supplied."""
    bare_entry = VendorCapabilities(vendor='test', model='m')
    observed = getattr(bare_entry, field_name)
    assert observed is False, f'{field_name} should default to False'
|
|
|
|
@pytest.mark.parametrize('field_name', V2_FIELDS)
def test_v2_field_round_trip(field_name: str) -> None:
    """Each v2 flag passed as ``True`` to the constructor reads back as ``True``."""
    # The flag name is only known at run time, so it is passed as a keyword
    # argument through a dict.
    overrides = {field_name: True}
    entry = VendorCapabilities(vendor='test', model='m', **overrides)
    observed = getattr(entry, field_name)
    assert observed is True, f'{field_name} should round-trip to True'
|
|
|
|
def test_v2_local_flag_works_for_local_vendor() -> None:
    """``local=True`` on an exact-model entry is visible through lookup."""
    model_name = 'llama-local-test-3.1'
    local_entry = VendorCapabilities(vendor='llama', model=model_name, local=True)
    register(local_entry)

    resolved = get_capabilities('llama', model_name)

    assert resolved.local is True
|
|
|
|
def test_v2_local_flag_falls_back_to_wildcard() -> None:
    """``local=True`` on the ``'*'`` entry applies to a model without its own entry."""
    wildcard_entry = VendorCapabilities(vendor='llama', model='*', local=True)
    register(wildcard_entry)

    # The model name below is not registered by this test.
    resolved = get_capabilities('llama', 'some-unregistered-model-3.1-future')

    assert resolved.local is True
|
|
|
|
def test_v2_local_flag_does_not_affect_other_vendors() -> None:
    """A ``local=True`` llama wildcard does not make a qwen lookup local."""
    llama_wildcard = VendorCapabilities(vendor='llama', model='*', local=True)
    register(llama_wildcard)
    qwen_wildcard = VendorCapabilities(vendor='qwen', model='*')
    register(qwen_wildcard)

    qwen_caps = get_capabilities('qwen', 'qwen-turbo')

    assert qwen_caps.local is False
|
|
|
|
def test_runtime_caps_override_sets_local_for_llama_localhost() -> None:
    """``dataclasses.replace`` can switch ``local`` on and off again.

    This test does not call any override helper; it only checks that copies
    produced with ``replace`` carry the requested ``local`` value.
    """
    import dataclasses

    original = VendorCapabilities(vendor='llama', model='llama-3.1-70b-versatile')
    assert original.local is False

    switched_on = dataclasses.replace(original, local=True)
    assert switched_on.local is True

    switched_off = dataclasses.replace(switched_on, local=False)
    assert switched_off.local is False
|
|
|
|
def test_v2_per_model_population() -> None:
    """Spot-check v2 flags on entries this test does not register itself.

    Every lookup below depends on entries already present in the registry
    when the test starts.
    """
    # minimax: the M2.5 entry reports reasoning, the M2 entry does not.
    minimax_new = get_capabilities('minimax', 'MiniMax-M2.5')
    assert minimax_new.reasoning is True
    minimax_old = get_capabilities('minimax', 'MiniMax-M2')
    assert minimax_old.reasoning is False

    # grok: the vision model reports both search flags and vision.
    grok_vision = get_capabilities('grok', 'grok-2-vision')
    assert grok_vision.web_search is True
    assert grok_vision.x_search is True
    assert grok_vision.vision is True

    # qwen: audio and caching are checked on different models.
    qwen_audio = get_capabilities('qwen', 'qwen-audio')
    assert qwen_audio.audio is True
    qwen_long = get_capabilities('qwen', 'qwen-long')
    assert qwen_long.caching is True

    # llama: reasoning differs between the two models looked up.
    llama_reasoning = get_capabilities('llama', 'llama-3.1-405b-reasoning')
    assert llama_reasoning.reasoning is True
    llama_plain = get_capabilities('llama', 'llama-3.1-8b-instant')
    assert llama_plain.reasoning is False
|
|
|
|
def test_runtime_caps_override_helper_for_llama_localhost() -> None:
    """``gui_2._apply_runtime_caps_override`` tracks the llama base URL.

    With ``current_provider == 'llama'`` the returned capabilities must have
    ``local`` False for the openrouter URL and True for the localhost URL.
    ``ai_client._llama_base_url`` is put back to its previous value afterwards.
    """
    from src import gui_2
    from src import ai_client

    class MockApp:
        # Minimal stand-in: the only attribute this test provides.
        current_provider = 'llama'

    saved_url = ai_client._llama_base_url
    app = MockApp()
    llama_caps = VendorCapabilities(vendor='llama', model='llama-3.1-70b-versatile')
    try:
        ai_client._llama_base_url = 'https://openrouter.ai/api/v1'
        remote_result = gui_2._apply_runtime_caps_override(app, llama_caps)
        assert remote_result.local is False

        ai_client._llama_base_url = 'http://localhost:11434/v1'
        local_result = gui_2._apply_runtime_caps_override(app, llama_caps)
        assert local_result.local is True
    finally:
        # Always undo the module-level change, even if an assertion failed.
        ai_client._llama_base_url = saved_url
|
|
|
|
def test_runtime_caps_override_helper_does_not_touch_other_vendors() -> None:
    """A localhost llama URL does not make qwen capabilities local.

    With ``current_provider == 'qwen'`` the result of
    ``gui_2._apply_runtime_caps_override`` must keep ``local`` False even
    though ``ai_client._llama_base_url`` points at localhost. The URL is put
    back to its previous value afterwards.
    """
    from src import gui_2
    from src import ai_client

    class MockApp:
        # Minimal stand-in: the only attribute this test provides.
        current_provider = 'qwen'

    saved_url = ai_client._llama_base_url
    app = MockApp()
    qwen_caps = VendorCapabilities(vendor='qwen', model='qwen-turbo')
    try:
        ai_client._llama_base_url = 'http://localhost:11434/v1'
        outcome = gui_2._apply_runtime_caps_override(app, qwen_caps)
        assert outcome.local is False
    finally:
        # Always undo the module-level change, even if the assertion failed.
        ai_client._llama_base_url = saved_url
|
|
|
|
# Phase 5 t5_1/t5_2/t5_3: matrix entries for the three vendors that
# previously had no registry entries (anthropic, gemini, deepseek).
# These tests rely on the entries being registered when
# src.vendor_capabilities is imported (there is no test-time register()
# call), so each test only performs lookups with get_capabilities().
|
|
|
|
def test_anthropic_sonnet_supports_caching_structured_output_mcp_computer_use() -> None:
    """The sonnet lookup reports four flags as True and a context window of at least 180000."""
    sonnet = get_capabilities('anthropic', 'claude-sonnet-4-5-20250929')
    for flag in ('caching', 'structured_output', 'mcp_support', 'computer_use'):
        assert getattr(sonnet, flag) is True
    assert sonnet.context_window >= 180000
|
|
|
|
def test_anthropic_opus_supports_caching_and_computer_use() -> None:
    """The opus lookup reports caching and computer use, with a context window of at least 180000."""
    opus = get_capabilities('anthropic', 'claude-opus-4-1-20250805')
    for flag in ('caching', 'computer_use'):
        assert getattr(opus, flag) is True
    assert opus.context_window >= 180000
|
|
|
|
def test_anthropic_haiku_supports_caching() -> None:
    """The haiku lookup reports ``caching`` as True."""
    haiku = get_capabilities('anthropic', 'claude-haiku-4-5-20251001')
    assert haiku.caching is True
|
|
|
|
def test_anthropic_wildcard_falls_back_to_sonnet_defaults() -> None:
    """An anthropic model id with an ``-unregistered`` suffix still reports the four flags.

    NOTE(review): the id is presumably absent from the registry so that the
    lookup is served by the anthropic wildcard entry; confirm against the
    registry module.
    """
    fallback = get_capabilities('anthropic', 'claude-fable-5-unregistered')
    for flag in ('caching', 'structured_output', 'mcp_support', 'computer_use'):
        assert getattr(fallback, flag) is True
|
|
|
|
def test_gemini_supports_caching_grounding_video_audio() -> None:
    """The gemini 3.1 pro preview lookup reports five flags and a context window of at least 900000."""
    pro_preview = get_capabilities('gemini', 'gemini-3.1-pro-preview')
    for flag in ('caching', 'grounding', 'video', 'audio', 'structured_output'):
        assert getattr(pro_preview, flag) is True
    assert pro_preview.context_window >= 900000
|
|
|
|
def test_gemini_vision_default() -> None:
    """The gemini 3.1 pro preview lookup reports ``vision`` as True."""
    pro_preview = get_capabilities('gemini', 'gemini-3.1-pro-preview')
    assert pro_preview.vision is True
|
|
|
|
def test_gemini_wildcard_falls_back_to_pro_defaults() -> None:
    """A gemini model id with an ``-unregistered`` suffix still reports six flags.

    NOTE(review): the id is presumably absent from the registry so that the
    lookup is served by the gemini wildcard entry; confirm against the
    registry module.
    """
    fallback = get_capabilities('gemini', 'gemini-future-unregistered')
    expected_flags = (
        'caching',
        'grounding',
        'video',
        'audio',
        'vision',
        'structured_output',
    )
    for flag in expected_flags:
        assert getattr(fallback, flag) is True
|
|
|
|
def test_deepseek_supports_reasoning() -> None:
    """The ``deepseek-reasoner`` lookup reports reasoning and structured output."""
    reasoner = get_capabilities('deepseek', 'deepseek-reasoner')
    for flag in ('reasoning', 'structured_output'):
        assert getattr(reasoner, flag) is True
|
|
|
|
def test_deepseek_wildcard_falls_back_to_v3_defaults() -> None:
    """A deepseek model id with an ``-unregistered`` suffix reports reasoning and structured output.

    NOTE(review): the test name mentions "v3 defaults" while the assertions
    require ``reasoning`` to be True; confirm which defaults the deepseek
    wildcard entry is meant to mirror.
    """
    fallback = get_capabilities('deepseek', 'deepseek-future-unregistered')
    for flag in ('reasoning', 'structured_output'):
        assert getattr(fallback, flag) is True
|