diff --git a/src/ai_client.py b/src/ai_client.py index ffdeea87..21b0e5a8 100644 --- a/src/ai_client.py +++ b/src/ai_client.py @@ -188,6 +188,110 @@ _project_context_marker: str = "" #endregion: Provider Configuration +#region: Vendor Capabilities (moved from src/vendor_capabilities.py) + +@dataclass(frozen=True) +class VendorCapabilities: + vendor: str + model: str + vision: bool = False + tool_calling: bool = True + caching: bool = False + streaming: bool = True + model_discovery: bool = True + context_window: int = 8192 + cost_tracking: bool = True + cost_input_per_mtok: float = 0.0 + cost_output_per_mtok: float = 0.0 + notes: str = '' + local: bool = False + reasoning: bool = False + structured_output: bool = False + code_execution: bool = False + web_search: bool = False + x_search: bool = False + file_search: bool = False + mcp_support: bool = False + audio: bool = False + video: bool = False + grounding: bool = False + computer_use: bool = False + +_VENDOR_REGISTRY: dict[tuple[str, str], "VendorCapabilities"] = {} + +def register(cap: "VendorCapabilities") -> None: + _VENDOR_REGISTRY[(cap.vendor, cap.model)] = cap + +def get_capabilities(vendor: str, model: str) -> "VendorCapabilities": + if (vendor, model) in _VENDOR_REGISTRY: return _VENDOR_REGISTRY[(vendor, model)] + if (vendor, '*') in _VENDOR_REGISTRY: return _VENDOR_REGISTRY[(vendor, '*')] + raise KeyError(f'No capabilities registered for vendor={vendor!r} model={model!r}') + +def list_models_for_vendor(vendor: str) -> list[str]: + return sorted({m for v, m in _VENDOR_REGISTRY if v == vendor and m != '*'}) + +register(VendorCapabilities(vendor='minimax', model='*', context_window=131072, cost_input_per_mtok=0.20, cost_output_per_mtok=0.20)) +register(VendorCapabilities(vendor='minimax', model='MiniMax-M2.7', context_window=131072, cost_input_per_mtok=0.20, cost_output_per_mtok=0.20, reasoning=True)) +register(VendorCapabilities(vendor='minimax', model='MiniMax-M2.5', context_window=131072, cost_input_per_mtok=0.20, cost_output_per_mtok=0.20, reasoning=True)) +register(VendorCapabilities(vendor='minimax', model='MiniMax-M2.1', context_window=131072, cost_input_per_mtok=0.20, cost_output_per_mtok=0.20)) +register(VendorCapabilities(vendor='minimax', model='MiniMax-M2', context_window=131072, cost_input_per_mtok=0.20, cost_output_per_mtok=0.20)) +register(VendorCapabilities(vendor='grok', model='*', context_window=131072, cost_input_per_mtok=2.00, cost_output_per_mtok=10.00, web_search=True, x_search=True)) +register(VendorCapabilities(vendor='grok', model='grok-2', context_window=131072, web_search=True, x_search=True)) +register(VendorCapabilities(vendor='grok', model='grok-2-vision', vision=True, context_window=32768, web_search=True, x_search=True)) +register(VendorCapabilities(vendor='grok', model='grok-beta', context_window=131072, cost_input_per_mtok=5.00, cost_output_per_mtok=15.00, web_search=True, x_search=True)) +register(VendorCapabilities(vendor='llama', model='*', context_window=131072)) +register(VendorCapabilities(vendor='llama', model='llama-3.1-8b-instant', context_window=131072, cost_input_per_mtok=0.05, cost_output_per_mtok=0.08)) +register(VendorCapabilities(vendor='llama', model='llama-3.1-70b-versatile', context_window=131072, cost_input_per_mtok=0.59, cost_output_per_mtok=0.79)) +register(VendorCapabilities(vendor='llama', model='llama-3.1-405b-reasoning', context_window=131072, cost_input_per_mtok=3.00, cost_output_per_mtok=3.00, reasoning=True)) +register(VendorCapabilities(vendor='llama', model='llama-3.2-1b-preview', context_window=131072, cost_input_per_mtok=0.04, cost_output_per_mtok=0.04)) +register(VendorCapabilities(vendor='llama', model='llama-3.2-3b-preview', context_window=131072, cost_input_per_mtok=0.06, cost_output_per_mtok=0.06)) +register(VendorCapabilities(vendor='llama', model='llama-3.2-11b-vision-preview', vision=True, context_window=131072, cost_input_per_mtok=0.18, cost_output_per_mtok=0.18)) +register(VendorCapabilities(vendor='llama', model='llama-3.2-90b-vision-preview', vision=True, context_window=131072, cost_input_per_mtok=0.90, cost_output_per_mtok=0.90)) +register(VendorCapabilities(vendor='llama', model='llama-3.3-70b-specdec', context_window=131072, cost_input_per_mtok=0.59, cost_output_per_mtok=0.79)) +register(VendorCapabilities(vendor='qwen', model='*', context_window=32768)) +register(VendorCapabilities(vendor='qwen', model='qwen-turbo', context_window=1000000, cost_input_per_mtok=0.05, cost_output_per_mtok=0.10)) +register(VendorCapabilities(vendor='qwen', model='qwen-plus', context_window=131072, cost_input_per_mtok=0.40, cost_output_per_mtok=1.20)) +register(VendorCapabilities(vendor='qwen', model='qwen-max', context_window=32768, cost_input_per_mtok=2.00, cost_output_per_mtok=6.00)) +register(VendorCapabilities(vendor='qwen', model='qwen-long', context_window=1000000, cost_input_per_mtok=0.07, cost_output_per_mtok=0.28, caching=True, notes='qwen-long supports custom chunked long-context caching')) +register(VendorCapabilities(vendor='qwen', model='qwen-vl-plus', vision=True, context_window=131072, cost_input_per_mtok=0.21, cost_output_per_mtok=0.63)) +register(VendorCapabilities(vendor='qwen', model='qwen-vl-max', vision=True, context_window=32768, cost_input_per_mtok=0.50, cost_output_per_mtok=1.50)) +register(VendorCapabilities(vendor='qwen', model='qwen-audio', context_window=32768, cost_input_per_mtok=0.10, cost_output_per_mtok=0.30, audio=True, notes='Audio input support added 2026-06-11 (v2 matrix)')) +register(VendorCapabilities(vendor='anthropic', model='*', context_window=200000, cost_input_per_mtok=3.00, cost_output_per_mtok=15.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True, notes='Anthropic wildcard: Sonnet defaults. Per-model variations below.')) +register(VendorCapabilities(vendor='anthropic', model='claude-sonnet-4-5-20250929', context_window=200000, cost_input_per_mtok=3.00, cost_output_per_mtok=15.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-sonnet-4-20250514', context_window=200000, cost_input_per_mtok=3.00, cost_output_per_mtok=15.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-sonnet-4-6', context_window=200000, cost_input_per_mtok=3.00, cost_output_per_mtok=15.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-opus-4-1-20250805', context_window=200000, cost_input_per_mtok=15.00, cost_output_per_mtok=75.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-opus-4-20250514', context_window=200000, cost_input_per_mtok=15.00, cost_output_per_mtok=75.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-opus-4-5-20251101', context_window=200000, cost_input_per_mtok=15.00, cost_output_per_mtok=75.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-opus-4-6', context_window=200000, cost_input_per_mtok=15.00, cost_output_per_mtok=75.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-opus-4-7', context_window=200000, cost_input_per_mtok=15.00, cost_output_per_mtok=75.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-opus-4-8', context_window=200000, cost_input_per_mtok=15.00, cost_output_per_mtok=75.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-haiku-4-5-20251001', context_window=200000, cost_input_per_mtok=1.00, cost_output_per_mtok=5.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='anthropic', model='claude-fable-5', context_window=200000, cost_input_per_mtok=3.00, cost_output_per_mtok=15.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) +register(VendorCapabilities(vendor='gemini', model='*', context_window=1000000, cost_input_per_mtok=1.25, cost_output_per_mtok=5.00, caching=True, vision=True, video=True, audio=True, grounding=True, structured_output=True, notes='Gemini wildcard: 1M+ context window. Per-model variations below.')) +register(VendorCapabilities(vendor='gemini', model='gemini-3.1-pro-preview', context_window=1000000, cost_input_per_mtok=3.50, cost_output_per_mtok=10.50, caching=True, vision=True, video=True, audio=True, grounding=True, structured_output=True)) +register(VendorCapabilities(vendor='gemini', model='gemini-3-flash-preview', context_window=1000000, cost_input_per_mtok=0.15, cost_output_per_mtok=0.60, caching=True, vision=True, video=True, audio=True, grounding=True, structured_output=True)) +register(VendorCapabilities(vendor='gemini', model='gemini-2.5-flash', context_window=1000000, cost_input_per_mtok=0.15, cost_output_per_mtok=0.60, caching=True, vision=True, video=True, audio=True, grounding=True, structured_output=True)) +register(VendorCapabilities(vendor='gemini', model='gemini-2.5-flash-lite', context_window=1000000, cost_input_per_mtok=0.075, cost_output_per_mtok=0.30, caching=True, vision=True, grounding=True, structured_output=True)) +register(VendorCapabilities(vendor='deepseek', model='*', context_window=32768, cost_input_per_mtok=0.27, cost_output_per_mtok=1.10, reasoning=True, structured_output=True, notes='DeepSeek wildcard: V3 defaults. R1/reasoner variants below.')) +register(VendorCapabilities(vendor='deepseek', model='deepseek-v3', context_window=32768, cost_input_per_mtok=0.27, cost_output_per_mtok=1.10, structured_output=True)) +register(VendorCapabilities(vendor='deepseek', model='deepseek-reasoner', context_window=32768, cost_input_per_mtok=0.55, cost_output_per_mtok=2.19, reasoning=True, structured_output=True)) +register(VendorCapabilities(vendor='deepseek', model='deepseek-r1', context_window=32768, cost_input_per_mtok=0.55, cost_output_per_mtok=2.19, reasoning=True, structured_output=True)) + +#endregion: Vendor Capabilities + +#region: Vendor State (moved from src/vendor_state.py) + +@dataclass(frozen=True) +class VendorMetric: + key: str + label: str + value: str + state: str + tooltip: str + +#endregion: Vendor State + #region: System Prompt Management def set_custom_system_prompt(prompt: str) -> None: @@ -3447,103 +3551,3 @@ def run_discussion_compression(discussion_text: str) -> str: return f"ERROR: Unsupported provider for discussion compression: '{p}'" #endregion: Subagent Summarization - -#region: Vendor Capabilities (moved from src/vendor_capabilities.py) -@dataclass(frozen=True) -class VendorCapabilities: - vendor: str - model: str - vision: bool = False - tool_calling: bool = True - caching: bool = False - streaming: bool = True - model_discovery: bool = True - context_window: int = 8192 - cost_tracking: bool = True - cost_input_per_mtok: float = 0.0 - cost_output_per_mtok: float = 0.0 - notes: str = '' - local: bool = False - reasoning: bool = False - structured_output: bool = False - code_execution: bool = False - web_search: bool = False - x_search: bool = False - file_search: bool = False - mcp_support: bool = False - audio: bool = False - video: bool = False - grounding: bool = False - computer_use: bool = False - -_VENDOR_REGISTRY: dict[tuple[str, str], "VendorCapabilities"] = {} - -def register(cap: "VendorCapabilities") -> None: - _VENDOR_REGISTRY[(cap.vendor, cap.model)] = cap - -def get_capabilities(vendor: str, model: str) -> "VendorCapabilities": - if (vendor, model) in _VENDOR_REGISTRY: return _VENDOR_REGISTRY[(vendor, model)] - if (vendor, '*') in _VENDOR_REGISTRY: return _VENDOR_REGISTRY[(vendor, '*')] - raise KeyError(f'No capabilities registered for vendor={vendor!r} model={model!r}') - -def list_models_for_vendor(vendor: str) -> list[str]: - return sorted({m for v, m in _VENDOR_REGISTRY if v == vendor and m != '*'}) - -register(VendorCapabilities(vendor='minimax', model='*', context_window=131072, cost_input_per_mtok=0.20, cost_output_per_mtok=0.20)) -register(VendorCapabilities(vendor='minimax', model='MiniMax-M2.7', context_window=131072, cost_input_per_mtok=0.20, cost_output_per_mtok=0.20, reasoning=True)) -register(VendorCapabilities(vendor='minimax', model='MiniMax-M2.5', context_window=131072, cost_input_per_mtok=0.20, cost_output_per_mtok=0.20, reasoning=True)) -register(VendorCapabilities(vendor='minimax', model='MiniMax-M2.1', context_window=131072, cost_input_per_mtok=0.20, cost_output_per_mtok=0.20)) -register(VendorCapabilities(vendor='minimax', model='MiniMax-M2', context_window=131072, cost_input_per_mtok=0.20, cost_output_per_mtok=0.20)) -register(VendorCapabilities(vendor='grok', model='*', context_window=131072, cost_input_per_mtok=2.00, cost_output_per_mtok=10.00, web_search=True, x_search=True)) -register(VendorCapabilities(vendor='grok', model='grok-2', context_window=131072, web_search=True, x_search=True)) -register(VendorCapabilities(vendor='grok', model='grok-2-vision', vision=True, context_window=32768, web_search=True, x_search=True)) -register(VendorCapabilities(vendor='grok', model='grok-beta', context_window=131072, cost_input_per_mtok=5.00, cost_output_per_mtok=15.00, web_search=True, x_search=True)) -register(VendorCapabilities(vendor='llama', model='*', context_window=131072)) -register(VendorCapabilities(vendor='llama', model='llama-3.1-8b-instant', context_window=131072, cost_input_per_mtok=0.05, cost_output_per_mtok=0.08)) -register(VendorCapabilities(vendor='llama', model='llama-3.1-70b-versatile', context_window=131072, cost_input_per_mtok=0.59, cost_output_per_mtok=0.79)) -register(VendorCapabilities(vendor='llama', model='llama-3.1-405b-reasoning', context_window=131072, cost_input_per_mtok=3.00, cost_output_per_mtok=3.00, reasoning=True)) -register(VendorCapabilities(vendor='llama', model='llama-3.2-1b-preview', context_window=131072, cost_input_per_mtok=0.04, cost_output_per_mtok=0.04)) -register(VendorCapabilities(vendor='llama', model='llama-3.2-3b-preview', context_window=131072, cost_input_per_mtok=0.06, cost_output_per_mtok=0.06)) -register(VendorCapabilities(vendor='llama', model='llama-3.2-11b-vision-preview', vision=True, context_window=131072, cost_input_per_mtok=0.18, cost_output_per_mtok=0.18)) -register(VendorCapabilities(vendor='llama', model='llama-3.2-90b-vision-preview', vision=True, context_window=131072, cost_input_per_mtok=0.90, cost_output_per_mtok=0.90)) -register(VendorCapabilities(vendor='llama', model='llama-3.3-70b-specdec', context_window=131072, cost_input_per_mtok=0.59, cost_output_per_mtok=0.79)) -register(VendorCapabilities(vendor='qwen', model='*', context_window=32768)) -register(VendorCapabilities(vendor='qwen', model='qwen-turbo', context_window=1000000, cost_input_per_mtok=0.05, cost_output_per_mtok=0.10)) -register(VendorCapabilities(vendor='qwen', model='qwen-plus', context_window=131072, cost_input_per_mtok=0.40, cost_output_per_mtok=1.20)) -register(VendorCapabilities(vendor='qwen', model='qwen-max', context_window=32768, cost_input_per_mtok=2.00, cost_output_per_mtok=6.00)) -register(VendorCapabilities(vendor='qwen', model='qwen-long', context_window=1000000, cost_input_per_mtok=0.07, cost_output_per_mtok=0.28, caching=True, notes='qwen-long supports custom chunked long-context caching')) -register(VendorCapabilities(vendor='qwen', model='qwen-vl-plus', vision=True, context_window=131072, cost_input_per_mtok=0.21, cost_output_per_mtok=0.63)) -register(VendorCapabilities(vendor='qwen', model='qwen-vl-max', vision=True, context_window=32768, cost_input_per_mtok=0.50, cost_output_per_mtok=1.50)) -register(VendorCapabilities(vendor='qwen', model='qwen-audio', context_window=32768, cost_input_per_mtok=0.10, cost_output_per_mtok=0.30, audio=True, notes='Audio input support added 2026-06-11 (v2 matrix)')) -register(VendorCapabilities(vendor='anthropic', model='*', context_window=200000, cost_input_per_mtok=3.00, cost_output_per_mtok=15.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True, notes='Anthropic wildcard: Sonnet defaults. Per-model variations below.')) -register(VendorCapabilities(vendor='anthropic', model='claude-sonnet-4-5-20250929', context_window=200000, cost_input_per_mtok=3.00, cost_output_per_mtok=15.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) -register(VendorCapabilities(vendor='anthropic', model='claude-sonnet-4-20250514', context_window=200000, cost_input_per_mtok=3.00, cost_output_per_mtok=15.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) -register(VendorCapabilities(vendor='anthropic', model='claude-sonnet-4-6', context_window=200000, cost_input_per_mtok=3.00, cost_output_per_mtok=15.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) -register(VendorCapabilities(vendor='anthropic', model='claude-opus-4-1-20250805', context_window=200000, cost_input_per_mtok=15.00, cost_output_per_mtok=75.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) -register(VendorCapabilities(vendor='anthropic', model='claude-opus-4-20250514', context_window=200000, cost_input_per_mtok=15.00, cost_output_per_mtok=75.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) -register(VendorCapabilities(vendor='anthropic', model='claude-opus-4-5-20251101', context_window=200000, cost_input_per_mtok=15.00, cost_output_per_mtok=75.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) -register(VendorCapabilities(vendor='anthropic', model='claude-opus-4-6', context_window=200000, cost_input_per_mtok=15.00, cost_output_per_mtok=75.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) -register(VendorCapabilities(vendor='anthropic', model='claude-opus-4-7', context_window=200000, cost_input_per_mtok=15.00, cost_output_per_mtok=75.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) -register(VendorCapabilities(vendor='anthropic', model='claude-opus-4-8', context_window=200000, cost_input_per_mtok=15.00, cost_output_per_mtok=75.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) -register(VendorCapabilities(vendor='anthropic', model='claude-haiku-4-5-20251001', context_window=200000, cost_input_per_mtok=1.00, cost_output_per_mtok=5.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) -register(VendorCapabilities(vendor='anthropic', model='claude-fable-5', context_window=200000, cost_input_per_mtok=3.00, cost_output_per_mtok=15.00, caching=True, structured_output=True, file_search=True, mcp_support=True, computer_use=True)) -register(VendorCapabilities(vendor='gemini', model='*', context_window=1000000, cost_input_per_mtok=1.25, cost_output_per_mtok=5.00, caching=True, vision=True, video=True, audio=True, grounding=True, structured_output=True, notes='Gemini wildcard: 1M+ context window. Per-model variations below.')) -register(VendorCapabilities(vendor='gemini', model='gemini-3.1-pro-preview', context_window=1000000, cost_input_per_mtok=3.50, cost_output_per_mtok=10.50, caching=True, vision=True, video=True, audio=True, grounding=True, structured_output=True)) -register(VendorCapabilities(vendor='gemini', model='gemini-3-flash-preview', context_window=1000000, cost_input_per_mtok=0.15, cost_output_per_mtok=0.60, caching=True, vision=True, video=True, audio=True, grounding=True, structured_output=True)) -register(VendorCapabilities(vendor='gemini', model='gemini-2.5-flash', context_window=1000000, cost_input_per_mtok=0.15, cost_output_per_mtok=0.60, caching=True, vision=True, video=True, audio=True, grounding=True, structured_output=True)) -register(VendorCapabilities(vendor='gemini', model='gemini-2.5-flash-lite', context_window=1000000, cost_input_per_mtok=0.075, cost_output_per_mtok=0.30, caching=True, vision=True, grounding=True, structured_output=True)) -register(VendorCapabilities(vendor='deepseek', model='*', context_window=32768, cost_input_per_mtok=0.27, cost_output_per_mtok=1.10, reasoning=True, structured_output=True, notes='DeepSeek wildcard: V3 defaults. R1/reasoner variants below.')) -register(VendorCapabilities(vendor='deepseek', model='deepseek-v3', context_window=32768, cost_input_per_mtok=0.27, cost_output_per_mtok=1.10, structured_output=True)) -register(VendorCapabilities(vendor='deepseek', model='deepseek-reasoner', context_window=32768, cost_input_per_mtok=0.55, cost_output_per_mtok=2.19, reasoning=True, structured_output=True)) -register(VendorCapabilities(vendor='deepseek', model='deepseek-r1', context_window=32768, cost_input_per_mtok=0.55, cost_output_per_mtok=2.19, reasoning=True, structured_output=True)) -#endregion: Vendor Capabilities - -#region: Vendor State (moved from src/vendor_state.py) -@dataclass(frozen=True) -class VendorMetric: - key: str - label: str - value: str - state: str - tooltip: str -#endregion: Vendor State