diff --git a/src/gui_2.py b/src/gui_2.py index 37634a5e..5475d2d6 100644 --- a/src/gui_2.py +++ b/src/gui_2.py @@ -249,6 +249,15 @@ def _resolve_font_path(font_path: str, assets_dir: Path) -> str: return rel return "fonts/Inter-Regular.ttf" +def _apply_runtime_caps_override(app: "App", caps: "VendorCapabilities") -> "VendorCapabilities": + from dataclasses import replace + if app.current_provider == "llama": + from src import ai_client + base_url: str = getattr(ai_client, "_llama_base_url", "") + if "localhost" in base_url or "127.0.0.1" in base_url: + return replace(caps, local=True) + return caps + class App: """The main ImGui interface orchestrator for Manual Slop.""" @@ -733,9 +742,10 @@ class App: def _get_active_capabilities(self) -> "VendorCapabilities": from src.vendor_capabilities import VendorCapabilities, get_capabilities try: - return get_capabilities(self.current_provider, self.current_model) + caps = get_capabilities(self.current_provider, self.current_model) except KeyError: - return VendorCapabilities(vendor=self.current_provider, model=self.current_model, notes="unregistered") + caps = VendorCapabilities(vendor=self.current_provider, model=self.current_model, notes="unregistered") + return _apply_runtime_caps_override(self, caps) @property def perf_profiling_enabled(self) -> bool: @@ -1887,11 +1897,21 @@ def render_token_budget_panel(app: App) -> None: imgui.table_set_column_index(0); render_selectable_label(app, f"tier_{tier}", tier, width=-1) imgui.table_set_column_index(1); render_selectable_label(app, f"model_{tier}", model.split("-")[0], width=-1) imgui.table_set_column_index(2); render_selectable_label(app, f"tokens_{tier}", f"{tokens:,}", width=-1) - cost_str = f"${cost:.4f}" if caps.cost_tracking else "-" + if caps.local: + cost_str = "Free (local)" + elif caps.cost_tracking: + cost_str = f"${cost:.4f}" + else: + cost_str = "-" imgui.table_set_column_index(3); render_selectable_label(app, f"cost_{tier}", cost_str, width=-1, color=theme.get_color("status_success")) imgui.end_table() tier_total = sum(cost_tracker.estimate_cost(stats.get('model', ''), stats.get('input', 0), stats.get('output', 0)) for stats in app.mma_tier_usage.values()) - total_str = f"${tier_total:.4f}" if caps.cost_tracking else "-" + if caps.local: + total_str = "Free (local)" + elif caps.cost_tracking: + total_str = f"${tier_total:.4f}" + else: + total_str = "-" render_selectable_label(app, "session_total_cost", f"Session Total: {total_str}", width=-1, color=theme.get_color("status_success")) else: imgui.text_disabled("No MMA tier usage data") diff --git a/src/vendor_capabilities.py b/src/vendor_capabilities.py index 0ea18b78..03b758c4 100644 --- a/src/vendor_capabilities.py +++ b/src/vendor_capabilities.py @@ -45,18 +45,18 @@ def list_models_for_vendor(vendor: str) -> list[str]: return sorted({m for v, m in _REGISTRY if v == vendor and m != '*'}) register(VendorCapabilities(vendor='minimax', model='*', context_window=131072, cost_input_per_mtok=0.20, cost_output_per_mtok=0.20)) -register(VendorCapabilities(vendor='minimax', model='MiniMax-M2.7', context_window=131072, cost_input_per_mtok=0.20, cost_output_per_mtok=0.20)) -register(VendorCapabilities(vendor='minimax', model='MiniMax-M2.5', context_window=131072, cost_input_per_mtok=0.20, cost_output_per_mtok=0.20)) +register(VendorCapabilities(vendor='minimax', model='MiniMax-M2.7', context_window=131072, cost_input_per_mtok=0.20, cost_output_per_mtok=0.20, reasoning=True)) +register(VendorCapabilities(vendor='minimax', model='MiniMax-M2.5', context_window=131072, cost_input_per_mtok=0.20, cost_output_per_mtok=0.20, reasoning=True)) register(VendorCapabilities(vendor='minimax', model='MiniMax-M2.1', context_window=131072, cost_input_per_mtok=0.20, cost_output_per_mtok=0.20)) register(VendorCapabilities(vendor='minimax', model='MiniMax-M2', context_window=131072, cost_input_per_mtok=0.20, cost_output_per_mtok=0.20)) -register(VendorCapabilities(vendor='grok', model='*', context_window=131072, cost_input_per_mtok=2.00, cost_output_per_mtok=10.00)) -register(VendorCapabilities(vendor='grok', model='grok-2', context_window=131072)) -register(VendorCapabilities(vendor='grok', model='grok-2-vision', vision=True, context_window=32768)) -register(VendorCapabilities(vendor='grok', model='grok-beta', context_window=131072, cost_input_per_mtok=5.00, cost_output_per_mtok=15.00)) +register(VendorCapabilities(vendor='grok', model='*', context_window=131072, cost_input_per_mtok=2.00, cost_output_per_mtok=10.00, web_search=True, x_search=True)) +register(VendorCapabilities(vendor='grok', model='grok-2', context_window=131072, web_search=True, x_search=True)) +register(VendorCapabilities(vendor='grok', model='grok-2-vision', vision=True, context_window=32768, web_search=True, x_search=True)) +register(VendorCapabilities(vendor='grok', model='grok-beta', context_window=131072, cost_input_per_mtok=5.00, cost_output_per_mtok=15.00, web_search=True, x_search=True)) register(VendorCapabilities(vendor='llama', model='*', context_window=131072)) register(VendorCapabilities(vendor='llama', model='llama-3.1-8b-instant', context_window=131072, cost_input_per_mtok=0.05, cost_output_per_mtok=0.08)) register(VendorCapabilities(vendor='llama', model='llama-3.1-70b-versatile', context_window=131072, cost_input_per_mtok=0.59, cost_output_per_mtok=0.79)) -register(VendorCapabilities(vendor='llama', model='llama-3.1-405b-reasoning', context_window=131072, cost_input_per_mtok=3.00, cost_output_per_mtok=3.00)) +register(VendorCapabilities(vendor='llama', model='llama-3.1-405b-reasoning', context_window=131072, cost_input_per_mtok=3.00, cost_output_per_mtok=3.00, reasoning=True)) register(VendorCapabilities(vendor='llama', model='llama-3.2-1b-preview', context_window=131072, cost_input_per_mtok=0.04, cost_output_per_mtok=0.04)) register(VendorCapabilities(vendor='llama', model='llama-3.2-3b-preview', context_window=131072, cost_input_per_mtok=0.06, cost_output_per_mtok=0.06)) register(VendorCapabilities(vendor='llama', model='llama-3.2-11b-vision-preview', vision=True, context_window=131072, cost_input_per_mtok=0.18, cost_output_per_mtok=0.18)) @@ -66,7 +66,7 @@ register(VendorCapabilities(vendor='qwen', model='*', context_window=32768)) register(VendorCapabilities(vendor='qwen', model='qwen-turbo', context_window=1000000, cost_input_per_mtok=0.05, cost_output_per_mtok=0.10)) register(VendorCapabilities(vendor='qwen', model='qwen-plus', context_window=131072, cost_input_per_mtok=0.40, cost_output_per_mtok=1.20)) register(VendorCapabilities(vendor='qwen', model='qwen-max', context_window=32768, cost_input_per_mtok=2.00, cost_output_per_mtok=6.00)) -register(VendorCapabilities(vendor='qwen', model='qwen-long', context_window=1000000, cost_input_per_mtok=0.07, cost_output_per_mtok=0.28)) +register(VendorCapabilities(vendor='qwen', model='qwen-long', context_window=1000000, cost_input_per_mtok=0.07, cost_output_per_mtok=0.28, caching=True, notes='qwen-long supports custom chunked long-context caching')) register(VendorCapabilities(vendor='qwen', model='qwen-vl-plus', vision=True, context_window=131072, cost_input_per_mtok=0.21, cost_output_per_mtok=0.63)) register(VendorCapabilities(vendor='qwen', model='qwen-vl-max', vision=True, context_window=32768, cost_input_per_mtok=0.50, cost_output_per_mtok=1.50)) -register(VendorCapabilities(vendor='qwen', model='qwen-audio', context_window=32768, cost_input_per_mtok=0.10, cost_output_per_mtok=0.30, notes='Text-only in v1; audio input deferred')) \ No newline at end of file +register(VendorCapabilities(vendor='qwen', model='qwen-audio', context_window=32768, cost_input_per_mtok=0.10, cost_output_per_mtok=0.30, audio=True, notes='Audio input support added 2026-06-11 (v2 matrix)')) \ No newline at end of file diff --git a/tests/test_vendor_capabilities.py b/tests/test_vendor_capabilities.py index 7edf8438..8854a219 100644 --- a/tests/test_vendor_capabilities.py +++ b/tests/test_vendor_capabilities.py @@ -70,3 +70,63 @@ def test_v2_local_flag_does_not_affect_other_vendors() -> None: register(VendorCapabilities(vendor='qwen', model='*')) caps = get_capabilities('qwen', 'qwen-turbo') assert caps.local is False + +def test_runtime_caps_override_sets_local_for_llama_localhost() -> None: + from dataclasses import replace + base = VendorCapabilities(vendor='llama', model='llama-3.1-70b-versatile') + assert base.local is False + overridden = replace(base, local=True) + assert overridden.local is True + overridden2 = replace(overridden, local=False) + assert overridden2.local is False + +def test_v2_per_model_population() -> None: + caps = get_capabilities('minimax', 'MiniMax-M2.5') + assert caps.reasoning is True + caps_old = get_capabilities('minimax', 'MiniMax-M2') + assert caps_old.reasoning is False + caps_grok_v = get_capabilities('grok', 'grok-2-vision') + assert caps_grok_v.web_search is True + assert caps_grok_v.x_search is True + assert caps_grok_v.vision is True + caps_qwen_audio = get_capabilities('qwen', 'qwen-audio') + assert caps_qwen_audio.audio is True + caps_qwen_long = get_capabilities('qwen', 'qwen-long') + assert caps_qwen_long.caching is True + caps_llama_reasoning = get_capabilities('llama', 'llama-3.1-405b-reasoning') + assert caps_llama_reasoning.reasoning is True + caps_llama_plain = get_capabilities('llama', 'llama-3.1-8b-instant') + assert caps_llama_plain.reasoning is False + +def test_runtime_caps_override_helper_for_llama_localhost() -> None: + from src import gui_2 + from src import ai_client + original_url = ai_client._llama_base_url + try: + class MockApp: + current_provider = 'llama' + mock = MockApp() + caps = VendorCapabilities(vendor='llama', model='llama-3.1-70b-versatile') + ai_client._llama_base_url = 'https://openrouter.ai/api/v1' + result = gui_2._apply_runtime_caps_override(mock, caps) + assert result.local is False + ai_client._llama_base_url = 'http://localhost:11434/v1' + result = gui_2._apply_runtime_caps_override(mock, caps) + assert result.local is True + finally: + ai_client._llama_base_url = original_url + +def test_runtime_caps_override_helper_does_not_touch_other_vendors() -> None: + from src import gui_2 + from src import ai_client + original_url = ai_client._llama_base_url + try: + class MockApp: + current_provider = 'qwen' + mock = MockApp() + caps = VendorCapabilities(vendor='qwen', model='qwen-turbo') + ai_client._llama_base_url = 'http://localhost:11434/v1' + result = gui_2._apply_runtime_caps_override(mock, caps) + assert result.local is False + finally: + ai_client._llama_base_url = original_url