From 6be04bc4f0d9a3c699896de5b93eda44a38b3a46 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Thu, 11 Jun 2026 00:30:52 -0400 Subject: [PATCH] feat(vendor_capabilities): implement registry with initial 22-entry population Green phase: src/vendor_capabilities.py now exists and all 3 Red-phase tests in tests/test_vendor_capabilities.py pass. Implementation: - VendorCapabilities frozen dataclass with 12 fields (vendor, model, vision, tool_calling, caching, streaming, model_discovery, context_window, cost_tracking, cost_input_per_mtok, cost_output_per_mtok, notes) - Module-level _REGISTRY dict keyed by (vendor, model) - register() inserts/overwrites entries - get_capabilities() returns specific entry if present, else vendor '*' default, else raises KeyError with 'No capabilities registered' message - list_models_for_vendor() returns sorted model names for a vendor (excludes '*' wildcard) Initial population (22 entries at module load): - 1 minimax wildcard (cost: 0.20/0.20 per Mtok) - 4 grok (1 wildcard + 3 models; grok-2-vision has vision=True) - 9 llama (1 wildcard + 8 models; 11b/90b vision variants have vision=True) - 8 qwen (1 wildcard + 7 models; qwen-vl-plus/max have vision=True; qwen-audio has notes='Text-only in v1; audio input deferred') The plan's Task 1.3 additionally included one impossible entry (vendor='minimax', model='grok-2-latest'). It was omitted; the 22 entries enumerated above are what shipped. Test fix: test_fallback_to_vendor_default previously used model name 'llama-3.3-70b-specdec' which IS in the registry, so the specific entry was returned (with default cost_tracking=True), not the wildcard. Fixed by changing to 'llama-3.3-future-unregistered' (not in registry, so fallback fires correctly). 
"""Registry of per-vendor / per-model capability records.

Contents of ``src/vendor_capabilities.py``.  Importing this module populates
the registry with the initial entries listed at the bottom of the file.
"""
from __future__ import annotations
from dataclasses import dataclass


@dataclass(frozen=True)
class VendorCapabilities:
    """Immutable capability record for one ``(vendor, model)`` pair.

    ``model`` may be the wildcard ``'*'``, in which case the record is the
    vendor-wide default returned by :func:`get_capabilities` for models that
    have no entry of their own.
    """

    # Registry key.
    vendor: str
    model: str
    # Feature flags; the defaults apply to every entry that does not override them.
    vision: bool = False
    tool_calling: bool = True
    caching: bool = False
    streaming: bool = True
    model_discovery: bool = True
    context_window: int = 8192  # presumably measured in tokens -- confirm
    # Pricing, expressed per Mtok as the field names say; 0.0 when not supplied.
    cost_tracking: bool = True
    cost_input_per_mtok: float = 0.0
    cost_output_per_mtok: float = 0.0
    # Free-form remark attached to the entry.
    notes: str = ''


# All known records, keyed by (vendor, model).
_REGISTRY: dict[tuple[str, str], VendorCapabilities] = {}


def register(cap: VendorCapabilities) -> None:
    """Store *cap* under ``(cap.vendor, cap.model)``, replacing any existing entry."""
    _REGISTRY[(cap.vendor, cap.model)] = cap


def get_capabilities(vendor: str, model: str) -> VendorCapabilities:
    """Return the capabilities registered for *vendor* / *model*.

    The exact ``(vendor, model)`` entry wins; otherwise the vendor's ``'*'``
    default is returned unchanged (its ``model`` field stays ``'*'``).

    Raises:
        KeyError: neither a specific entry nor a vendor default exists.
    """
    # One dict probe per candidate key instead of a membership test plus an index.
    for key in ((vendor, model), (vendor, '*')):
        cap = _REGISTRY.get(key)
        if cap is not None:
            return cap
    raise KeyError(f'No capabilities registered for vendor={vendor!r} model={model!r}')


def list_models_for_vendor(vendor: str) -> list[str]:
    """Return the sorted model names registered for *vendor*, excluding ``'*'``."""
    # Registry keys are unique, so no de-duplication is needed before sorting.
    return sorted(m for v, m in _REGISTRY if v == vendor and m != '*')


# --- Initial population (runs at import time) --------------------------------

# minimax: vendor-wide default only.
register(VendorCapabilities(
    vendor='minimax', model='*', context_window=131072,
    cost_input_per_mtok=0.20, cost_output_per_mtok=0.20,
))

# grok
# NOTE(review): 'grok-2' and 'grok-2-vision' give no cost figures, so they report
# 0.0 per Mtok while the grok '*' default carries 2.00/10.00 -- confirm intended.
register(VendorCapabilities(
    vendor='grok', model='*', context_window=131072,
    cost_input_per_mtok=2.00, cost_output_per_mtok=10.00,
))
register(VendorCapabilities(vendor='grok', model='grok-2', context_window=131072))
register(VendorCapabilities(
    vendor='grok', model='grok-2-vision', vision=True, context_window=32768,
))
register(VendorCapabilities(
    vendor='grok', model='grok-beta', context_window=131072,
    cost_input_per_mtok=5.00, cost_output_per_mtok=15.00,
))

# llama
register(VendorCapabilities(vendor='llama', model='*', context_window=131072))
register(VendorCapabilities(
    vendor='llama', model='llama-3.1-8b-instant', context_window=131072,
    cost_input_per_mtok=0.05, cost_output_per_mtok=0.08,
))
register(VendorCapabilities(
    vendor='llama', model='llama-3.1-70b-versatile', context_window=131072,
    cost_input_per_mtok=0.59, cost_output_per_mtok=0.79,
))
register(VendorCapabilities(
    vendor='llama', model='llama-3.1-405b-reasoning', context_window=131072,
    cost_input_per_mtok=3.00, cost_output_per_mtok=3.00,
))
register(VendorCapabilities(
    vendor='llama', model='llama-3.2-1b-preview', context_window=131072,
    cost_input_per_mtok=0.04, cost_output_per_mtok=0.04,
))
register(VendorCapabilities(
    vendor='llama', model='llama-3.2-3b-preview', context_window=131072,
    cost_input_per_mtok=0.06, cost_output_per_mtok=0.06,
))
register(VendorCapabilities(
    vendor='llama', model='llama-3.2-11b-vision-preview', vision=True,
    context_window=131072,
    cost_input_per_mtok=0.18, cost_output_per_mtok=0.18,
))
register(VendorCapabilities(
    vendor='llama', model='llama-3.2-90b-vision-preview', vision=True,
    context_window=131072,
    cost_input_per_mtok=0.90, cost_output_per_mtok=0.90,
))
register(VendorCapabilities(
    vendor='llama', model='llama-3.3-70b-specdec', context_window=131072,
    cost_input_per_mtok=0.59, cost_output_per_mtok=0.79,
))

# qwen
register(VendorCapabilities(vendor='qwen', model='*', context_window=32768))
register(VendorCapabilities(
    vendor='qwen', model='qwen-turbo', context_window=1000000,
    cost_input_per_mtok=0.05, cost_output_per_mtok=0.10,
))
register(VendorCapabilities(
    vendor='qwen', model='qwen-plus', context_window=131072,
    cost_input_per_mtok=0.40, cost_output_per_mtok=1.20,
))
register(VendorCapabilities(
    vendor='qwen', model='qwen-max', context_window=32768,
    cost_input_per_mtok=2.00, cost_output_per_mtok=6.00,
))
register(VendorCapabilities(
    vendor='qwen', model='qwen-long', context_window=1000000,
    cost_input_per_mtok=0.07, cost_output_per_mtok=0.28,
))
register(VendorCapabilities(
    vendor='qwen', model='qwen-vl-plus', vision=True, context_window=131072,
    cost_input_per_mtok=0.21, cost_output_per_mtok=0.63,
))
register(VendorCapabilities(
    vendor='qwen', model='qwen-vl-max', vision=True, context_window=32768,
    cost_input_per_mtok=0.50, cost_output_per_mtok=1.50,
))
register(VendorCapabilities(
    vendor='qwen', model='qwen-audio', context_window=32768,
    cost_input_per_mtok=0.10, cost_output_per_mtok=0.30,
    notes='Text-only in v1; audio input deferred',
))