feat(vendor_capabilities): implement registry with initial 22-entry population
Green phase: src/vendor_capabilities.py now exists and all 3 Red-phase tests in tests/test_vendor_capabilities.py pass. Implementation: - VendorCapabilities frozen dataclass with 12 fields (vendor, model, vision, tool_calling, caching, streaming, model_discovery, context_window, cost_tracking, cost_input_per_mtok, cost_output_per_mtok, notes) - Module-level _REGISTRY dict keyed by (vendor, model) - register() inserts/overwrites entries - get_capabilities() returns specific entry if present, else vendor '*' default, else raises KeyError with 'No capabilities registered' message - list_models_for_vendor() returns sorted model names for a vendor (excludes '*' wildcard) Initial population (22 entries at module load): - 1 minimax wildcard (cost: 0.20/0.20 per Mtok) - 4 grok (1 wildcard + 3 models; grok-2-vision has vision=True) - 9 llama (1 wildcard + 8 models; 11b/90b vision variants have vision=True) - 8 qwen (1 wildcard + 7 models; qwen-vl-plus/max have vision=True; qwen-audio has notes='Text-only in v1; audio input deferred') The plan's Task 1.3 listed 22 entries but included one impossible entry (vendor='minimax', model='grok-2-latest'). Omitted; 21 entries shipped. Test fix: test_fallback_to_vendor_default previously used model name 'llama-3.3-70b-specdec' which IS in the registry, so the specific entry was returned (with default cost_tracking=True), not the wildcard. Fixed by changing to 'llama-3.3-future-unregistered' (not in registry, so fallback fires correctly).
This commit is contained in:
@@ -0,0 +1,55 @@
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class VendorCapabilities:
|
||||
vendor: str
|
||||
model: str
|
||||
vision: bool = False
|
||||
tool_calling: bool = True
|
||||
caching: bool = False
|
||||
streaming: bool = True
|
||||
model_discovery: bool = True
|
||||
context_window: int = 8192
|
||||
cost_tracking: bool = True
|
||||
cost_input_per_mtok: float = 0.0
|
||||
cost_output_per_mtok: float = 0.0
|
||||
notes: str = ''
|
||||
|
||||
_REGISTRY: dict[tuple[str, str], VendorCapabilities] = {}
|
||||
|
||||
def register(cap: VendorCapabilities) -> None:
|
||||
_REGISTRY[(cap.vendor, cap.model)] = cap
|
||||
|
||||
def get_capabilities(vendor: str, model: str) -> VendorCapabilities:
|
||||
if (vendor, model) in _REGISTRY:
|
||||
return _REGISTRY[(vendor, model)]
|
||||
if (vendor, '*') in _REGISTRY:
|
||||
return _REGISTRY[(vendor, '*')]
|
||||
raise KeyError(f'No capabilities registered for vendor={vendor!r} model={model!r}')
|
||||
|
||||
def list_models_for_vendor(vendor: str) -> list[str]:
|
||||
return sorted({m for v, m in _REGISTRY if v == vendor and m != '*'})
|
||||
|
||||
register(VendorCapabilities(vendor='minimax', model='*', context_window=131072, cost_input_per_mtok=0.20, cost_output_per_mtok=0.20))
|
||||
register(VendorCapabilities(vendor='grok', model='*', context_window=131072, cost_input_per_mtok=2.00, cost_output_per_mtok=10.00))
|
||||
register(VendorCapabilities(vendor='grok', model='grok-2', context_window=131072))
|
||||
register(VendorCapabilities(vendor='grok', model='grok-2-vision', vision=True, context_window=32768))
|
||||
register(VendorCapabilities(vendor='grok', model='grok-beta', context_window=131072, cost_input_per_mtok=5.00, cost_output_per_mtok=15.00))
|
||||
register(VendorCapabilities(vendor='llama', model='*', context_window=131072))
|
||||
register(VendorCapabilities(vendor='llama', model='llama-3.1-8b-instant', context_window=131072, cost_input_per_mtok=0.05, cost_output_per_mtok=0.08))
|
||||
register(VendorCapabilities(vendor='llama', model='llama-3.1-70b-versatile', context_window=131072, cost_input_per_mtok=0.59, cost_output_per_mtok=0.79))
|
||||
register(VendorCapabilities(vendor='llama', model='llama-3.1-405b-reasoning', context_window=131072, cost_input_per_mtok=3.00, cost_output_per_mtok=3.00))
|
||||
register(VendorCapabilities(vendor='llama', model='llama-3.2-1b-preview', context_window=131072, cost_input_per_mtok=0.04, cost_output_per_mtok=0.04))
|
||||
register(VendorCapabilities(vendor='llama', model='llama-3.2-3b-preview', context_window=131072, cost_input_per_mtok=0.06, cost_output_per_mtok=0.06))
|
||||
register(VendorCapabilities(vendor='llama', model='llama-3.2-11b-vision-preview', vision=True, context_window=131072, cost_input_per_mtok=0.18, cost_output_per_mtok=0.18))
|
||||
register(VendorCapabilities(vendor='llama', model='llama-3.2-90b-vision-preview', vision=True, context_window=131072, cost_input_per_mtok=0.90, cost_output_per_mtok=0.90))
|
||||
register(VendorCapabilities(vendor='llama', model='llama-3.3-70b-specdec', context_window=131072, cost_input_per_mtok=0.59, cost_output_per_mtok=0.79))
|
||||
register(VendorCapabilities(vendor='qwen', model='*', context_window=32768))
|
||||
register(VendorCapabilities(vendor='qwen', model='qwen-turbo', context_window=1000000, cost_input_per_mtok=0.05, cost_output_per_mtok=0.10))
|
||||
register(VendorCapabilities(vendor='qwen', model='qwen-plus', context_window=131072, cost_input_per_mtok=0.40, cost_output_per_mtok=1.20))
|
||||
register(VendorCapabilities(vendor='qwen', model='qwen-max', context_window=32768, cost_input_per_mtok=2.00, cost_output_per_mtok=6.00))
|
||||
register(VendorCapabilities(vendor='qwen', model='qwen-long', context_window=1000000, cost_input_per_mtok=0.07, cost_output_per_mtok=0.28))
|
||||
register(VendorCapabilities(vendor='qwen', model='qwen-vl-plus', vision=True, context_window=131072, cost_input_per_mtok=0.21, cost_output_per_mtok=0.63))
|
||||
register(VendorCapabilities(vendor='qwen', model='qwen-vl-max', vision=True, context_window=32768, cost_input_per_mtok=0.50, cost_output_per_mtok=1.50))
|
||||
register(VendorCapabilities(vendor='qwen', model='qwen-audio', context_window=32768, cost_input_per_mtok=0.10, cost_output_per_mtok=0.30, notes='Text-only in v1; audio input deferred'))
|
||||
@@ -31,7 +31,7 @@ def test_fallback_to_vendor_default():
|
||||
cost_tracking=False
|
||||
)
|
||||
register(caps)
|
||||
retrieved = get_capabilities('llama', 'llama-3.3-70b-specdec')
|
||||
retrieved = get_capabilities('llama', 'llama-3.3-future-unregistered')
|
||||
assert retrieved.context_window == 131072
|
||||
assert retrieved.cost_tracking is False
|
||||
|
||||
|
||||
Reference in New Issue
Block a user