diff --git a/src/vendor_capabilities.py b/src/vendor_capabilities.py
new file mode 100644
index 00000000..e47df842
--- /dev/null
+++ b/src/vendor_capabilities.py
@@ -0,0 +1,149 @@
+"""Static registry of capability and pricing metadata per (vendor, model)."""
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True)
+class VendorCapabilities:
+    """Immutable capability and pricing record for one (vendor, model) pair.
+
+    A ``model`` of ``'*'`` marks the vendor-wide default that
+    ``get_capabilities`` falls back to for models without their own entry.
+    """
+
+    vendor: str
+    model: str
+    vision: bool = False
+    tool_calling: bool = True
+    caching: bool = False
+    streaming: bool = True
+    model_discovery: bool = True
+    context_window: int = 8192
+    cost_tracking: bool = True
+    # Presumably a price per million tokens (from the ``_per_mtok`` suffix);
+    # the currency is not stated in this module.
+    cost_input_per_mtok: float = 0.0
+    cost_output_per_mtok: float = 0.0
+    notes: str = ''
+
+
+# Keyed by (vendor, model); the model '*' holds a vendor's default entry.
+_REGISTRY: dict[tuple[str, str], VendorCapabilities] = {}
+
+
+def register(cap: VendorCapabilities) -> None:
+    """Add ``cap`` to the registry, replacing any entry with the same key."""
+    _REGISTRY[(cap.vendor, cap.model)] = cap
+
+
+def get_capabilities(vendor: str, model: str) -> VendorCapabilities:
+    """Return the entry for ``model``, or else the vendor's ``'*'`` default.
+
+    The default is returned exactly as registered; its fields are not merged
+    with any model-specific entry.
+
+    Raises:
+        KeyError: If ``vendor`` has neither an exact nor a ``'*'`` entry.
+    """
+    for key in ((vendor, model), (vendor, '*')):
+        cap = _REGISTRY.get(key)
+        if cap is not None:
+            return cap
+    raise KeyError(f'No capabilities registered for vendor={vendor!r} model={model!r}')
+
+
+def list_models_for_vendor(vendor: str) -> list[str]:
+    """Return the sorted model names registered explicitly for ``vendor``.
+
+    The ``'*'`` default entry is not a model name and is left out.
+    """
+    return sorted({m for v, m in _REGISTRY if v == vendor and m != '*'})
+
+
+# Built-in entries, registered at import time.
+register(VendorCapabilities(
+    vendor='minimax', model='*', context_window=131072,
+    cost_input_per_mtok=0.20, cost_output_per_mtok=0.20,
+))
+
+register(VendorCapabilities(
+    vendor='grok', model='*', context_window=131072,
+    cost_input_per_mtok=2.00, cost_output_per_mtok=10.00,
+))
+# NOTE(review): 'grok-2' and 'grok-2-vision' set no costs, so they report 0.0
+# instead of the vendor default above (entries are not merged) -- confirm
+# whether that is intended while cost_tracking is True.
+register(VendorCapabilities(vendor='grok', model='grok-2', context_window=131072))
+register(VendorCapabilities(
+    vendor='grok', model='grok-2-vision', vision=True, context_window=32768,
+))
+register(VendorCapabilities(
+    vendor='grok', model='grok-beta', context_window=131072,
+    cost_input_per_mtok=5.00, cost_output_per_mtok=15.00,
+))
+
+register(VendorCapabilities(vendor='llama', model='*', context_window=131072))
+register(VendorCapabilities(
+    vendor='llama', model='llama-3.1-8b-instant', context_window=131072,
+    cost_input_per_mtok=0.05, cost_output_per_mtok=0.08,
+))
+register(VendorCapabilities(
+    vendor='llama', model='llama-3.1-70b-versatile', context_window=131072,
+    cost_input_per_mtok=0.59, cost_output_per_mtok=0.79,
+))
+register(VendorCapabilities(
+    vendor='llama', model='llama-3.1-405b-reasoning', context_window=131072,
+    cost_input_per_mtok=3.00, cost_output_per_mtok=3.00,
+))
+register(VendorCapabilities(
+    vendor='llama', model='llama-3.2-1b-preview', context_window=131072,
+    cost_input_per_mtok=0.04, cost_output_per_mtok=0.04,
+))
+register(VendorCapabilities(
+    vendor='llama', model='llama-3.2-3b-preview', context_window=131072,
+    cost_input_per_mtok=0.06, cost_output_per_mtok=0.06,
+))
+register(VendorCapabilities(
+    vendor='llama', model='llama-3.2-11b-vision-preview', vision=True,
+    context_window=131072, cost_input_per_mtok=0.18, cost_output_per_mtok=0.18,
+))
+register(VendorCapabilities(
+    vendor='llama', model='llama-3.2-90b-vision-preview', vision=True,
+    context_window=131072, cost_input_per_mtok=0.90, cost_output_per_mtok=0.90,
+))
+register(VendorCapabilities(
+    vendor='llama', model='llama-3.3-70b-specdec', context_window=131072,
+    cost_input_per_mtok=0.59, cost_output_per_mtok=0.79,
+))
+
+register(VendorCapabilities(vendor='qwen', model='*', context_window=32768))
+register(VendorCapabilities(
+    vendor='qwen', model='qwen-turbo', context_window=1000000,
+    cost_input_per_mtok=0.05, cost_output_per_mtok=0.10,
+))
+register(VendorCapabilities(
+    vendor='qwen', model='qwen-plus', context_window=131072,
+    cost_input_per_mtok=0.40, cost_output_per_mtok=1.20,
+))
+register(VendorCapabilities(
+    vendor='qwen', model='qwen-max', context_window=32768,
+    cost_input_per_mtok=2.00, cost_output_per_mtok=6.00,
+))
+register(VendorCapabilities(
+    vendor='qwen', model='qwen-long', context_window=1000000,
+    cost_input_per_mtok=0.07, cost_output_per_mtok=0.28,
+))
+register(VendorCapabilities(
+    vendor='qwen', model='qwen-vl-plus', vision=True, context_window=131072,
+    cost_input_per_mtok=0.21, cost_output_per_mtok=0.63,
+))
+register(VendorCapabilities(
+    vendor='qwen', model='qwen-vl-max', vision=True, context_window=32768,
+    cost_input_per_mtok=0.50, cost_output_per_mtok=1.50,
+))
+register(VendorCapabilities(
+    vendor='qwen', model='qwen-audio', context_window=32768,
+    cost_input_per_mtok=0.10, cost_output_per_mtok=0.30,
+    notes='Text-only in v1; audio input deferred',
+))
diff --git a/tests/test_vendor_capabilities.py b/tests/test_vendor_capabilities.py
index 8e1516a3..e8b8ac9a 100644
--- a/tests/test_vendor_capabilities.py
+++ b/tests/test_vendor_capabilities.py
@@ -31,7 +31,7 @@ def test_fallback_to_vendor_default():
         cost_tracking=False
     )
     register(caps)
-    retrieved = get_capabilities('llama', 'llama-3.3-70b-specdec')
+    retrieved = get_capabilities('llama', 'llama-3.3-future-unregistered')
     assert retrieved.context_window == 131072
     assert retrieved.cost_tracking is False
 