import ast import tomllib from dataclasses import dataclass, field from enum import Enum from pathlib import Path class FixtureClass(str, Enum): UNIT = "unit" MOCK_APP = "mock_app" LIVE_GUI = "live_gui" HEADLESS = "headless" OPT_IN = "opt_in" PERFORMANCE = "performance" class Speed(str, Enum): FAST = "fast" MEDIUM = "medium" SLOW = "slow" VERY_SLOW = "very_slow" @dataclass(frozen=True) class CategoryRecord: filename: str fixture_class: FixtureClass subsystems: list[str] speed: Speed batch_group: str notes: str = "" test_order: dict[str, int] = field(default_factory=dict) source: str = "auto" warnings: list[str] = field(default_factory=list) _OPT_IN_PREFIXES: tuple[str, ...] = ("test_clean_install", "test_docker_build") _PERF_KEYWORDS: tuple[str, ...] = ("perf", "stress", "phase_3_final", "phase_4_stress") _FIXTURE_ARGS: dict[FixtureClass, frozenset[str]] = { FixtureClass.LIVE_GUI: frozenset({"live_gui"}), FixtureClass.MOCK_APP: frozenset({"mock_app", "app_instance"}), } _SUBSYSTEM_PREFIXES: tuple[str, ...] = ( "ai", "api", "arch", "ast", "async", "auto", "beads", "bias", "cache", "cli", "cmd", "comms", "conductor", "context", "cost", "dag", "deepseek", "diff", "discussion", "event", "execution", "external", "ext", "fuzzy", "gemini", "gui", "headless", "history", "hooks", "hot", "imgui", "layout", "live", "log", "mcp", "markdown", "minimax", "mma", "model", "orchestrator", "outline", "parallel", "patch", "perf", "persona", "phase", "pipeline", "preset", "prior", "process", "project", "provider", "rag", "script", "session", "shader", "sim", "skeleton", "slice", "spawn", "status", "subagent", "summary", "symbol", "sync", "synthesis", "system", "takes", "theme", "thinking", "ticket", "tier4", "tiered", "token", "tool", "track", "tree", "ts", "undo", "usage", "user", "vendor", "view", "visual", "vlogger", "websocket", "workflow", "workspace", "z", ) _BATCH_GROUP_CLUSTERS: dict[str, tuple[str, ...]] = { "core": ( "mcp", "ai", "context", "api", "dag", "path", "presets", "personas", "history", "workspace", "rag", "beads", "model", "ast", "async", "cache", "cli", "cmd", "fuzzy", "hooks", "log", "markdown", "orchestrator", "outline", "pipeline", "project", "provider", "script", "session", "skeleton", "slice", "spawn", "status", "subagent", "summary", "symbol", "sync", "synthesis", "system", "takes", "thinking", "tier4", "tiered", "tool", "track", "tree", "ts", "usage", "vendor", "vlogger", "websocket", "workflow", ), "gui": ("gui", "theme", "imgui", "layout", "live", "prior", "visual", "view", "undo"), "mma": ("mma", "conductor", "execution", "ext", "external", "auto", "manual", "tier", "arch", "phase", "process", "z"), "comms": ("comms", "diff", "patch", "event", "hot", "process", "shader"), "headless": ("headless",), } def _fixture_args_used(source: str) -> set[str]: try: tree = ast.parse(source) except SyntaxError: return set() found: set[str] = set() for node in ast.walk(tree): if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): found.update(a.arg for a in node.args.args) found.update(a.arg for a in node.args.kwonlyargs) found.update(a.arg for a in node.args.posonlyargs) return found def _classify_fixture_class(path: Path, source: str) -> FixtureClass: name = path.name for prefix in _OPT_IN_PREFIXES: if name.startswith(prefix): return FixtureClass.OPT_IN args = _fixture_args_used(source) for fc, fixture_names in _FIXTURE_ARGS.items(): if args & fixture_names: return fc lowered = name.lower() for kw in _PERF_KEYWORDS: if kw in lowered: return FixtureClass.PERFORMANCE return FixtureClass.UNIT def _infer_subsystems(filename: str) -> list[str]: stem = filename.removeprefix("test_").removesuffix(".py") for prefix in sorted(_SUBSYSTEM_PREFIXES, key=len, reverse=True): if stem.startswith(prefix + "_") or stem == prefix: return [prefix] return [] def _infer_batch_group(subsystems: list[str]) -> str: if not subsystems: return "core" first = subsystems[0] for group, members in _BATCH_GROUP_CLUSTERS.items(): if first in members: return group return "core" def _infer_speed(filename: str, durations: dict[str, float] | None) -> Speed: if not durations: return Speed.MEDIUM matching = [v for k, v in durations.items() if k.startswith(filename + "::")] if not matching: return Speed.MEDIUM p95 = sorted(matching)[int(len(matching) * 0.95)] if p95 < 1.0: return Speed.FAST if p95 < 5.0: return Speed.MEDIUM if p95 < 30.0: return Speed.SLOW return Speed.VERY_SLOW def auto_classify(path: Path, durations: dict[str, float] | None = None) -> CategoryRecord: source = path.read_text(encoding="utf-8", errors="replace") fixture_class = _classify_fixture_class(path, source) subsystems = _infer_subsystems(path.name) speed = _infer_speed(path.name, durations) batch_group = _infer_batch_group(subsystems) return CategoryRecord( filename=path.name, fixture_class=fixture_class, subsystems=subsystems, speed=speed, batch_group=batch_group, source="auto", ) def load_registry(toml_path: Path) -> dict[str, dict]: if not toml_path.exists(): return {} with toml_path.open("rb") as f: data = tomllib.load(f) return data.get("files", {}) def merge_registry(auto: CategoryRecord, entry: dict) -> CategoryRecord: warnings = list(auto.warnings) if "fixture_class" in entry and entry["fixture_class"] != auto.fixture_class.value: warnings.append(f"fixture_class-override: {auto.fixture_class.value} -> {entry['fixture_class']}") if "subsystems" in entry and set(entry["subsystems"]) != set(auto.subsystems): warnings.append(f"subsystems-override: {auto.subsystems} -> {entry['subsystems']}") return CategoryRecord( filename=auto.filename, fixture_class=FixtureClass(entry.get("fixture_class", auto.fixture_class.value)), subsystems=list(entry.get("subsystems", auto.subsystems)), speed=Speed(entry.get("speed", auto.speed.value)), batch_group=entry.get("batch_group", auto.batch_group), notes=entry.get("notes", auto.notes), test_order=dict(auto.test_order), source="registry", warnings=warnings, ) def categorize_all(tests_dir: Path, registry_path: Path) -> list[CategoryRecord]: registry = load_registry(registry_path) records: list[CategoryRecord] = [] for path in sorted(tests_dir.glob("test_*.py")): auto = auto_classify(path) entry = registry.get(path.name, {}) if entry: records.append(merge_registry(auto, entry)) else: records.append(auto) return records