From 9e5fed56a5bd776fe195345f5e7314312696cfd6 Mon Sep 17 00:00:00 2001
From: Ed_
Date: Mon, 8 Jun 2026 00:22:22 -0400
Subject: [PATCH] feat(categorizer): implement subsystem/speed/batch_group inference

---
Review notes (free text in this section is ignored when the patch is applied):
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. 4-space indentation is assumed; the context lines must
  match tests/categorizer.py for the hunks to apply.
* _infer_speed: the p95 index is now nearest-rank, (n * 95 + 99) // 100 - 1.
  The earlier int(n * 0.95) landed one rank too high whenever n was a
  multiple of 20 (with n == 20 it selected the maximum).
* Comments and docstrings were added, so hunk sizes and the diffstat changed.
  The post-image blob id on the "index" line was not recomputed.

 tests/categorizer.py | 97 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 94 insertions(+), 3 deletions(-)

diff --git a/tests/categorizer.py b/tests/categorizer.py
index 1ab54fa1..d3d7a9a3 100644
--- a/tests/categorizer.py
+++ b/tests/categorizer.py
@@ -36,6 +36,47 @@ _FIXTURE_ARGS: dict[FixtureClass, frozenset[str]] = {
     FixtureClass.MOCK_APP: frozenset({"mock_app", "app_instance"}),
 }
 
+# Subsystem names recognised as the leading token of a test filename
+# (after the "test_" prefix); see _infer_subsystems.
+_SUBSYSTEM_PREFIXES: tuple[str, ...] = (
+    "ai", "api", "arch", "ast", "async", "auto", "beads", "bias", "cache",
+    "cli", "cmd", "comms", "conductor", "context", "cost", "dag", "deepseek",
+    "diff", "discussion", "event", "execution", "external", "ext", "fuzzy",
+    "gemini", "gui", "headless", "history", "hooks", "hot", "imgui", "layout",
+    "live", "log", "mcp", "markdown", "minimax", "mma", "model", "orchestrator",
+    "outline", "parallel", "patch", "perf", "persona", "phase", "pipeline",
+    "preset", "prior", "process", "project", "provider", "rag", "script",
+    "session", "shader", "sim", "skeleton", "slice", "spawn", "status",
+    "subagent", "summary", "symbol", "sync", "synthesis", "system", "takes",
+    "theme", "thinking", "ticket", "tier4", "tiered", "token", "tool", "track",
+    "tree", "ts", "undo", "usage", "user", "vendor", "view", "visual",
+    "vlogger", "websocket", "workflow", "workspace", "z",
+)
+
+# Batch group -> member subsystems. _infer_batch_group scans the groups in
+# insertion order and returns the first one that lists the subsystem.
+# NOTE(review): "process" is listed under both "mma" and "comms"; "mma" is
+# checked first, so the "comms" entry never matches -- confirm which is meant.
+# NOTE(review): "path", "presets", "personas", "manual" and "tier" are not in
+# _SUBSYSTEM_PREFIXES, so _infer_subsystems never yields them ("preset" and
+# "persona" are the prefix spellings) -- confirm the intended names.
+_BATCH_GROUP_CLUSTERS: dict[str, tuple[str, ...]] = {
+    "core": (
+        "mcp", "ai", "context", "api", "dag", "path", "presets", "personas",
+        "history", "workspace", "rag", "beads", "model", "ast", "async", "cache",
+        "cli", "cmd", "fuzzy", "hooks", "log", "markdown", "orchestrator",
+        "outline", "pipeline", "project", "provider", "script", "session",
+        "skeleton", "slice", "spawn", "status", "subagent", "summary", "symbol",
+        "sync", "synthesis", "system", "takes", "thinking", "tier4", "tiered",
+        "tool", "track", "tree", "ts", "usage", "vendor", "vlogger", "websocket",
+        "workflow",
+    ),
+    "gui": ("gui", "theme", "imgui", "layout", "live", "prior", "visual", "view", "undo"),
+    "mma": ("mma", "conductor", "execution", "ext", "external", "auto", "manual", "tier", "arch", "phase", "process", "z"),
+    "comms": ("comms", "diff", "patch", "event", "hot", "process", "shader"),
+    "headless": ("headless",),
+}
+
 def _fixture_args_used(source: str) -> set[str]:
     try:
         tree = ast.parse(source)
@@ -64,14 +105,64 @@ def _classify_fixture_class(path: Path, source: str) -> FixtureClass:
         return FixtureClass.PERFORMANCE
     return FixtureClass.UNIT
 
+def _infer_subsystems(filename: str) -> list[str]:
+    """Return the subsystem whose prefix starts the test filename.
+
+    The "test_" prefix and ".py" suffix are stripped first. The result is a
+    one-element list, or an empty list when no known prefix matches.
+    """
+    stem = filename.removeprefix("test_").removesuffix(".py")
+    # Try longer prefixes first so the most specific name is preferred.
+    for prefix in sorted(_SUBSYSTEM_PREFIXES, key=len, reverse=True):
+        if stem.startswith(prefix + "_") or stem == prefix:
+            return [prefix]
+    return []
+
+def _infer_batch_group(subsystems: list[str]) -> str:
+    """Return the batch group listing the first subsystem ("core" if none does)."""
+    if not subsystems:
+        return "core"
+    first = subsystems[0]
+    for group, members in _BATCH_GROUP_CLUSTERS.items():
+        if first in members:
+            return group
+    return "core"
+
+def _infer_speed(filename: str, durations: dict[str, float] | None) -> Speed:
+    """Classify a test file by the 95th percentile of its recorded durations.
+
+    Only keys of the form "<filename>::..." are considered. Returns
+    Speed.MEDIUM when there is no duration data for the file.
+    """
+    if not durations:
+        return Speed.MEDIUM
+    # NOTE(review): pytest node IDs usually include the directory
+    # ("tests/test_x.py::test_y"); confirm the keys use the bare filename.
+    matching = [v for k, v in durations.items() if k.startswith(filename + "::")]
+    if not matching:
+        return Speed.MEDIUM
+    # Nearest-rank p95, ceil(0.95 * n) - 1, in integer arithmetic so that a
+    # file with exactly 20 samples is not judged by its single slowest test.
+    p95 = sorted(matching)[(len(matching) * 95 + 99) // 100 - 1]
+    if p95 < 1.0:
+        return Speed.FAST
+    if p95 < 5.0:
+        return Speed.MEDIUM
+    if p95 < 30.0:
+        return Speed.SLOW
+    return Speed.VERY_SLOW
+
 def auto_classify(path: Path, durations: dict[str, float] | None = None) -> CategoryRecord:
     source = path.read_text(encoding="utf-8", errors="replace")
     fixture_class = _classify_fixture_class(path, source)
+    subsystems = _infer_subsystems(path.name)
+    speed = _infer_speed(path.name, durations)
+    batch_group = _infer_batch_group(subsystems)
     return CategoryRecord(
         filename=path.name,
         fixture_class=fixture_class,
-        subsystems=[],
-        speed=Speed.MEDIUM,
-        batch_group="",
+        subsystems=subsystems,
+        speed=speed,
+        batch_group=batch_group,
         source="auto",
     )