Private
Public Access
0
0
Files
manual_slop/tests/categorizer.py
T

144 lines
4.9 KiB
Python

import ast
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
class FixtureClass(str, Enum):
    """Kind of test environment a test file is classified under.

    Members subclass ``str``, so each one compares equal to its value.
    """

    # Fallback returned by _classify_fixture_class when no other rule matches.
    UNIT = "unit"
    # Chosen when a function in the file takes a "mock_app" or "app_instance" parameter.
    MOCK_APP = "mock_app"
    # Chosen when a function in the file takes a "live_gui" parameter.
    LIVE_GUI = "live_gui"
    # NOTE(review): never assigned by the automatic classifier in this file;
    # presumably set by a manual override elsewhere -- confirm.
    HEADLESS = "headless"
    # Chosen when the file name starts with one of _OPT_IN_PREFIXES.
    OPT_IN = "opt_in"
    # Chosen when the lower-cased file name contains one of _PERF_KEYWORDS.
    PERFORMANCE = "performance"
class Speed(str, Enum):
    """Runtime bucket assigned to a test file by _infer_speed.

    Members subclass ``str``, so each one compares equal to its value.
    The thresholds below are compared against recorded duration values
    (presumably seconds -- confirm against whatever produces the durations).
    """

    # Selected duration sample is below 1.0.
    FAST = "fast"
    # Selected sample is below 5.0; also the default when no durations are
    # supplied or none belong to the file.
    MEDIUM = "medium"
    # Selected sample is below 30.0.
    SLOW = "slow"
    # Everything else.
    VERY_SLOW = "very_slow"
@dataclass(frozen=True)
class CategoryRecord:
    """Classification result for a single test file.

    ``frozen=True`` only prevents attributes from being rebound; the list
    and dict values stored in the fields are still mutable objects.
    """

    # File name only, without directories (auto_classify passes path.name).
    filename: str
    # Test environment the file was classified under.
    fixture_class: FixtureClass
    # Subsystem names; the automatic classifier supplies zero or one entry.
    subsystems: list[str]
    # Runtime bucket.
    speed: Speed
    # Name of the batch the file is grouped into (e.g. "core", "gui").
    batch_group: str
    # Free-form text; left empty by auto_classify.
    notes: str = ""
    # NOTE(review): presumably maps a test name to its run position; not
    # populated anywhere in this file -- confirm with the consumer.
    test_order: dict[str, int] = field(default_factory=dict)
    # Origin of the record; auto_classify sets "auto".
    source: str = "auto"
    # Messages attached to the record; left empty by auto_classify.
    warnings: list[str] = field(default_factory=list)
# File-name prefixes that mark a test file as OPT_IN. This rule is checked
# first by _classify_fixture_class, before fixtures or keywords are looked at.
_OPT_IN_PREFIXES: tuple[str, ...] = ("test_clean_install", "test_docker_build")
# Substrings searched for in the lower-cased file name; a hit yields
# PERFORMANCE, but only if no fixture rule matched first.
# NOTE(review): because this is a substring test, "phase_4_stress" is already
# covered by "stress".
_PERF_KEYWORDS: tuple[str, ...] = ("perf", "stress", "phase_3_final", "phase_4_stress")
# Function-parameter names that imply a fixture class. Entries are tried in
# insertion order, so LIVE_GUI takes precedence over MOCK_APP when a file
# uses parameters from both sets.
_FIXTURE_ARGS: dict[FixtureClass, frozenset[str]] = {
    FixtureClass.LIVE_GUI: frozenset({"live_gui"}),
    FixtureClass.MOCK_APP: frozenset({"mock_app", "app_instance"}),
}
# Known subsystem names. _infer_subsystems strips "test_" and ".py" from a
# file name and reports the longest entry here that either equals the
# remaining stem or is followed by "_" at the start of it.
_SUBSYSTEM_PREFIXES: tuple[str, ...] = (
    "ai", "api", "arch", "ast", "async", "auto", "beads", "bias", "cache",
    "cli", "cmd", "comms", "conductor", "context", "cost", "dag", "deepseek",
    "diff", "discussion", "event", "execution", "external", "ext", "fuzzy",
    "gemini", "gui", "headless", "history", "hooks", "hot", "imgui", "layout",
    "live", "log", "mcp", "markdown", "minimax", "mma", "model", "orchestrator",
    "outline", "parallel", "patch", "perf", "persona", "phase", "pipeline",
    "preset", "prior", "process", "project", "provider", "rag", "script",
    "session", "shader", "sim", "skeleton", "slice", "spawn", "status",
    "subagent", "summary", "symbol", "sync", "synthesis", "system", "takes",
    "theme", "thinking", "ticket", "tier4", "tiered", "token", "tool", "track",
    "tree", "ts", "undo", "usage", "user", "vendor", "view", "visual",
    "vlogger", "websocket", "workflow", "workspace", "z",
)
# Batch group name -> subsystem names belonging to it. _infer_batch_group
# returns the first group (in insertion order) that lists the file's first
# subsystem, and falls back to "core" when nothing matches.
# NOTE(review): "process" appears under both "mma" and "comms"; with the
# insertion-order lookup only the "mma" entry can ever be selected.
# NOTE(review): "path", "presets", "personas", "manual" and "tier" are not
# in _SUBSYSTEM_PREFIXES (which has "preset"/"persona"), so _infer_subsystems
# never produces them -- confirm whether these spellings are intentional.
_BATCH_GROUP_CLUSTERS: dict[str, tuple[str, ...]] = {
    "core": (
        "mcp", "ai", "context", "api", "dag", "path", "presets", "personas",
        "history", "workspace", "rag", "beads", "model", "ast", "async", "cache",
        "cli", "cmd", "fuzzy", "hooks", "log", "markdown", "orchestrator",
        "outline", "pipeline", "project", "provider", "script", "session",
        "skeleton", "slice", "spawn", "status", "subagent", "summary", "symbol",
        "sync", "synthesis", "system", "takes", "thinking", "tier4", "tiered",
        "tool", "track", "tree", "ts", "usage", "vendor", "vlogger", "websocket",
        "workflow",
    ),
    "gui": ("gui", "theme", "imgui", "layout", "live", "prior", "visual", "view", "undo"),
    "mma": ("mma", "conductor", "execution", "ext", "external", "auto", "manual", "tier", "arch", "phase", "process", "z"),
    "comms": ("comms", "diff", "patch", "event", "hot", "process", "shader"),
    "headless": ("headless",),
}
def _fixture_args_used(source: str) -> set[str]:
try:
tree = ast.parse(source)
except SyntaxError:
return set()
found: set[str] = set()
for node in ast.walk(tree):
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
found.update(a.arg for a in node.args.args)
found.update(a.arg for a in node.args.kwonlyargs)
found.update(a.arg for a in node.args.posonlyargs)
return found
def _classify_fixture_class(path: Path, source: str) -> FixtureClass:
    """Decide which fixture class a test file belongs to.

    Rules are applied in this order and the first hit wins:

    1. File name starts with an opt-in prefix -> ``OPT_IN``.
    2. Any function in *source* takes a parameter listed in
       ``_FIXTURE_ARGS`` -> the corresponding class (table order).
    3. Lower-cased file name contains a performance keyword ->
       ``PERFORMANCE``.
    4. Otherwise -> ``UNIT``.

    Args:
        path: Location of the test file; only its final component is used.
        source: Text of the test file.

    Returns:
        The selected ``FixtureClass`` member.
    """
    file_name = path.name
    if any(file_name.startswith(opt_in) for opt_in in _OPT_IN_PREFIXES):
        return FixtureClass.OPT_IN

    requested = _fixture_args_used(source)
    for candidate, trigger_names in _FIXTURE_ARGS.items():
        if not requested.isdisjoint(trigger_names):
            return candidate

    lowered_name = file_name.lower()
    if any(keyword in lowered_name for keyword in _PERF_KEYWORDS):
        return FixtureClass.PERFORMANCE
    return FixtureClass.UNIT
def _infer_subsystems(filename: str) -> list[str]:
    """Guess the subsystem a test file covers from its file name.

    A leading ``test_`` and a trailing ``.py`` are dropped, then the known
    subsystem names are tried longest first. A name matches when it is the
    whole remaining stem or is immediately followed by ``_``.

    Args:
        filename: File name of the test module, e.g. ``test_gui_layout.py``.

    Returns:
        A one-element list holding the matched subsystem, or an empty list
        when no known name matches.
    """
    stem = filename.removeprefix("test_").removesuffix(".py")
    longest_first = sorted(_SUBSYSTEM_PREFIXES, key=len, reverse=True)
    hit = next(
        (
            candidate
            for candidate in longest_first
            if stem.startswith(candidate + "_") or stem == candidate
        ),
        None,
    )
    return [] if hit is None else [hit]
def _infer_batch_group(subsystems: list[str]) -> str:
    """Map a file's subsystems to the batch group it should run in.

    Only the first subsystem is considered. Groups are searched in the
    order they are declared in ``_BATCH_GROUP_CLUSTERS``.

    Args:
        subsystems: Subsystem names inferred for the file; may be empty.

    Returns:
        The name of the first group listing the subsystem, or ``"core"``
        when the list is empty or no group lists it.
    """
    if subsystems:
        primary = subsystems[0]
        for group_name in _BATCH_GROUP_CLUSTERS:
            if primary in _BATCH_GROUP_CLUSTERS[group_name]:
                return group_name
    return "core"
def _infer_speed(filename: str, durations: dict[str, float] | None) -> Speed:
    """Bucket a test file by the recorded durations of its tests.

    Entries of *durations* whose key starts with ``"<filename>::"`` are
    treated as belonging to the file. Their values are sorted ascending and
    the element at index ``int(count * 0.95)`` is compared against the
    bucket limits 1.0, 5.0 and 30.0.

    Args:
        filename: File name of the test module.
        durations: Mapping of test identifier to duration, or ``None``.

    Returns:
        ``Speed.MEDIUM`` when there is no usable data, otherwise the bucket
        whose upper limit the selected sample falls under.
    """
    if not durations:
        return Speed.MEDIUM

    node_prefix = filename + "::"
    samples = sorted(
        elapsed
        for node_id, elapsed in durations.items()
        if node_id.startswith(node_prefix)
    )
    if not samples:
        return Speed.MEDIUM

    p95 = samples[int(len(samples) * 0.95)]
    buckets = ((1.0, Speed.FAST), (5.0, Speed.MEDIUM), (30.0, Speed.SLOW))
    for upper_limit, bucket in buckets:
        if p95 < upper_limit:
            return bucket
    return Speed.VERY_SLOW
def auto_classify(path: Path, durations: dict[str, float] | None = None) -> CategoryRecord:
    """Build a ``CategoryRecord`` for a test file using only heuristics.

    The file is read as UTF-8 with undecodable bytes replaced, then the
    fixture class, subsystem, speed bucket and batch group are inferred
    from its contents and file name.

    Args:
        path: Test file to classify; it must be readable.
        durations: Optional mapping of test identifier to duration used for
            the speed bucket.

    Returns:
        A record with ``source="auto"``; ``notes``, ``test_order`` and
        ``warnings`` keep their defaults.

    Raises:
        OSError: If the file cannot be read.
    """
    text = path.read_text(encoding="utf-8", errors="replace")
    file_name = path.name
    detected_class = _classify_fixture_class(path, text)
    owning_subsystems = _infer_subsystems(file_name)
    return CategoryRecord(
        filename=file_name,
        fixture_class=detected_class,
        subsystems=owning_subsystems,
        speed=_infer_speed(file_name, durations),
        batch_group=_infer_batch_group(owning_subsystems),
        source="auto",
    )