diff --git a/pyproject.toml b/pyproject.toml
index 19c17156..ec83f7ad 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,6 +36,7 @@ dev = [
     "pytest>=9.0.2",
     "pytest-cov>=7.0.0",
     "pytest-asyncio>=0.25.3",
+    "pytest-xdist>=3.6.0",
 ]
 
 [tool.pytest.ini_options]
diff --git a/scripts/run_tests_batched.py b/scripts/run_tests_batched.py
index 6139665d..a77a40e3 100644
--- a/scripts/run_tests_batched.py
+++ b/scripts/run_tests_batched.py
@@ -1,11 +1,49 @@
 import argparse
 import json
+import os
 import subprocess
 import sys
 import time
 from pathlib import Path
 
-sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "tests"))
+try:
+ import xdist as _xdist  # probe only; the _xdist name is not used elsewhere in the visible code
+ _HAS_XDIST = True
+except ImportError:
+ _HAS_XDIST = False  # NOTE(review): probes this interpreter, but tests run via "uv run pytest" — confirm both share an environment
+
+_SCRIPT_DIR = Path(__file__).resolve().parent
+_PROJECT_ROOT = _SCRIPT_DIR.parent
+sys.path.insert(0, str(_PROJECT_ROOT / "tests"))  # presumably lets the categorizer/batcher imports below resolve — verify
+
+_USE_COLOR = sys.stdout.isatty() or os.environ.get("FORCE_COLOR") == "1"
+if _USE_COLOR and os.name == "nt" and sys.stdout.isatty():  # piped FORCE_COLOR output has no console to configure
+ try:
+  import ctypes
+  kernel32 = ctypes.windll.kernel32  # -11 = STD_OUTPUT_HANDLE; 7 = processed output | wrap at EOL | VT processing
+  _USE_COLOR = bool(kernel32.SetConsoleMode(kernel32.GetStdHandle(-11), 7))  # failure is a 0 return, not an exception
+ except Exception:
+  _USE_COLOR = False
+
+class _C:  # ANSI SGR escape sequences, passed as the color argument of _c()
+ RESET = "\033[0m"  # appended by _c() after the colored text
+ BOLD = "\033[1m"
+ DIM = "\033[2m"
+ RED = "\033[31m"
+ GREEN = "\033[32m"
+ YELLOW = "\033[33m"
+ BLUE = "\033[34m"
+ MAGENTA = "\033[35m"
+ CYAN = "\033[36m"
+ BOLD_GREEN = "\033[1;32m"  # the "1;" prefix combines bold with the color
+ BOLD_RED = "\033[1;31m"
+ BOLD_YELLOW = "\033[1;33m"
+ BOLD_CYAN = "\033[1;36m"
+
+def _c(text: str, color: str) -> str:
+ """Return text wrapped in the given ANSI color code plus a reset, or text unchanged when color is off."""
+ painted = f"{color}{text}{_C.RESET}"
+ return painted if _USE_COLOR else text
 
 from categorizer import categorize_all
 from batcher import plan, Batch
@@ -50,17 +88,25 @@ def _parse_durations_from_pytest_output(stdout: str) -> dict[str, float]:
 def _run_batch(b: Batch, durations: dict[str, float]) -> tuple[int, float, dict[str, float]]:
+ """Run one batch under pytest, echoing its output live; return (exit code, elapsed seconds, parsed durations)."""
  if b.skip_reason:
   return 0, 0.0, {}
- cmd = ["uv", "run", "pytest", "-v", "--durations=0"] + b.pytest_args + [str(f) for f in b.files]
- print(f"\n>>> Running {b.label} ({len(b.files)} files)")
+ args = list(b.pytest_args)
+ if not _HAS_XDIST:  # drop each "-n <workers>" pair whatever the worker value; an unrelated "auto" is kept
+  args = [a for i, a in enumerate(args) if a != "-n" and (i == 0 or args[i - 1] != "-n")]
+ cmd = ["uv", "run", "pytest", "-v", "--durations=0"] + args + [str(f) for f in b.files]
+ print(_c(f"\n>>> Running {b.label} ({len(b.files)} files)", _C.BOLD_CYAN))
  t0 = time.monotonic()
- proc = subprocess.run(cmd, capture_output=True, text=True)
+ captured: list[str] = []
+ # stderr is merged into stdout; the with-block closes the pipe and waits for the child even if the loop is interrupted.
+ with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, errors="replace", bufsize=1) as proc:
+  assert proc.stdout is not None
+  for line in proc.stdout:
+   captured.append(line)
+   print(line, end="")
  elapsed = time.monotonic() - t0
- new_durs = _parse_durations_from_pytest_output(proc.stdout)
- tail = proc.stdout[-2000:] if proc.returncode != 0 else f"<<< {b.label} PASS in {elapsed:.1f}s"
- print(tail)
- if proc.returncode != 0:
-  print(f"<<< {b.label} FAIL (exit {proc.returncode}) in {elapsed:.1f}s")
-  print(proc.stderr[-1000:])
+ new_durs = _parse_durations_from_pytest_output("".join(captured))
+ # One trailer line per batch; only the verdict text and its color differ between pass and fail.
+ verdict, tint = ("PASS", _C.BOLD_GREEN) if proc.returncode == 0 else (f"FAIL (exit {proc.returncode})", _C.BOLD_RED)
+ print(_c(f"<<< {b.label} {verdict} in {elapsed:.1f}s", tint))
  return proc.returncode, elapsed, new_durs
 
 def _print_summary(results: list[tuple[Batch, int, float]]) -> int:
@@ -70,20 +116,20 @@ def _print_summary(results: list[tuple[Batch, int, float]]) -> int:
  worst = 0
  for b, code, elapsed in results:
   if b.skip_reason:
-   status = "SKIPPED"
+   status = _c(f"{'SKIPPED':8s}", _C.BOLD_YELLOW)  # pad before coloring: ANSI codes would count toward a width spec
   elif code == 0:
-   status = "PASS"
+   status = _c(f"{'PASS':8s}", _C.BOLD_GREEN)
   else:
-   status = "FAIL"
+   status = _c(f"{'FAIL':8s}", _C.BOLD_RED)
-  worst = max(worst, code)
+  worst = max(worst, abs(code))  # a signal-killed child has a negative returncode; it must still fail the run
   n = len(b.files)
-  print(f"[{b.tier}] {b.label:40s} {status:8s} {n} files {elapsed:6.1f}s")
+  print(f"[{b.tier}] {b.label:40s} {status} {n} files {elapsed:6.1f}s")
  return worst
 
 def main() -> int:
  p = argparse.ArgumentParser()
- p.add_argument("--tests-dir", default="tests")
- p.add_argument("--registry", default="tests/test_categories.toml")
+ p.add_argument("--tests-dir", default=str(_PROJECT_ROOT / "tests"))
+ p.add_argument("--registry", default=str(_PROJECT_ROOT / "tests" / "test_categories.toml"))
  p.add_argument("--tiers", default="1,2,3,H")
  p.add_argument("--include-opt-in", action="store_true")
  p.add_argument("--no-xdist", action="store_true")
@@ -91,9 +137,13 @@ def main() -> int:
  p.add_argument("--audit", action="store_true")
  p.add_argument("--strict", action="store_true")
  p.add_argument("--durations", action="store_true", help="Record per-test durations to .test_durations.json")
+ p.add_argument("--no-color", action="store_true", help="Disable ANSI color output")
  options = p.parse_args()
+ if options.no_color:
+  global _USE_COLOR
+  _USE_COLOR = False
  tiers = _parse_tiers(options.tiers)
- tests_dir = Path(options.tests_dir)
+ tests_dir = _PROJECT_ROOT / options.tests_dir  # an absolute right operand replaces the left, so no is_absolute() check is needed
  durations_path = _durations_path(tests_dir)
  durations = _load_durations(durations_path)
- records = categorize_all(tests_dir, Path(options.registry))
+ records = categorize_all(tests_dir, _PROJECT_ROOT / options.registry)  # resolve relative paths like --tests-dir does
@@ -104,17 +154,17 @@ def main() -> int:
    print(f" {r.filename}: fc={r.fixture_class.value}, subs={r.subsystems}, bg={r.batch_group}")
   if options.strict:
    bad = [r for r in auto if len(r.subsystems) > 1]
-  if bad:
-   print(f"STRICT: {len(bad)} auto-inferred files have multiple subsystems:")
-  for r in bad:
-   print(f" {r.filename}: subs={r.subsystems}")
-  return 1
+   if bad:
+    print(f"STRICT: {len(bad)} auto-inferred files have multiple subsystems:")
+    for r in bad:
+     print(f" {r.filename}: subs={r.subsystems}")
+    return 1
   return 0
  batches = plan(records, tiers=tiers, include_opt_in=options.include_opt_in, xdist=not options.no_xdist)
  if options.plan:
   for b in batches:
    status = "SKIP" if b.skip_reason else "RUN"
-  print(f"[{status}] {b.label}: {len(b.files)} files, est {b.estimated_seconds:.1f}s")
+   print(f"[{status}] {b.label}: {len(b.files)} files, est {b.estimated_seconds:.1f}s")
   return 0
  results: list[tuple[Batch, int, float]] = []
  merged_durations = dict(durations)