"""
Codebase Statistics Gatherer

Gathers metrics on the Manual Slop Python codebase:
  - Line counts (total, code, comment, blank)
  - Class counts and definitions
  - Function/method counts and definitions
  - Top-level declarations
  - File sizes (estimated from line counts)

Usage:
    uv run python scripts/code_stats.py
"""

from __future__ import annotations

import ast
import datetime
import os
import sys
from collections import defaultdict
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterator

# Directories (relative to the project root) that are scanned, in report order.
TARGET_DIRS = ["src", "simulation", "tests", "scripts"]

# Directory names that are never descended into below a scanned root.
IGNORED_PATHS = {"__pycache__", ".git", "node_modules", "venv", ".venv", "env", ".env"}

# Rough average line length used for the "Est. code size" figure.
_EST_BYTES_PER_LINE = 50

# Maximum number of per-file rows printed for each directory.
_MAX_FILE_ROWS = 20


@dataclass
class FileStats:
    """Line counts and declaration names collected for one Python file."""

    path: str
    total_lines: int = 0
    code_lines: int = 0
    comment_lines: int = 0
    blank_lines: int = 0
    classes: list[str] = field(default_factory=list)
    functions: list[str] = field(default_factory=list)
    methods: list[str] = field(default_factory=list)
    top_level_decls: list[str] = field(default_factory=list)


@dataclass
class DirStats:
    """Aggregated totals for every analysed file below one directory."""

    path: str
    files: list[FileStats] = field(default_factory=list)
    total_lines: int = 0
    code_lines: int = 0
    comment_lines: int = 0
    blank_lines: int = 0
    class_count: int = 0
    function_count: int = 0
    method_count: int = 0
    file_count: int = 0


class CodeAnalyzer(ast.NodeVisitor):
    """Collect class, function and method names from a parsed module.

    After ``visit(tree)``:
      * ``classes`` holds the name of every class, nested ones included.
      * ``methods`` holds ``"Class.name"`` for functions defined directly in
        a class body.
      * ``functions`` holds every other function, including closures defined
        inside functions or methods.
      * ``top_level_decls`` holds ``"class X"`` / ``"def f"`` / ``"async def f"``
        only for definitions that are not nested inside another class or
        function.
    """

    def __init__(self) -> None:
        self.classes: list[str] = []
        self.functions: list[str] = []
        self.methods: list[str] = []
        self.top_level_decls: list[str] = []
        # True only while visiting the direct body of a class.
        self._in_class: bool = False
        self._class_name: str = ""
        # Number of enclosing class/function definitions; 0 means module level.
        self._depth: int = 0

    def _visit_children(self, node: ast.AST, *, in_class: bool, class_name: str) -> None:
        """Visit *node*'s children inside a new definition scope, then restore state."""
        saved = (self._in_class, self._class_name)
        self._in_class, self._class_name = in_class, class_name
        self._depth += 1
        try:
            self.generic_visit(node)
        finally:
            self._depth -= 1
            self._in_class, self._class_name = saved

    def _record_function(self, node: ast.FunctionDef | ast.AsyncFunctionDef, keyword: str) -> None:
        """Classify one (async) function definition and descend into it."""
        if self._in_class:
            self.methods.append(f"{self._class_name}.{node.name}")
        else:
            self.functions.append(node.name)
            if self._depth == 0:
                self.top_level_decls.append(f"{keyword} {node.name}")
        # A function body is not a class body: closures defined inside a
        # method must not be reported as methods of the enclosing class.
        self._visit_children(node, in_class=False, class_name="")

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        self.classes.append(node.name)
        if self._depth == 0:
            self.top_level_decls.append(f"class {node.name}")
        self._visit_children(node, in_class=True, class_name=node.name)

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        self._record_function(node, "def")

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        self._record_function(node, "async def")


def analyze_file(path: Path) -> FileStats | None:
    """Return statistics for the Python file at *path*.

    Returns ``None`` when the file cannot be read. When the file is readable
    but cannot be parsed, the line counts are still returned and the
    declaration lists stay empty.

    A line counts as a comment only when its first non-blank character is
    ``#``; docstrings and trailing comments count as code.
    """
    try:
        # errors="replace" keeps undecodable bytes from raising.
        content = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return None

    stats = FileStats(path=str(path))
    lines = content.splitlines()
    stats.total_lines = len(lines)

    for line in lines:
        stripped = line.strip()
        if not stripped:
            stats.blank_lines += 1
        elif stripped.startswith("#"):
            stats.comment_lines += 1
        else:
            stats.code_lines += 1

    try:
        tree = ast.parse(content, filename=str(path))
    except (SyntaxError, ValueError, RecursionError):
        # Best effort: unparsable files still contribute their line counts.
        return stats

    visitor = CodeAnalyzer()
    visitor.visit(tree)
    stats.classes = visitor.classes
    stats.functions = visitor.functions
    stats.methods = visitor.methods
    stats.top_level_decls = visitor.top_level_decls
    return stats


def walk_python_files(root: Path) -> Iterator[Path]:
    """Yield every ``*.py`` file below *root*, skipping ignored directories.

    Only path components below *root* are compared against ``IGNORED_PATHS``,
    so a project that happens to live under e.g. ``~/env/`` is still scanned.
    """
    for path in root.rglob("*.py"):
        if not IGNORED_PATHS.isdisjoint(path.relative_to(root).parts):
            continue
        yield path


def gather_dir_stats(root: Path) -> DirStats:
    """Analyse every Python file below *root* and return the aggregated totals."""
    stats = DirStats(path=str(root))
    for py_file in sorted(walk_python_files(root)):
        file_stats = analyze_file(py_file)
        if file_stats is None:
            # Unreadable file: leave it out of the report.
            continue
        stats.files.append(file_stats)
        stats.total_lines += file_stats.total_lines
        stats.code_lines += file_stats.code_lines
        stats.comment_lines += file_stats.comment_lines
        stats.blank_lines += file_stats.blank_lines
        stats.class_count += len(file_stats.classes)
        stats.function_count += len(file_stats.functions)
        stats.method_count += len(file_stats.methods)
        stats.file_count += 1
    return stats


def format_bytes(num_bytes: int) -> str:
    """Format *num_bytes* as ``B``, ``KB`` or ``MB`` using 1024-based units."""
    if num_bytes < 1024:
        return f"{num_bytes}B"
    elif num_bytes < 1024 * 1024:
        return f"{num_bytes / 1024:.1f}KB"
    else:
        return f"{num_bytes / (1024 * 1024):.1f}MB"


def _print_summary(d: DirStats) -> None:
    """Print the file/line/declaration totals shared by every report section."""
    print(f" Files: {d.file_count:,}")
    print(f" Lines: {d.total_lines:,} (code: {d.code_lines:,} | comment: {d.comment_lines:,} | blank: {d.blank_lines:,})")
    print(f" Classes: {d.class_count:,}")
    print(f" Functions: {d.function_count:,}")
    print(f" Methods: {d.method_count:,}")
    total_decls = d.class_count + d.function_count + d.method_count
    print(f" Total decls: {total_decls:,}")


def print_stats(d: DirStats) -> None:
    """Print the report section for one directory.

    The section holds the totals, an estimated size, and a table of the
    largest files (by total lines), limited to 20 rows.
    """
    print(f"\n{'=' * 60}")
    print(f" {d.path}/")
    print(f"{'=' * 60}")
    _print_summary(d)

    # Size is an estimate derived from line counts, not read from disk.
    code_bytes = sum(f.total_lines * _EST_BYTES_PER_LINE for f in d.files)
    print(f" Est. code size: ~{format_bytes(code_bytes)}")
    print()

    if not d.files:
        return

    print(f" {'File':<35} {'Lines':>8} {'Code':>7} {'Cmts':>6} {'Cls':>5} {'Fn':>5} {'Mth':>5}")
    print(f" {'-' * 35} {'-' * 8} {'-' * 7} {'-' * 6} {'-' * 5} {'-' * 5} {'-' * 5}")
    largest_first = sorted(d.files, key=lambda x: x.total_lines, reverse=True)
    for f in largest_first[:_MAX_FILE_ROWS]:
        print(f" {os.path.basename(f.path):<35} {f.total_lines:>8,} {f.code_lines:>7,} {f.comment_lines:>6,} {len(f.classes):>5} {len(f.functions):>5} {len(f.methods):>5}")
    if len(d.files) > _MAX_FILE_ROWS:
        print(f" ... and {len(d.files) - _MAX_FILE_ROWS} more files")


def main() -> None:
    """Scan each target directory under the project root and print the report."""
    # This script lives in <project_root>/scripts/.
    project_root = Path(__file__).parent.parent
    print("Manual Slop — Codebase Statistics")
    print(f"Generated: {datetime.datetime.now().isoformat()}")
    print(f"Project root: {project_root}")

    all_stats: list[DirStats] = []
    for dirname in TARGET_DIRS:
        dir_path = project_root / dirname
        if dir_path.is_dir():
            stats = gather_dir_stats(dir_path)
            all_stats.append(stats)
            print_stats(stats)
        else:
            print(f"\n[SKIPPED] {dirname}/ does not exist")

    # A combined section only adds information when several dirs were scanned.
    if len(all_stats) > 1:
        combined = DirStats(path="(all)")
        for s in all_stats:
            combined.file_count += s.file_count
            combined.total_lines += s.total_lines
            combined.code_lines += s.code_lines
            combined.comment_lines += s.comment_lines
            combined.blank_lines += s.blank_lines
            combined.class_count += s.class_count
            combined.function_count += s.function_count
            combined.method_count += s.method_count
        print(f"\n{'=' * 60}")
        print(" COMBINED TOTALS")
        print(f"{'=' * 60}")
        _print_summary(combined)

    print("\nDone.")


if __name__ == "__main__":
    main()