"""
Codebase Statistics Gatherer

Gathers metrics on the Manual Slop Python codebase:
- Line counts (total, code, comment, blank)
- Class counts and definitions
- Function/method counts and definitions
- Top-level declarations
- File sizes

Line classification is purely textual: a line whose first non-blank
character is ``#`` is a comment, an empty/whitespace-only line is blank, and
everything else (including docstring lines) is counted as code.

Usage:
    uv run python scripts/code_stats.py
"""
from __future__ import annotations

import ast
import datetime
import os
import sys
from collections import defaultdict
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterator

# NOTE: ``os``, ``sys`` and ``defaultdict`` are not used below; they are
# retained so the module's import surface stays unchanged.

# Directories (relative to the project root) that are scanned, in order.
TARGET_DIRS = ["src", "simulation", "tests", "scripts"]
# Directory names that are never descended into while scanning.
IGNORED_PATHS = {"__pycache__", ".git", "node_modules", "venv", ".venv", "env", ".env"}


@dataclass
class FileStats:
    """Metrics collected for a single Python source file."""

    path: str
    total_lines: int = 0
    code_lines: int = 0
    comment_lines: int = 0
    blank_lines: int = 0
    classes: list[str] = field(default_factory=list)
    functions: list[str] = field(default_factory=list)
    methods: list[str] = field(default_factory=list)
    top_level_decls: list[str] = field(default_factory=list)
    # Size of the file in bytes as read from disk. Declared last so existing
    # positional construction keeps working.
    size_bytes: int = 0


@dataclass
class DirStats:
    """Aggregated metrics for every Python file found under one directory."""

    path: str
    files: list[FileStats] = field(default_factory=list)
    total_lines: int = 0
    code_lines: int = 0
    comment_lines: int = 0
    blank_lines: int = 0
    class_count: int = 0
    function_count: int = 0
    method_count: int = 0
    file_count: int = 0
    # Sum of FileStats.size_bytes. Declared last so existing positional
    # construction keeps working.
    size_bytes: int = 0

    def add_file(self, file_stats: FileStats) -> None:
        """Record *file_stats* and fold its numbers into the running totals."""
        self.files.append(file_stats)
        self.total_lines += file_stats.total_lines
        self.code_lines += file_stats.code_lines
        self.comment_lines += file_stats.comment_lines
        self.blank_lines += file_stats.blank_lines
        self.class_count += len(file_stats.classes)
        self.function_count += len(file_stats.functions)
        self.method_count += len(file_stats.methods)
        self.size_bytes += file_stats.size_bytes
        self.file_count += 1

    def merge(self, other: DirStats) -> None:
        """Add the counters of *other* to this instance.

        Only the numeric totals are merged; ``files`` is left untouched so a
        combined summary does not duplicate the per-file records.
        """
        self.file_count += other.file_count
        self.total_lines += other.total_lines
        self.code_lines += other.code_lines
        self.comment_lines += other.comment_lines
        self.blank_lines += other.blank_lines
        self.class_count += other.class_count
        self.function_count += other.function_count
        self.method_count += other.method_count
        self.size_bytes += other.size_bytes


class CodeAnalyzer(ast.NodeVisitor):
    """Collect class, function and method names from a parsed module.

    After ``visit(tree)``:

    - ``classes`` holds every class name, nested ones included.
    - ``methods`` holds ``"Class.name"`` for functions defined directly in a
      class body (the innermost class name is used).
    - ``functions`` holds every other function, i.e. module-level functions
      and functions nested inside other functions or methods.
    - ``top_level_decls`` holds ``"class X"`` / ``"def f"`` / ``"async def f"``
      only for definitions that are not nested in another class or function.
    """

    def __init__(self) -> None:
        self.classes: list[str] = []
        self.functions: list[str] = []
        self.methods: list[str] = []
        self.top_level_decls: list[str] = []
        # Stack of ("class" | "function", name) for the enclosing definitions
        # of the node currently being visited; empty at module level.
        self._scopes: list[tuple[str, str]] = []

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        """Record a class definition and descend into its body."""
        self.classes.append(node.name)
        if not self._scopes:
            self.top_level_decls.append(f"class {node.name}")
        self._descend("class", node)

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        """Record a ``def`` as either a method or a function."""
        self._record_function(node, "def")

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        """Record an ``async def`` as either a method or a function."""
        self._record_function(node, "async def")

    def _record_function(
        self, node: ast.FunctionDef | ast.AsyncFunctionDef, keyword: str
    ) -> None:
        """Classify *node* by its immediate enclosing scope, then descend."""
        # Only a function whose *direct* parent is a class is a method; a
        # closure defined inside a method is an ordinary (nested) function.
        if self._scopes and self._scopes[-1][0] == "class":
            self.methods.append(f"{self._scopes[-1][1]}.{node.name}")
        else:
            self.functions.append(node.name)
            if not self._scopes:
                self.top_level_decls.append(f"{keyword} {node.name}")
        self._descend("function", node)

    def _descend(
        self, kind: str, node: ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef
    ) -> None:
        """Visit the children of *node* with *node* pushed on the scope stack."""
        self._scopes.append((kind, node.name))
        try:
            self.generic_visit(node)
        finally:
            self._scopes.pop()


def analyze_file(path: Path) -> FileStats | None:
    """Compute line and declaration metrics for one file.

    Returns ``None`` when the file cannot be read. When the file can be read
    but not parsed as Python, the line counts and size are still returned and
    the declaration lists stay empty.
    """
    try:
        raw = path.read_bytes()
    except OSError:
        return None

    # "utf-8-sig" drops a leading byte-order mark, which ast.parse() would
    # otherwise reject when given an already-decoded string.
    content = raw.decode("utf-8-sig", errors="replace")

    stats = FileStats(path=str(path), size_bytes=len(raw))
    lines = content.splitlines()
    stats.total_lines = len(lines)

    for line in lines:
        stripped = line.strip()
        if not stripped:
            stats.blank_lines += 1
        elif stripped.startswith("#"):
            stats.comment_lines += 1
        else:
            stats.code_lines += 1

    visitor = CodeAnalyzer()
    try:
        visitor.visit(ast.parse(content, filename=str(path)))
    except (SyntaxError, ValueError, RecursionError, MemoryError):
        # Best effort: an unparsable (or pathologically nested) file still
        # contributes its line counts.
        return stats

    stats.classes = visitor.classes
    stats.functions = visitor.functions
    stats.methods = visitor.methods
    stats.top_level_decls = visitor.top_level_decls
    return stats


def walk_python_files(root: Path) -> Iterator[Path]:
    """Yield every ``*.py`` path under *root*, skipping ignored directories.

    The ignore check looks only at the path components *below* ``root``, so a
    project that itself lives under a directory called e.g. ``env`` or
    ``venv`` is still scanned.
    """
    for path in root.rglob("*.py"):
        if IGNORED_PATHS.isdisjoint(path.relative_to(root).parts):
            yield path


def gather_dir_stats(root: Path) -> DirStats:
    """Analyze all Python files under *root* (in sorted order) and total them."""
    stats = DirStats(path=str(root))
    for py_file in sorted(walk_python_files(root)):
        file_stats = analyze_file(py_file)
        if file_stats is not None:
            stats.add_file(file_stats)
    return stats


def format_bytes(num_bytes: int) -> str:
    """Render a byte count as ``"<n>B"``, ``"<x.x>KB"`` or ``"<x.x>MB"``."""
    kib = 1024
    mib = kib * kib
    if num_bytes < kib:
        return f"{num_bytes}B"
    if num_bytes < mib:
        return f"{num_bytes / kib:.1f}KB"
    return f"{num_bytes / mib:.1f}MB"


def _print_summary(title: str, d: DirStats) -> None:
    """Print the banner and the aggregate counters shared by every report."""
    rule = "=" * 60
    print(f"\n{rule}")
    print(f" {title}")
    print(rule)
    print(f" Files: {d.file_count:,}")
    print(f" Lines: {d.total_lines:,} (code: {d.code_lines:,} | comment: {d.comment_lines:,} | blank: {d.blank_lines:,})")
    print(f" Classes: {d.class_count:,}")
    print(f" Functions: {d.function_count:,}")
    print(f" Methods: {d.method_count:,}")
    total_decls = d.class_count + d.function_count + d.method_count
    print(f" Total decls: {total_decls:,}")
    print(f" Code size: {format_bytes(d.size_bytes)}")


def print_stats(d: DirStats, limit: int = 20) -> None:
    """Print the summary for *d* followed by a table of its largest files.

    Args:
        d: Directory statistics to report.
        limit: Maximum number of files listed in the table (largest first by
            total line count).
    """
    _print_summary(f"{d.path}/", d)

    print()
    if not d.files:
        return

    print(f" {'File':<35} {'Lines':>8} {'Code':>7} {'Cmts':>6} {'Cls':>5} {'Fn':>5} {'Mth':>5}")
    print(f" {'-' * 35} {'-' * 8} {'-' * 7} {'-' * 6} {'-' * 5} {'-' * 5} {'-' * 5}")
    largest_first = sorted(d.files, key=lambda x: x.total_lines, reverse=True)
    for f in largest_first[:limit]:
        print(f" {Path(f.path).name:<35} {f.total_lines:>8,} {f.code_lines:>7,} {f.comment_lines:>6,} {len(f.classes):>5} {len(f.functions):>5} {len(f.methods):>5}")
    if len(d.files) > limit:
        print(f" ... and {len(d.files) - limit} more files")


def main() -> None:
    """Report statistics for each target directory, then the combined totals."""
    project_root = Path(__file__).resolve().parent.parent
    print("Manual Slop — Codebase Statistics")
    print(f"Generated: {datetime.datetime.now().isoformat()}")
    print(f"Project root: {project_root}")

    all_stats: list[DirStats] = []
    for dirname in TARGET_DIRS:
        dir_path = project_root / dirname
        if not dir_path.is_dir():
            print(f"\n[SKIPPED] {dirname}/ does not exist")
            continue
        stats = gather_dir_stats(dir_path)
        all_stats.append(stats)
        print_stats(stats)

    # A combined section is only informative when more than one directory
    # was actually scanned.
    if len(all_stats) > 1:
        combined = DirStats(path="(all)")
        for s in all_stats:
            combined.merge(s)
        _print_summary("COMBINED TOTALS", combined)

    print("\nDone.")


if __name__ == "__main__":
    main()