227 lines
7.6 KiB
Python
227 lines
7.6 KiB
Python
"""
|
|
Codebase Statistics Gatherer
|
|
|
|
Gathers metrics on the Manual Slop Python codebase:
|
|
- Line counts (total, code, comment, blank)
|
|
- Class counts and definitions
|
|
- Function/method counts and definitions
|
|
- Top-level declarations
|
|
- File sizes
|
|
|
|
Usage:
|
|
uv run python scripts/code_stats.py
|
|
"""
|
|
from __future__ import annotations
|
|
import ast
|
|
import os
|
|
import sys
|
|
from collections import defaultdict
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Iterator
|
|
|
|
# Directories, relative to the project root, that main() reports on.
# The list order is the order of the report sections.
TARGET_DIRS = [
    "src",
    "simulation",
    "tests",
    "scripts",
]

# Path components that exclude a file from the scan (caches, VCS metadata,
# vendored packages and virtual environments).
IGNORED_PATHS = {
    "__pycache__",
    ".git",
    "node_modules",
    "venv",
    ".venv",
    "env",
    ".env",
}
|
|
|
|
|
|
@dataclass
class FileStats:
    """Line tallies and declaration names collected for a single file."""

    # Path of the analyzed file, kept as a plain string.
    path: str

    # Line tallies; every line of the file lands in exactly one of
    # code / comment / blank, and total_lines is the overall count.
    total_lines: int = 0
    code_lines: int = 0
    comment_lines: int = 0
    blank_lines: int = 0

    # Names gathered from the parsed syntax tree. Each instance gets its
    # own fresh list via default_factory.
    classes: list[str] = field(default_factory=list)
    functions: list[str] = field(default_factory=list)
    methods: list[str] = field(default_factory=list)
    top_level_decls: list[str] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class DirStats:
    """Totals accumulated over the files found under one directory."""

    # Directory (or label) these totals belong to, kept as a plain string.
    path: str

    # Per-file results that were folded into the totals below.
    files: list[FileStats] = field(default_factory=list)

    # Summed line tallies.
    total_lines: int = 0
    code_lines: int = 0
    comment_lines: int = 0
    blank_lines: int = 0

    # Summed declaration counts.
    class_count: int = 0
    function_count: int = 0
    method_count: int = 0

    # Number of files that contributed to the totals.
    file_count: int = 0
|
|
|
|
|
|
class CodeAnalyzer(ast.NodeVisitor):
    """AST visitor that collects class, function and method names.

    After ``visit(tree)`` the following lists are populated, in source order:

    - ``classes``: name of every class definition, nested ones included.
    - ``methods``: ``"Class.name"`` for every function defined directly in
      a class body.
    - ``functions``: name of every other function (module-level functions
      and functions nested inside other functions or methods).
    - ``top_level_decls``: ``"class X"`` / ``"def f"`` / ``"async def f"``
      for definitions that are not nested inside another class or function.
    """

    def __init__(self) -> None:
        self.classes: list[str] = []
        self.functions: list[str] = []
        self.methods: list[str] = []
        self.top_level_decls: list[str] = []
        # True only while visiting the direct body of a class.
        self._in_class: bool = False
        self._class_name: str = ""
        # Number of enclosing class/function definitions; 0 is module scope.
        self._depth: int = 0

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        """Record the class, then visit its body with class context set."""
        self.classes.append(node.name)
        if self._depth == 0:
            self.top_level_decls.append(f"class {node.name}")

        saved = (self._in_class, self._class_name)
        self._in_class, self._class_name = True, node.name
        self._depth += 1
        self.generic_visit(node)
        self._depth -= 1
        self._in_class, self._class_name = saved

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        """Record a ``def`` as a method or a function."""
        self._record_function(node, "def")

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        """Record an ``async def`` as a method or a function."""
        self._record_function(node, "async def")

    def _record_function(
        self, node: ast.FunctionDef | ast.AsyncFunctionDef, keyword: str
    ) -> None:
        """Classify one function definition and descend into its body."""
        if self._in_class:
            self.methods.append(f"{self._class_name}.{node.name}")
        else:
            self.functions.append(node.name)
            if self._depth == 0:
                self.top_level_decls.append(f"{keyword} {node.name}")

        # A function body is not a class body: clear the class context so
        # helpers defined inside a method are not reported as methods of
        # the enclosing class.
        saved = (self._in_class, self._class_name)
        self._in_class, self._class_name = False, ""
        self._depth += 1
        self.generic_visit(node)
        self._depth -= 1
        self._in_class, self._class_name = saved
|
|
|
|
|
|
def analyze_file(path: Path) -> FileStats | None:
    """Collect line tallies and declaration names for one file.

    Returns None when the file cannot be read. When the text cannot be
    parsed as Python, the line tallies are still returned and the
    declaration lists are left empty.
    """
    try:
        text = path.read_text(encoding="utf-8", errors="replace")
    except Exception:
        return None

    rows = text.splitlines()

    # Classify each line by its stripped content: empty -> blank,
    # leading "#" -> comment, anything else -> code.
    blank = comment = code = 0
    for row in rows:
        body = row.strip()
        if body.startswith("#"):
            comment += 1
        elif body:
            code += 1
        else:
            blank += 1

    result = FileStats(
        path=str(path),
        total_lines=len(rows),
        code_lines=code,
        comment_lines=comment,
        blank_lines=blank,
    )

    try:
        module = ast.parse(text, filename=str(path))
    except Exception:
        # Unparseable source: keep the line tallies, skip declarations.
        return result

    analyzer = CodeAnalyzer()
    analyzer.visit(module)
    result.classes = analyzer.classes
    result.functions = analyzer.functions
    result.methods = analyzer.methods
    result.top_level_decls = analyzer.top_level_decls
    return result
|
|
|
|
|
|
def walk_python_files(root: Path, ignored: set[str] | None = None) -> Iterator[Path]:
    """Yield every ``*.py`` file below *root*, skipping ignored locations.

    Args:
        root: Directory to search recursively.
        ignored: Path component names that exclude a file. Defaults to the
            module-level ``IGNORED_PATHS``.

    Yields:
        Paths of the matching files, in the order ``rglob`` produces them.
    """
    skip = IGNORED_PATHS if ignored is None else set(ignored)
    for candidate in root.rglob("*.py"):
        # Only components below root are checked. Testing the full path
        # would drop every file when the project itself lives under a
        # directory called e.g. "env" or "venv".
        if skip.isdisjoint(candidate.relative_to(root).parts):
            yield candidate
|
|
|
|
|
|
def gather_dir_stats(root: Path) -> DirStats:
    """Analyze every Python file under *root* and total the results.

    Files are processed in sorted path order; files for which
    ``analyze_file`` returns nothing are left out of the totals.
    """
    totals = DirStats(path=str(root))
    for source_path in sorted(walk_python_files(root)):
        per_file = analyze_file(source_path)
        if not per_file:
            continue

        totals.files.append(per_file)
        totals.file_count += 1

        totals.total_lines += per_file.total_lines
        totals.code_lines += per_file.code_lines
        totals.comment_lines += per_file.comment_lines
        totals.blank_lines += per_file.blank_lines

        totals.class_count += len(per_file.classes)
        totals.function_count += len(per_file.functions)
        totals.method_count += len(per_file.methods)
    return totals
|
|
|
|
|
|
def format_bytes(num_bytes: int) -> str:
    """Render a byte count as ``B``, ``KB`` or ``MB``.

    Values below 1024 are shown as-is with a ``B`` suffix; larger values
    are divided by 1024 (``KB``) or 1024 * 1024 (``MB``) and shown with
    one decimal place.
    """
    kib = 1024
    mib = kib * kib
    if num_bytes < kib:
        return f"{num_bytes}B"
    if num_bytes < mib:
        return f"{num_bytes / kib:.1f}KB"
    return f"{num_bytes / mib:.1f}MB"
|
|
|
|
|
|
def print_stats(d: DirStats) -> None:
    """Print the summary and the per-file table for one directory.

    The table lists at most the 20 files with the most lines, largest
    first, followed by a note with the number of files left out.
    """
    banner = "=" * 60
    shown_limit = 20

    print(f"\n{banner}")
    print(f" {d.path}/")
    print(f"{banner}")
    print(f" Files: {d.file_count:,}")
    print(f" Lines: {d.total_lines:,} (code: {d.code_lines:,} | comment: {d.comment_lines:,} | blank: {d.blank_lines:,})")
    print(f" Classes: {d.class_count:,}")
    print(f" Functions: {d.function_count:,}")
    print(f" Methods: {d.method_count:,}")
    total_decls = d.class_count + d.function_count + d.method_count
    print(f" Total decls: {total_decls:,}")

    # Size is an estimate only: a flat 50 bytes per line.
    code_bytes = sum(entry.total_lines * 50 for entry in d.files)
    print(f" Est. code size: ~{format_bytes(code_bytes)}")

    print()
    if not d.files:
        return

    column_rule = " " + " ".join("-" * width for width in (35, 8, 7, 6, 5, 5, 5))
    print(f" {'File':<35} {'Lines':>8} {'Code':>7} {'Cmts':>6} {'Cls':>5} {'Fn':>5} {'Mth':>5}")
    print(column_rule)

    ranked = sorted(d.files, key=lambda x: x.total_lines, reverse=True)
    for entry in ranked[:shown_limit]:
        print(f" {os.path.basename(entry.path):<35} {entry.total_lines:>8,} {entry.code_lines:>7,} {entry.comment_lines:>6,} {len(entry.classes):>5} {len(entry.functions):>5} {len(entry.methods):>5}")

    hidden = len(d.files) - shown_limit
    if hidden > 0:
        print(f" ... and {hidden} more files")
|
|
|
|
|
|
def main() -> None:
    """Print statistics for each target directory, then combined totals.

    The project root is taken to be the parent of the directory that
    contains this script. Each name in ``TARGET_DIRS`` that is a directory
    under the root gets its own report; the others are reported as
    skipped. Combined totals are printed only when more than one directory
    was analyzed.
    """
    # Local import: the timestamp is the only use of datetime in the file.
    from datetime import datetime

    project_root = Path(__file__).parent.parent
    print("Manual Slop — Codebase Statistics")
    print(f"Generated: {datetime.now().isoformat()}")
    print(f"Project root: {project_root}")

    all_stats: list[DirStats] = []
    for dirname in TARGET_DIRS:
        dir_path = project_root / dirname
        # is_dir() rather than exists(): a regular file that happens to
        # share a target name is not something that can be scanned.
        if not dir_path.is_dir():
            print(f"\n[SKIPPED] {dirname}/ does not exist")
            continue
        stats = gather_dir_stats(dir_path)
        all_stats.append(stats)
        print_stats(stats)

    if len(all_stats) > 1:
        combined = DirStats(path="(all)")
        for s in all_stats:
            combined.file_count += s.file_count
            combined.total_lines += s.total_lines
            combined.code_lines += s.code_lines
            combined.comment_lines += s.comment_lines
            combined.blank_lines += s.blank_lines
            combined.class_count += s.class_count
            combined.function_count += s.function_count
            combined.method_count += s.method_count

        print(f"\n{'=' * 60}")
        print(" COMBINED TOTALS")
        print(f"{'=' * 60}")
        print(f" Files: {combined.file_count:,}")
        print(f" Lines: {combined.total_lines:,} (code: {combined.code_lines:,} | comment: {combined.comment_lines:,} | blank: {combined.blank_lines:,})")
        print(f" Classes: {combined.class_count:,}")
        print(f" Functions: {combined.function_count:,}")
        print(f" Methods: {combined.method_count:,}")
        total_decls = combined.class_count + combined.function_count + combined.method_count
        print(f" Total decls: {total_decls:,}")

    print("\nDone.")
|
|
|
|
|
|
# Run the report only when this file is executed as a script.
if __name__ == "__main__":
    main()