chore(scripts): remove superseded entropy and code-stat audits
These 4 scripts are superseded by the 2 active CI audit gates (audit_main_thread_imports.py, audit_weak_types.py). The entropy-era project tracking is no longer used. Removed (4 files, ~28 KB): - audit_entropy.py (3.1 KB) - early entropy auditor - comprehensive_entropy_audit.py (10.5 KB) - one-off audit - focused_entropy_audit.py (6.8 KB) - Muratori-style audit - code_stats.py (7.8 KB) - stats gatherer (no consumer) Active audit infrastructure kept: audit_main_thread_imports.py (CI gate), audit_weak_types.py (CI gate), check_test_toml_paths.py (CI gate), check_imgui_scopes.py (linter).
This commit is contained in:
@@ -1,98 +0,0 @@
|
|||||||
import os
|
|
||||||
import re
|
|
||||||
import ast
|
|
||||||
from collections import Counter
|
|
||||||
|
|
||||||
class ScopeAuditor(ast.NodeVisitor):
    """AST visitor that reports names defined more than once in a scope.

    ``scope_stack`` holds one ``(defined_names, dotted_label)`` pair per
    scope currently being visited. The bottom entry is the module scope
    and carries an empty label; it is left on the stack so the caller can
    pop it and pass it to ``check_duplicates`` after the visit.
    Findings are appended to the list handed to the constructor.
    """

    def __init__(self, findings):
        # Module scope: no names seen yet, empty label.
        self.scope_stack = [([], "")]
        self.findings = findings

    def _visit_scope(self, node):
        """Record ``node.name`` in the current scope, then audit its body."""
        names_here, outer_label = self.scope_stack[-1]
        names_here.append(node.name)
        if outer_label:
            label = f"{outer_label}.{node.name}"
        else:
            label = node.name
        self.scope_stack.append(([], label))
        self.generic_visit(node)
        # The scope is complete once its children are visited.
        self.check_duplicates(*self.scope_stack.pop())

    def visit_ClassDef(self, node):
        self._visit_scope(node)

    def visit_FunctionDef(self, node):
        self._visit_scope(node)

    def visit_AsyncFunctionDef(self, node):
        # Async functions are audited exactly like regular ones.
        self.visit_FunctionDef(node)

    def check_duplicates(self, defs, label):
        """Append one finding per name that occurs more than once in ``defs``.

        Args:
            defs: Names defined directly in one scope, in source order.
            label: Dotted scope label; empty for the module scope.
        """
        where = f" in scope '{label}'" if label else " at top-level"
        for name, count in Counter(defs).items():
            if count <= 1:
                continue
            self.findings.append(f"Duplicate definition{where}: '{name}' ({count} times)")
|
|
||||||
|
|
||||||
def audit_file(path):
    """Audit one Python file and return a list of human-readable findings.

    Checks, in order:
      1. identical ``import ...`` lines occurring more than once;
      2. identical ``from ... import ...`` lines occurring more than once;
      3. the first line that starts with four spaces (reported once);
      4. functions/classes defined more than once in the same scope, via
         ``ScopeAuditor``. Any failure in this step is recorded as an
         ``AST Parse Error`` finding rather than raised.

    Checks 1-3 are purely textual: lines are compared after stripping, so
    an indented import or a matching line inside a string literal counts.

    Args:
        path: Path of the file to read; it is decoded as UTF-8.

    Returns:
        A list of finding strings; empty when nothing was detected.

    Raises:
        OSError, UnicodeDecodeError: if the file cannot be read or decoded.
    """
    with open(path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    content = "".join(lines)

    findings = []
    stripped_lines = [line.strip() for line in lines]

    # 1. Detect multiple identical import lines
    import_counts = Counter(s for s in stripped_lines if s.startswith('import '))
    for imp, count in import_counts.items():
        if count > 1:
            findings.append(f"Duplicate import: '{imp}' ({count} times)")

    # 2. Detect multiple 'from X import Y' lines for the same module X and symbol Y
    from_counts = Counter(s for s in stripped_lines if s.startswith('from '))
    for imp, count in from_counts.items():
        if count > 1:
            findings.append(f"Duplicate from-import: '{imp}' ({count} times)")

    # 3. Detect mixed indentation (look for 4-space blocks).
    # The prefix must really be four spaces: a single-space prefix would
    # flag every indented line as a "4-space block".
    four_spaces = " " * 4
    for i, line in enumerate(lines):
        if line.startswith(four_spaces):
            findings.append(f"Mixed indentation: 4-space block found at line {i+1}")
            break  # Only report once per file

    # 4. List all functions and classes that appear more than once in the same scope
    try:
        tree = ast.parse(content)
        auditor = ScopeAuditor(findings)
        auditor.visit(tree)
        # The module scope is never popped by the visitor itself.
        if auditor.scope_stack:
            defs, label = auditor.scope_stack.pop()
            auditor.check_duplicates(defs, label)
    except Exception as e:
        findings.append(f"AST Parse Error: {e}")

    return findings
|
|
||||||
|
|
||||||
def main():
    """Audit every ``.py`` file under ``src`` and print findings per file.

    Prints a notice and returns early when ``src`` does not exist in the
    current working directory. Files without findings produce no output.
    """
    src_dir = 'src'
    if not os.path.exists(src_dir):
        print(f"Directory {src_dir} not found.")
        return

    for root, _dirs, files in os.walk(src_dir):
        for file in files:
            if not file.endswith('.py'):
                continue
            path = os.path.join(root, file)
            findings = audit_file(path)
            if not findings:
                continue
            print(f"--- {path} ---")
            for finding in findings:
                print(f" {finding}")
            print()
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,227 +0,0 @@
|
|||||||
"""
|
|
||||||
Codebase Statistics Gatherer
|
|
||||||
|
|
||||||
Gathers metrics on the Manual Slop Python codebase:
|
|
||||||
- Line counts (total, code, comment, blank)
|
|
||||||
- Class counts and definitions
|
|
||||||
- Function/method counts and definitions
|
|
||||||
- Top-level declarations
|
|
||||||
- File sizes
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
uv run python scripts/code_stats.py
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
import ast
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
from collections import defaultdict
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Iterator
|
|
||||||
|
|
||||||
TARGET_DIRS = ["src", "simulation", "tests", "scripts"]
|
|
||||||
IGNORED_PATHS = {"__pycache__", ".git", "node_modules", "venv", ".venv", "env", ".env"}
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class FileStats:
    """Line counts and declaration names collected for one source file."""

    # File path, stored as a string.
    path: str

    # Line counters; all start at zero.
    total_lines: int = 0
    code_lines: int = 0
    comment_lines: int = 0
    blank_lines: int = 0

    # Declaration names; each instance gets its own fresh lists.
    classes: list[str] = field(default_factory=list)
    functions: list[str] = field(default_factory=list)
    methods: list[str] = field(default_factory=list)
    top_level_decls: list[str] = field(default_factory=list)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class DirStats:
    """Aggregated statistics for one directory tree."""

    # Directory path (or a display label), stored as a string.
    path: str

    # Per-file results that were folded into the totals below.
    files: list[FileStats] = field(default_factory=list)

    # Summed line counters.
    total_lines: int = 0
    code_lines: int = 0
    comment_lines: int = 0
    blank_lines: int = 0

    # Summed declaration and file counters.
    class_count: int = 0
    function_count: int = 0
    method_count: int = 0
    file_count: int = 0
|
|
||||||
|
|
||||||
|
|
||||||
class CodeAnalyzer(ast.NodeVisitor):
    """Collect class, function and method names from a parsed module.

    After ``visit(tree)``:
      * ``classes`` has every class name, nested ones included;
      * ``methods`` has ``"Class.name"`` for functions defined directly in
        a class body;
      * ``functions`` has every other function, including functions nested
        inside other functions or inside methods;
      * ``top_level_decls`` has ``"class X"`` / ``"def f"`` / ``"async def f"``
        for declarations at module level only.
    """

    def __init__(self) -> None:
        self.classes: list[str] = []
        self.functions: list[str] = []
        self.methods: list[str] = []
        self.top_level_decls: list[str] = []
        self._in_class: bool = False
        self._class_name: str = ""
        # Number of enclosing class/function bodies; 0 means module level.
        self._depth: int = 0

    def _visit_body(self, node: ast.AST, in_class: bool, class_name: str) -> None:
        """Visit ``node``'s children with the given class context, then restore it."""
        saved = (self._in_class, self._class_name)
        self._in_class, self._class_name = in_class, class_name
        self._depth += 1
        try:
            self.generic_visit(node)
        finally:
            self._depth -= 1
            self._in_class, self._class_name = saved

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        self.classes.append(node.name)
        if self._depth == 0:
            self.top_level_decls.append(f"class {node.name}")
        self._visit_body(node, True, node.name)

    def _record_function(self, node: ast.FunctionDef | ast.AsyncFunctionDef, keyword: str) -> None:
        """Classify one function definition and descend into its body."""
        if self._in_class:
            self.methods.append(f"{self._class_name}.{node.name}")
        else:
            self.functions.append(node.name)
            if self._depth == 0:
                self.top_level_decls.append(f"{keyword} {node.name}")
        # Definitions inside a function body are plain functions, even when
        # the enclosing function is itself a method.
        self._visit_body(node, False, "")

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        self._record_function(node, "def")

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        self._record_function(node, "async def")
|
|
||||||
|
|
||||||
|
|
||||||
def analyze_file(path: Path) -> FileStats | None:
    """Compute line counts and declaration names for one Python file.

    Args:
        path: File to analyse; it is read as UTF-8 with undecodable bytes
            replaced.

    Returns:
        ``None`` when the file cannot be read. Otherwise a ``FileStats``;
        when the source cannot be parsed, the line counts are filled in
        and the declaration lists stay empty.
    """
    try:
        content = path.read_text(encoding="utf-8", errors="replace")
    except Exception:
        return None

    stats = FileStats(path=str(path))
    source_lines = content.splitlines()
    stats.total_lines = len(source_lines)

    # A line is blank, a comment (first non-space char is '#'), or code.
    for raw in source_lines:
        text = raw.strip()
        if text.startswith("#"):
            stats.comment_lines += 1
        elif text:
            stats.code_lines += 1
        else:
            stats.blank_lines += 1

    try:
        tree = ast.parse(content, filename=str(path))
    except Exception:
        # Unparseable source: keep the line counts, skip declarations.
        return stats

    visitor = CodeAnalyzer()
    visitor.visit(tree)
    stats.classes = visitor.classes
    stats.functions = visitor.functions
    stats.methods = visitor.methods
    stats.top_level_decls = visitor.top_level_decls
    return stats
|
|
||||||
|
|
||||||
|
|
||||||
def walk_python_files(root: Path) -> Iterator[Path]:
    """Yield every ``*.py`` file below ``root`` that is not in an ignored directory.

    A file is skipped when any component of its path *relative to root*
    is listed in ``IGNORED_PATHS``. Only the relative part is tested, so
    a checkout that lives under a directory named e.g. ``env`` or ``venv``
    is still scanned.

    Args:
        root: Directory to search recursively.

    Yields:
        Paths as produced by ``root.rglob("*.py")``, in that order.
    """
    for path in root.rglob("*.py"):
        relative_parts = path.relative_to(root).parts
        if any(part in IGNORED_PATHS for part in relative_parts):
            continue
        yield path
|
|
||||||
|
|
||||||
|
|
||||||
def gather_dir_stats(root: Path) -> DirStats:
    """Analyse every Python file under ``root`` and sum the results.

    Files are processed in sorted path order. Files for which
    ``analyze_file`` returns nothing are left out of the totals.

    Args:
        root: Directory whose Python files are analysed.

    Returns:
        A ``DirStats`` holding the per-file results and their totals.
    """
    totals = DirStats(path=str(root))
    for py_file in sorted(walk_python_files(root)):
        per_file = analyze_file(py_file)
        if not per_file:
            continue
        totals.files.append(per_file)
        totals.file_count += 1
        # Line counters.
        totals.total_lines += per_file.total_lines
        totals.code_lines += per_file.code_lines
        totals.comment_lines += per_file.comment_lines
        totals.blank_lines += per_file.blank_lines
        # Declaration counters.
        totals.class_count += len(per_file.classes)
        totals.function_count += len(per_file.functions)
        totals.method_count += len(per_file.methods)
    return totals
|
|
||||||
|
|
||||||
|
|
||||||
def format_bytes(num_bytes: int) -> str:
    """Render a byte count as ``B``, ``KB`` or ``MB`` using 1024-based units.

    Values below 1024 are shown as-is; larger values are shown with one
    decimal place.
    """
    kib = 1024
    mib = kib * kib
    if num_bytes < kib:
        return f"{num_bytes}B"
    if num_bytes < mib:
        return f"{num_bytes / kib:.1f}KB"
    return f"{num_bytes / mib:.1f}MB"
|
|
||||||
|
|
||||||
|
|
||||||
def print_stats(d: DirStats) -> None:
    """Print a summary for one directory plus a table of its largest files.

    The size line is an estimate: 50 bytes per line summed over
    ``d.files``. The table lists at most 20 files, ordered by total line
    count (largest first), and is followed by a note when more exist.
    """
    rule = '=' * 60
    print(f"\n{rule}")
    print(f" {d.path}/")
    print(rule)
    print(f" Files: {d.file_count:,}")
    print(f" Lines: {d.total_lines:,} (code: {d.code_lines:,} | comment: {d.comment_lines:,} | blank: {d.blank_lines:,})")
    print(f" Classes: {d.class_count:,}")
    print(f" Functions: {d.function_count:,}")
    print(f" Methods: {d.method_count:,}")
    total_decls = d.class_count + d.function_count + d.method_count
    print(f" Total decls: {total_decls:,}")

    # Rough size estimate: assume 50 bytes per line.
    code_bytes = sum(entry.total_lines * 50 for entry in d.files)
    print(f" Est. code size: ~{format_bytes(code_bytes)}")

    print()
    if not d.files:
        return

    shown = 20
    print(f" {'File':<35} {'Lines':>8} {'Code':>7} {'Cmts':>6} {'Cls':>5} {'Fn':>5} {'Mth':>5}")
    print(f" {'-' * 35} {'-' * 8} {'-' * 7} {'-' * 6} {'-' * 5} {'-' * 5} {'-' * 5}")
    largest_first = sorted(d.files, key=lambda x: x.total_lines, reverse=True)
    for entry in largest_first[:shown]:
        print(f" {os.path.basename(entry.path):<35} {entry.total_lines:>8,} {entry.code_lines:>7,} {entry.comment_lines:>6,} {len(entry.classes):>5} {len(entry.functions):>5} {len(entry.methods):>5}")
    if len(d.files) > shown:
        print(f" ... and {len(d.files) - 20} more files")
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """Print statistics for each target directory, then combined totals.

    Directories from ``TARGET_DIRS`` are resolved against the parent of
    the directory holding this script. Missing directories are reported
    as skipped. Combined totals are printed only when more than one
    directory was analysed.
    """
    from datetime import datetime

    project_root = Path(__file__).parent.parent
    print("Manual Slop — Codebase Statistics")
    print(f"Generated: {datetime.now().isoformat()}")
    print(f"Project root: {project_root}")

    all_stats: list[DirStats] = []
    for dirname in TARGET_DIRS:
        dir_path = project_root / dirname
        if not dir_path.exists():
            print(f"\n[SKIPPED] {dirname}/ does not exist")
            continue
        stats = gather_dir_stats(dir_path)
        all_stats.append(stats)
        print_stats(stats)

    if len(all_stats) > 1:
        combined = DirStats(path="(all)")
        # Sum each counter over the per-directory results.
        for counter in (
            "file_count",
            "total_lines",
            "code_lines",
            "comment_lines",
            "blank_lines",
            "class_count",
            "function_count",
            "method_count",
        ):
            setattr(combined, counter, sum(getattr(s, counter) for s in all_stats))

        rule = '=' * 60
        print(f"\n{rule}")
        print(" COMBINED TOTALS")
        print(rule)
        print(f" Files: {combined.file_count:,}")
        print(f" Lines: {combined.total_lines:,} (code: {combined.code_lines:,} | comment: {combined.comment_lines:,} | blank: {combined.blank_lines:,})")
        print(f" Classes: {combined.class_count:,}")
        print(f" Functions: {combined.function_count:,}")
        print(f" Methods: {combined.method_count:,}")
        total_decls = combined.class_count + combined.function_count + combined.method_count
        print(f" Total decls: {total_decls:,}")

    print("\nDone.")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,266 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Comprehensive Entropy Audit Script for Manual Slop src/
|
|
||||||
Checks for:
|
|
||||||
1. Duplicate function definitions
|
|
||||||
2. Duplicate class definitions
|
|
||||||
3. Very long functions (>200 lines)
|
|
||||||
4. Nested imports within functions
|
|
||||||
5. Inconsistent patterns (TODO, FIXME comments)
|
|
||||||
6. Cyclomatic complexity indicators (nested conditionals)
|
|
||||||
7. Dead code indicators (unused variables, commented out code)
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import ast
|
|
||||||
from pathlib import Path
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from typing import List, Dict, Set, Optional
|
|
||||||
|
|
||||||
@dataclass
class EntropyIssue:
    """One finding produced by the entropy audit."""

    # Path of the file the finding belongs to.
    file: str
    # Line number the finding refers to.
    line: int
    # One of 'high', 'medium', 'low'.
    severity: str
    # Short machine-friendly tag used to group findings.
    category: str
    # Human-readable summary.
    description: str
    # Optional extra context; empty when there is none.
    detail: str = ""
|
|
||||||
|
|
||||||
@dataclass
class FileAnalysis:
    """Per-file result record of the entropy audit."""

    # Path of the analysed file, stored as a string.
    path: str
    # File size in kibibytes.
    size_kb: float
    # Findings attached to this file; a fresh list per instance.
    issues: List[EntropyIssue] = field(default_factory=list)
    # Free-form statistics; a fresh dict per instance.
    stats: Dict = field(default_factory=dict)
|
|
||||||
|
|
||||||
class EntropyAuditor:
    """Run heuristic "entropy" checks over the Python files of a directory.

    Findings from all files accumulate in ``self.issues``.
    ``generate_report`` turns them into a markdown summary.
    """

    # Debt markers must be whole words so that e.g. "debug" or
    # "hackathon" are not flagged.
    _DEBT_MARKER = re.compile(r'\b(TODO|FIXME|BUG|HACK|XXX)\b', re.IGNORECASE)

    def __init__(self, src_dir: str = "src"):
        self.src_dir = Path(src_dir)
        self.issues: List[EntropyIssue] = []
        self.files_analyzed = 0
        self.total_lines = 0

    def analyze_file(self, filepath: Path) -> FileAnalysis:
        """Run every check on one file.

        Args:
            filepath: File to analyse; read as UTF-8, undecodable bytes dropped.

        Returns:
            A ``FileAnalysis`` whose ``issues`` holds the findings produced
            for this file (the same objects are also in ``self.issues``).

        Raises:
            OSError: if the file cannot be read.
            SyntaxError: if the file cannot be parsed.
        """
        with open(filepath, encoding='utf-8', errors='ignore') as f:
            content = f.read()

        lines = content.split('\n')
        self.total_lines += len(lines)

        analysis = FileAnalysis(
            path=str(filepath),
            size_kb=filepath.stat().st_size / 1024
        )
        # Everything appended to self.issues from here on belongs to this file.
        first_new = len(self.issues)

        # 1. Check for nested imports
        self._check_nested_imports(filepath, content)

        # 2. Check for very long functions
        self._check_long_functions(filepath, content)

        # 3. Check for TODO/FIXME
        self._check_todos(filepath, content)

        # 4. Check for nested depth (complexity)
        self._check_nesting_depth(filepath, lines)

        # 5. Check for duplicate code patterns
        self._check_duplicate_patterns(filepath, lines)

        # 6. Check for magic numbers
        self._check_magic_numbers(filepath, lines)

        analysis.issues = self.issues[first_new:]
        return analysis

    def _check_nested_imports(self, filepath: Path, content: str) -> None:
        """Report every import statement located inside a function body.

        Sync and async functions are covered. Each import is reported once
        and attributed to the innermost enclosing function.
        """
        tree = ast.parse(content, filename=str(filepath))

        def scan(node: ast.AST, owner: Optional[str]) -> None:
            # ``owner`` is the innermost enclosing function name, or None
            # while still outside any function.
            for child in ast.iter_child_nodes(node):
                if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    scan(child, child.name)
                elif isinstance(child, (ast.Import, ast.ImportFrom)):
                    if owner is not None:
                        self.issues.append(EntropyIssue(
                            file=str(filepath),
                            line=child.lineno or 0,
                            severity='medium',
                            category='nested_import',
                            description=f'Nested import in function `{owner}`',
                            detail=ast.unparse(child)[:100]
                        ))
                else:
                    scan(child, owner)

        scan(tree, None)

    def _check_long_functions(self, filepath: Path, content: str) -> None:
        """Check for functions with >200 lines or >10 parameters."""
        tree = ast.parse(content, filename=str(filepath))
        for node in ast.walk(tree):
            if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            if node.end_lineno and node.lineno:
                # Both line numbers are inclusive, hence the +1.
                length = node.end_lineno - node.lineno + 1
                if length > 200:
                    self.issues.append(EntropyIssue(
                        file=str(filepath),
                        line=node.lineno,
                        severity='high',
                        category='long_function',
                        description=f'Function `{node.name}` is {length} lines (>{200})',
                        detail=f'Lines {node.lineno}-{node.end_lineno}'
                    ))
            params = node.args.args
            if len(params) > 10:
                self.issues.append(EntropyIssue(
                    file=str(filepath),
                    line=node.lineno,
                    severity='medium',
                    category='too_many_params',
                    description=f'Function `{node.name}` has {len(params)} parameters',
                    # Show parameter names, not ast.arg object reprs.
                    detail=', '.join(p.arg for p in params[:5]) + '...'
                ))

    def _check_todos(self, filepath: Path, content: str) -> None:
        """Report lines containing a TODO/FIXME/BUG/HACK/XXX marker word."""
        for i, line in enumerate(content.split('\n'), 1):
            if self._DEBT_MARKER.search(line):
                self.issues.append(EntropyIssue(
                    file=str(filepath),
                    line=i,
                    severity='low',
                    category='tech_debt',
                    description=line.strip()[:80],
                    detail='Technical debt marker'
                ))

    def _check_nesting_depth(self, filepath: Path, lines: List[str]) -> None:
        """Report non-comment lines with more than 20 leading whitespace characters."""
        for i, line in enumerate(lines, 1):
            if line and not line.strip().startswith('#'):
                # Count leading whitespace
                stripped = line.lstrip()
                indent = len(line) - len(stripped)
                if indent > 20:  # More than ~10 levels deep
                    self.issues.append(EntropyIssue(
                        file=str(filepath),
                        line=i,
                        severity='medium',
                        category='deep_nesting',
                        description=f'Line has {indent} spaces of indentation',
                        detail=line.strip()[:60]
                    ))

    def _check_duplicate_patterns(self, filepath: Path, lines: List[str]) -> None:
        """Report runs of three or more identical consecutive code lines.

        Lines are compared after stripping. Blank lines and comment lines
        never extend a run.
        """
        prev_line = None
        dup_start = None
        # The trailing empty sentinel closes a run that reaches end of file.
        for i, line in enumerate(list(lines) + [''], 1):
            stripped = line.strip()
            if stripped and not stripped.startswith('#') and stripped == prev_line:
                if dup_start is None:
                    dup_start = i - 1
            else:
                if dup_start is not None and i - dup_start > 2:
                    self.issues.append(EntropyIssue(
                        file=str(filepath),
                        line=dup_start,
                        severity='high',
                        category='duplicate_lines',
                        description=f'{i - dup_start} consecutive duplicate lines starting at {dup_start}',
                        detail=lines[dup_start - 1].strip()[:60]
                    ))
                dup_start = None
            prev_line = stripped

    def _check_magic_numbers(self, filepath: Path, lines: List[str]) -> None:
        """Report standalone numbers of three or more digits on non-comment lines."""
        magic_pattern = re.compile(r'(?<!\w)([0-9]{3,})(?!\w)')  # Numbers with 3+ digits
        for i, line in enumerate(lines, 1):
            if line.strip().startswith('#'):
                continue
            for m in magic_pattern.findall(line):
                self.issues.append(EntropyIssue(
                    file=str(filepath),
                    line=i,
                    severity='low',
                    category='magic_number',
                    description=f'Magic number: {m}',
                    detail=line.strip()[:60]
                ))

    def run_audit(self) -> None:
        """Analyse the ``*.py`` files directly inside ``src_dir`` (not recursive).

        ``__init__.py`` is skipped. A file that fails to analyse is reported
        on stdout and does not count towards ``files_analyzed``.
        """
        py_files = list(self.src_dir.glob("*.py"))
        print(f"Auditing {len(py_files)} Python files in {self.src_dir}...")

        for filepath in sorted(py_files):
            if filepath.name == "__init__.py":
                continue
            try:
                self.analyze_file(filepath)
                self.files_analyzed += 1
            except Exception as e:
                print(f"Error analyzing {filepath}: {e}")

    def generate_report(self) -> str:
        """Generate a markdown report of findings.

        High-severity issues are listed in full, medium ones are capped at
        50 entries, and low ones appear only in the summary counts.
        """
        by_severity = {'high': [], 'medium': [], 'low': []}
        by_category = {}

        for issue in self.issues:
            by_severity[issue.severity].append(issue)
            by_category.setdefault(issue.category, []).append(issue)

        report = [
            "# Entropy Audit Report: src/",
            "",
            f"**Files Analyzed:** {self.files_analyzed}",
            f"**Total Lines:** {self.total_lines:,}",
            f"**Issues Found:** {len(self.issues)}",
            "",
            "## Summary by Severity",
            "",
            f"- **High:** {len(by_severity['high'])}",
            f"- **Medium:** {len(by_severity['medium'])}",
            f"- **Low:** {len(by_severity['low'])}",
            "",
            "## Summary by Category",
            ""
        ]

        for cat, issues in sorted(by_category.items()):
            report.append(f"- **{cat}:** {len(issues)}")

        report.extend(["", "## High Severity Issues", ""])
        for issue in sorted(by_severity['high'], key=lambda x: (x.file, x.line)):
            report.append(f"### {issue.file}")
            report.append(f"- **Line {issue.line}:** {issue.description}")
            if issue.detail:
                report.append(f" - Detail: `{issue.detail[:80]}`")
            report.append("")

        report.extend(["", "## Medium Severity Issues", ""])
        for issue in sorted(by_severity['medium'], key=lambda x: (x.file, x.line))[:50]:
            report.append(f"- **Line {issue.line}** ({issue.file}): {issue.description}")

        if len(by_severity['medium']) > 50:
            report.append(f"\n_... and {len(by_severity['medium']) - 50} more medium issues_")

        return "\n".join(report)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Audit ``src``, print the markdown report and save it to the track folder.

    The report file is written as UTF-8 because findings quote source
    lines that may contain non-ASCII text. The target directory is
    created when missing, so a finished audit is not lost on the write.
    """
    auditor = EntropyAuditor("src")
    auditor.run_audit()
    report = auditor.generate_report()
    print(report)

    # Also write to file
    report_path = "conductor/tracks/data_oriented_optimization_20260312/entropy_audit_report.md"
    Path(report_path).parent.mkdir(parents=True, exist_ok=True)
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(report)
    print(f"\nReport written to {report_path}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,199 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Focused Entropy Audit for Manual Slop - Muratori Style
|
|
||||||
Focuses on ACTUAL issues, not style:
|
|
||||||
1. Duplicate logic (same thing done in multiple places)
|
|
||||||
2. State inconsistencies (parallel representations)
|
|
||||||
3. Logic errors / bugs
|
|
||||||
4. Performance concerns
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import ast
|
|
||||||
from pathlib import Path
|
|
||||||
from collections import defaultdict
|
|
||||||
from typing import List, Dict, Set, Tuple, Optional
|
|
||||||
|
|
||||||
def find_duplicate_logic_files():
    """Map each tracked identifier to the ``src/*.py`` files that mention it.

    Each identifier is searched as a regular expression in every file's
    text. An identifier showing up in several files hints at logic that
    may be duplicated.

    Returns:
        dict mapping identifier -> list of file names (not full paths).
    """
    tracked = (
        'calculate_track_progress',
        'cascade_blocks',
        'topological_sort',
        'push_mma_state',
        'active_tickets',
    )
    patterns = {name: [] for name in tracked}

    for source_file in Path('src').glob('*.py'):
        text = source_file.read_text(encoding='utf-8', errors='ignore')
        for name, hits in patterns.items():
            if re.search(name, text):
                hits.append(source_file.name)

    return patterns
|
|
||||||
|
|
||||||
def check_ticket_state_management():
    """Check for state management issues in ticket handling.

    Reads ``src/gui_2.py`` and ``src/dag_engine.py`` and looks for:
      * dict-style ticket status access in the GUI module;
      * GUI block/unblock callbacks coexisting with ``cascade_blocks``
        in the DAG engine.

    Returns:
        list of ``(location, message)`` tuples.

    Raises:
        OSError: if either of the two files cannot be read.
    """
    issues = []

    # Only the files that are actually inspected are read;
    # src/app_controller.py was previously loaded but never used.
    gui_2_content = Path('src/gui_2.py').read_text(encoding='utf-8', errors='ignore')
    dag_content = Path('src/dag_engine.py').read_text(encoding='utf-8', errors='ignore')

    # gui_2 uses dict-based tickets
    if 'active_tickets' in gui_2_content:
        if 'ticket["status"]' in gui_2_content or "t['status']" in gui_2_content:
            issues.append(("gui_2.py", "Dict-based ticket access pattern found"))

    # Check for blocking logic duplication
    gui_blocking = len(re.findall(r'_cb_block_ticket|_cb_unblock_ticket', gui_2_content))
    dag_blocking = len(re.findall(r'cascade_blocks', dag_content))

    if gui_blocking > 0 and dag_blocking > 0:
        issues.append(("architecture", "GUI has manual block/unblock that could conflict with DAG cascade_blocks"))

    return issues
|
|
||||||
|
|
||||||
def check_import_issues():
    """Report import statements located inside hot-path functions.

    Scans ``src/*.py``. A function (sync or async) counts as a hot path
    when its name is one of a fixed set of frequently called entry points.
    Files that cannot be read or parsed are skipped.

    Returns:
        list of ``(file_name, message)`` tuples.
    """
    hot_paths = {'_process_pending_gui_tasks', '_gui_func', 'run', 'tick'}
    issues = []

    for f in Path('src').glob('*.py'):
        try:
            content = f.read_text(encoding='utf-8', errors='ignore')
            tree = ast.parse(content, filename=str(f))
        except (OSError, SyntaxError, ValueError, RecursionError):
            # Best effort: an unreadable or unparseable file is skipped,
            # but unrelated errors are no longer swallowed.
            continue

        for node in ast.walk(tree):
            if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            # Decide once per function instead of once per nested import.
            if node.name not in hot_paths:
                continue
            for child in ast.walk(node):
                if isinstance(child, (ast.Import, ast.ImportFrom)):
                    line = child.lineno or 0
                    issues.append((f.name, f"Nested import `{ast.unparse(child).strip()[:50]}` in hot path `{node.name}` line {line}"))

    return issues
|
|
||||||
|
|
||||||
def check_potential_bugs():
    """Scan ``src/*.py`` for ``def`` lines with an empty list or dict default.

    The check is line-based: only defaults written on the ``def`` line
    itself are seen. Comment lines are skipped. A line holding both a
    ``[]`` and a ``{}`` default is reported twice.

    Returns:
        list of ``(file_name, line_number, "Mutable default argument",
        source_excerpt)`` tuples.
    """
    list_default = re.compile(r'def\s+\w+\([^)]*=\s*\[\s*\]')
    dict_default = re.compile(r'def\s+\w+\([^)]*=\s*\{\s*\}')
    bugs = []

    for f in Path('src').glob('*.py'):
        text = f.read_text(encoding='utf-8', errors='ignore')
        for number, line in enumerate(text.split('\n'), 1):
            excerpt = line.strip()
            # Skip comments
            if excerpt.startswith('#'):
                continue
            # Mutable default arguments are shared between calls (common bug).
            for matcher in (list_default, dict_default):
                if matcher.search(line):
                    bugs.append((f.name, number, "Mutable default argument", excerpt[:60]))

    return bugs
|
|
||||||
|
|
||||||
def check_actual_duplicates():
    """Find runs of three or more identical consecutive code lines in ``src/*.py``.

    Lines are compared after stripping. A blank line or a comment line
    ends the current run. A run is reported whatever follows it — another
    code line, a blank line, a comment, or the end of the file.
    Unreadable files are skipped.

    Returns:
        list of ``(file_name, first_line, last_line, excerpt)`` tuples with
        1-based inclusive line numbers, files in sorted order.
    """
    duplicates = []

    for f in sorted(Path('src').glob('*.py')):
        try:
            content = f.read_text(encoding='utf-8', errors='ignore')
        except OSError:
            # Best effort: skip files that cannot be read.
            continue

        lines = content.split('\n')
        run_text = None  # stripped text of the current run; None = no run
        run_start = 0    # 1-based line number where the current run began

        # The trailing empty sentinel closes a run that reaches end of file.
        for i, line in enumerate(lines + [''], 1):
            normalized = line.strip()
            if not normalized or normalized.startswith('#'):
                normalized = None  # blank/comment lines break a run

            if normalized is not None and normalized == run_text:
                continue  # still inside the same run

            # The run [run_start, i - 1] just ended; report it if long enough.
            if run_text is not None and i - run_start >= 3:
                duplicates.append((f.name, run_start, i - 1, run_text[:60]))
            run_text, run_start = normalized, i

    return duplicates
|
|
||||||
|
|
||||||
def main():
    """Run the five focused checks in order and print each section's results.

    Sections 2-4 show at most ten entries each. Section 5 lists only the
    patterns that occur in more than one file.
    """
    def heading(title):
        # Section title followed by a 40-character rule.
        print(title)
        print("-" * 40)

    banner = "=" * 70
    print(banner)
    print("FOCUSED ENTROPY AUDIT - Muratori Style")
    print(banner)
    print()

    heading("1. TICKET STATE MANAGEMENT ISSUES")
    issues = check_ticket_state_management()
    if not issues:
        print(" None found")
    for issue in issues:
        print(f" [{issue[0]}] {issue[1]}")
    print()

    heading("2. NESTED IMPORTS IN HOT PATH FUNCTIONS")
    issues = check_import_issues()
    if not issues:
        print(" None found")
    for fname, msg in issues[:10]:
        print(f" [{fname}] {msg}")
    print()

    heading("3. POTENTIAL BUGS (mutable defaults, etc)")
    issues = check_potential_bugs()
    if not issues:
        print(" None found")
    for fname, line, bugtype, code in issues[:10]:
        print(f" [{fname}:{line}] {bugtype}: {code}")
    print()

    heading("4. ACTUAL DUPLICATE CODE (3+ consecutive lines)")
    duplicates = check_actual_duplicates()
    if not duplicates:
        print(" None found")
    for fname, start, end, code in duplicates[:10]:
        print(f" [{fname}:{start}-{end}] {code}")
    print()

    heading("5. PATTERN USAGE ACROSS FILES")
    patterns = find_duplicate_logic_files()
    for pattern, files in patterns.items():
        if len(files) > 1:
            print(f" {pattern}: {files}")
    print()
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
Reference in New Issue
Block a user