#!/usr/bin/env python3
"""
Comprehensive Entropy Audit Script for Manual Slop src/

Checks for:
1. Duplicate function definitions
2. Duplicate class definitions
3. Very long functions (>200 lines)
4. Nested imports within functions
5. Inconsistent patterns (TODO, FIXME comments)
6. Cyclomatic complexity indicators (nested conditionals)
7. Dead code indicators (unused variables, commented out code)
"""

import os
import re
import ast
from pathlib import Path
from dataclasses import dataclass, field
from typing import List, Dict, Set, Optional


@dataclass
class EntropyIssue:
    """A single finding produced by the auditor."""
    file: str
    line: int
    severity: str  # 'high', 'medium', 'low'
    category: str
    description: str
    detail: str = ""


@dataclass
class FileAnalysis:
    """Per-file result container (issues are also accumulated on the auditor)."""
    path: str
    size_kb: float
    issues: List[EntropyIssue] = field(default_factory=list)
    stats: Dict = field(default_factory=dict)


class EntropyAuditor:
    """Walks the Python files under ``src_dir`` and records entropy issues."""

    # Thresholds, named so they are not themselves magic numbers.
    LONG_FUNCTION_LINES = 200
    MAX_PARAMS = 10
    MAX_INDENT = 20  # more than ~10 levels at 2 spaces, ~5 at 4 spaces
    MEDIUM_REPORT_LIMIT = 50

    def __init__(self, src_dir: str = "src"):
        self.src_dir = Path(src_dir)
        self.issues: List[EntropyIssue] = []
        self.files_analyzed = 0
        self.total_lines = 0

    def analyze_file(self, filepath: Path) -> FileAnalysis:
        """Run every check against a single file and return its analysis.

        All issues are appended to ``self.issues``; the returned
        ``FileAnalysis`` carries only path/size metadata (as in the
        original implementation).
        """
        with open(filepath, encoding='utf-8', errors='ignore') as f:
            content = f.read()
        lines = content.split('\n')
        self.total_lines += len(lines)

        analysis = FileAnalysis(
            path=str(filepath),
            size_kb=filepath.stat().st_size / 1024
        )

        # 1. Check for nested imports
        self._check_nested_imports(filepath, content)
        # 2. Check for very long functions
        self._check_long_functions(filepath, content)
        # 3. Check for TODO/FIXME
        self._check_todos(filepath, content)
        # 4. Check for nested depth (complexity)
        self._check_nesting_depth(filepath, lines)
        # 5. Check for duplicate code patterns
        self._check_duplicate_patterns(filepath, lines)
        # 6. Check for magic numbers
        self._check_magic_numbers(filepath, lines)

        return analysis

    def _check_nested_imports(self, filepath: Path, content: str) -> None:
        """Check for imports inside function bodies.

        Fix: the previous guard (``if not any(... for p in tree.body)``)
        suppressed the report whenever the module had *any* top-level
        import — which is almost always true — so nested imports were
        effectively never flagged.  Any Import/ImportFrom reached by
        walking a function node is nested by construction, so it is
        reported unconditionally.  Also covers ``async def``.
        """
        tree = ast.parse(content, filename=str(filepath))
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                for child in ast.walk(node):
                    if isinstance(child, (ast.Import, ast.ImportFrom)):
                        self.issues.append(EntropyIssue(
                            file=str(filepath),
                            line=child.lineno or 0,
                            severity='medium',
                            category='nested_import',
                            description=f'Nested import in function `{node.name}`',
                            detail=ast.unparse(child)[:100]
                        ))

    def _check_long_functions(self, filepath: Path, content: str) -> None:
        """Check for functions with >200 lines or >10 parameters."""
        tree = ast.parse(content, filename=str(filepath))
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                if node.end_lineno and node.lineno:
                    length = node.end_lineno - node.lineno
                    if length > self.LONG_FUNCTION_LINES:
                        self.issues.append(EntropyIssue(
                            file=str(filepath),
                            line=node.lineno,
                            severity='high',
                            category='long_function',
                            description=(
                                f'Function `{node.name}` is {length} lines '
                                f'(>{self.LONG_FUNCTION_LINES})'
                            ),
                            detail=f'Lines {node.lineno}-{node.end_lineno}'
                        ))
                if len(node.args.args) > self.MAX_PARAMS:
                    self.issues.append(EntropyIssue(
                        file=str(filepath),
                        line=node.lineno,
                        severity='medium',
                        category='too_many_params',
                        description=(
                            f'Function `{node.name}` has '
                            f'{len(node.args.args)} parameters'
                        ),
                        detail=str(node.args.args[:5]) + '...'
                    ))

    def _check_todos(self, filepath: Path, content: str) -> None:
        """Check for TODO/FIXME/BUG comments."""
        for i, line in enumerate(content.split('\n'), 1):
            if re.search(r'(TODO|FIXME|BUG|HACK|XXX)', line, re.IGNORECASE):
                self.issues.append(EntropyIssue(
                    file=str(filepath),
                    line=i,
                    severity='low',
                    category='tech_debt',
                    description=line.strip()[:80],
                    detail='Technical debt marker'
                ))

    def _check_nesting_depth(self, filepath: Path, lines: List[str]) -> None:
        """Check for deeply nested code blocks (by leading-space count)."""
        for i, line in enumerate(lines, 1):
            if line and not line.strip().startswith('#'):
                stripped = line.lstrip()
                indent = len(line) - len(stripped)
                if indent > self.MAX_INDENT:
                    self.issues.append(EntropyIssue(
                        file=str(filepath),
                        line=i,
                        severity='medium',
                        category='deep_nesting',
                        description=f'Line has {indent} spaces of indentation',
                        detail=line.strip()[:60]
                    ))

    def _check_duplicate_patterns(self, filepath: Path, lines: List[str]) -> None:
        """Check for consecutive duplicate non-blank lines.

        Fix: the original only reported a duplicate run when a
        *non-duplicate* line followed it, so a run reaching the last line
        of the file was silently dropped.  The run is now also flushed
        after the loop ends.
        """
        def flush(end_index: int, dup_start: Optional[int]) -> None:
            # Report runs longer than 2 lines, matching the original threshold.
            if dup_start and end_index - dup_start > 2:
                self.issues.append(EntropyIssue(
                    file=str(filepath),
                    line=dup_start,
                    severity='high',
                    category='duplicate_lines',
                    description=(
                        f'{end_index - dup_start} consecutive duplicate lines '
                        f'starting at {dup_start}'
                    ),
                    detail=(lines[dup_start - 1].strip()[:60]
                            if dup_start <= len(lines) else '')
                ))

        prev_line = None
        dup_start = None
        i = 0
        for i, line in enumerate(lines, 1):
            stripped = line.strip()
            if stripped and not stripped.startswith('#') and stripped == prev_line:
                if dup_start is None:
                    dup_start = i - 1
            else:
                flush(i, dup_start)
                dup_start = None
            prev_line = stripped
        # Flush a run that extends to the end of the file.
        flush(i + 1, dup_start)

    def _check_magic_numbers(self, filepath: Path, lines: List[str]) -> None:
        """Check for magic numbers (unnamed constants).

        NOTE(review): the original regex and loop body were lost to
        extraction mangling (everything between ``r'(?<`` and the next
        ``>`` was stripped).  This is a reconstruction: a standalone
        integer of two or more digits that is not part of an identifier,
        attribute access, or decimal literal — confirm against the
        original source if available.
        """
        magic_pattern = re.compile(r'(?<![\w.])\d{2,}(?![\w.])')
        for i, line in enumerate(lines, 1):
            stripped = line.strip()
            if not stripped or stripped.startswith('#'):
                continue
            # Ignore anything after an inline comment marker.
            code_part = line.split('#', 1)[0]
            for match in magic_pattern.finditer(code_part):
                self.issues.append(EntropyIssue(
                    file=str(filepath),
                    line=i,
                    severity='low',
                    category='magic_number',
                    description=f'Magic number {match.group()}',
                    detail=stripped[:60]
                ))

    def run_audit(self) -> None:
        """Run audit on all Python files in src/."""
        py_files = list(self.src_dir.glob("*.py"))
        print(f"Auditing {len(py_files)} Python files in {self.src_dir}...")
        for filepath in sorted(py_files):
            if filepath.name == "__init__.py":
                continue
            try:
                self.analyze_file(filepath)
                self.files_analyzed += 1
            except Exception as e:
                # Best-effort: one unparseable file should not abort the audit.
                print(f"Error analyzing {filepath}: {e}")

    def generate_report(self) -> str:
        """Generate a markdown report of findings."""
        by_severity: Dict[str, List[EntropyIssue]] = {
            'high': [], 'medium': [], 'low': []
        }
        by_category: Dict[str, List[EntropyIssue]] = {}
        for issue in self.issues:
            by_severity[issue.severity].append(issue)
            by_category.setdefault(issue.category, []).append(issue)

        report = [
            "# Entropy Audit Report: src/",
            "",
            f"**Files Analyzed:** {self.files_analyzed}",
            f"**Total Lines:** {self.total_lines:,}",
            f"**Issues Found:** {len(self.issues)}",
            "",
            "## Summary by Severity",
            "",
            f"- **High:** {len(by_severity['high'])}",
            f"- **Medium:** {len(by_severity['medium'])}",
            f"- **Low:** {len(by_severity['low'])}",
            "",
            "## Summary by Category",
            ""
        ]
        for cat, issues in sorted(by_category.items()):
            report.append(f"- **{cat}:** {len(issues)}")

        report.extend(["", "## High Severity Issues", ""])
        for issue in sorted(by_severity['high'], key=lambda x: (x.file, x.line)):
            report.append(f"### {issue.file}")
            report.append(f"- **Line {issue.line}:** {issue.description}")
            if issue.detail:
                report.append(f"  - Detail: `{issue.detail[:80]}`")
            report.append("")

        report.extend(["", "## Medium Severity Issues", ""])
        medium = sorted(by_severity['medium'], key=lambda x: (x.file, x.line))
        for issue in medium[:self.MEDIUM_REPORT_LIMIT]:
            report.append(
                f"- **Line {issue.line}** ({issue.file}): {issue.description}"
            )
        if len(by_severity['medium']) > self.MEDIUM_REPORT_LIMIT:
            report.append(
                f"\n_... and "
                f"{len(by_severity['medium']) - self.MEDIUM_REPORT_LIMIT} "
                f"more medium issues_"
            )

        return "\n".join(report)


def main():
    auditor = EntropyAuditor("src")
    auditor.run_audit()
    report = auditor.generate_report()
    print(report)
    # Also write to file.  Fix: create the (deep, hard-coded) target
    # directory first so the script does not crash on a fresh checkout.
    report_path = ("conductor/tracks/data_oriented_optimization_20260312/"
                   "entropy_audit_report.md")
    Path(report_path).parent.mkdir(parents=True, exist_ok=True)
    with open(report_path, 'w') as f:
        f.write(report)
    print(f"\nReport written to {report_path}")


if __name__ == "__main__":
    main()