manual_slop/scripts/comprehensive_entropy_audit.py

#!/usr/bin/env python3
"""
Comprehensive Entropy Audit Script for Manual Slop src/
Checks for:
1. Duplicate function definitions
2. Duplicate class definitions
3. Very long functions (>200 lines)
4. Nested imports within functions
5. Inconsistent patterns (TODO, FIXME comments)
6. Cyclomatic complexity indicators (nested conditionals)
7. Dead code indicators (unused variables, commented out code)
"""

import os
import re
import ast
from pathlib import Path
from dataclasses import dataclass, field
from typing import List, Dict, Set, Optional

@dataclass
class EntropyIssue:
    file: str
    line: int
    severity: str  # 'high', 'medium', 'low'
    category: str
    description: str
    detail: str = ""

@dataclass
class FileAnalysis:
    path: str
    size_kb: float
    issues: List[EntropyIssue] = field(default_factory=list)
    stats: Dict = field(default_factory=dict)

class EntropyAuditor:
    def __init__(self, src_dir: str = "src"):
        self.src_dir = Path(src_dir)
        self.issues: List[EntropyIssue] = []
        self.files_analyzed = 0
        self.total_lines = 0

    def analyze_file(self, filepath: Path) -> FileAnalysis:
        with open(filepath, encoding='utf-8', errors='ignore') as f:
            content = f.read()

        lines = content.split('\n')
        self.total_lines += len(lines)

        analysis = FileAnalysis(
            path=str(filepath),
            size_kb=filepath.stat().st_size / 1024
        )

        # 1. Check for nested imports
        self._check_nested_imports(filepath, content)

        # 2. Check for very long functions
        self._check_long_functions(filepath, content)

        # 3. Check for TODO/FIXME
        self._check_todos(filepath, content)

        # 4. Check for nested depth (complexity)
        self._check_nesting_depth(filepath, lines)

        # 5. Check for duplicate code patterns
        self._check_duplicate_patterns(filepath, lines)

        # 6. Check for magic numbers
        self._check_magic_numbers(filepath, lines)

        return analysis

    def _check_nested_imports(self, filepath: Path, content: str) -> None:
        """Check for imports inside function bodies."""
        tree = ast.parse(content, filename=str(filepath))
        for node in ast.walk(tree):
            if isinstance(node, ast.FunctionDef):
                for child in ast.walk(node):
                    if isinstance(child, (ast.Import, ast.ImportFrom)):
                        # Check if it's not at module level
                        if not any(isinstance(p, (ast.Import, ast.ImportFrom)) for p in tree.body):
                            line = child.lineno or 0
                            self.issues.append(EntropyIssue(
                                file=str(filepath),
                                line=line,
                                severity='medium',
                                category='nested_import',
                                description=f'Nested import in function `{node.name}`',
                                detail=ast.unparse(child)[:100]
                            ))

    def _check_long_functions(self, filepath: Path, content: str) -> None:
        """Check for functions with >200 lines or >10 parameters."""
        tree = ast.parse(content, filename=str(filepath))
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                if node.end_lineno and node.lineno:
                    length = node.end_lineno - node.lineno
                    if length > 200:
                        self.issues.append(EntropyIssue(
                            file=str(filepath),
                            line=node.lineno,
                            severity='high',
                            category='long_function',
                            description=f'Function `{node.name}` is {length} lines (>{200})',
                            detail=f'Lines {node.lineno}-{node.end_lineno}'
                        ))
                    if len(node.args.args) > 10:
                        self.issues.append(EntropyIssue(
                            file=str(filepath),
                            line=node.lineno,
                            severity='medium',
                            category='too_many_params',
                            description=f'Function `{node.name}` has {len(node.args.args)} parameters',
                            detail=str(node.args.args[:5]) + '...'
                        ))

    def _check_todos(self, filepath: Path, content: str) -> None:
        """Check for TODO/FIXME/BUG comments."""
        for i, line in enumerate(content.split('\n'), 1):
            if re.search(r'(TODO|FIXME|BUG|HACK|XXX)', line, re.IGNORECASE):
                self.issues.append(EntropyIssue(
                    file=str(filepath),
                    line=i,
                    severity='low',
                    category='tech_debt',
                    description=line.strip()[:80],
                    detail=f'Technical debt marker'
                ))

    def _check_nesting_depth(self, filepath: Path, lines: List[str]) -> None:
        """Check for deeply nested code blocks."""
        for i, line in enumerate(lines, 1):
            if line and not line.strip().startswith('#'):
                # Count leading spaces
                stripped = line.lstrip()
                indent = len(line) - len(stripped)
                if indent > 20:  # More than ~10 levels deep
                    self.issues.append(EntropyIssue(
                        file=str(filepath),
                        line=i,
                        severity='medium',
                        category='deep_nesting',
                        description=f'Line has {indent} spaces of indentation',
                        detail=line.strip()[:60]
                    ))

    def _check_duplicate_patterns(self, filepath: Path, lines: List[str]) -> None:
        """Check for consecutive duplicate non-blank lines."""
        prev_line = None
        dup_start = None
        for i, line in enumerate(lines, 1):
            stripped = line.strip()
            if stripped and not stripped.startswith('#') and stripped == prev_line:
                if dup_start is None:
                    dup_start = i - 1
            else:
                if dup_start and i - dup_start > 2:
                    self.issues.append(EntropyIssue(
                        file=str(filepath),
                        line=dup_start,
                        severity='high',
                        category='duplicate_lines',
                        description=f'{i - dup_start} consecutive duplicate lines starting at {dup_start}',
                        detail=lines[dup_start-1].strip()[:60] if dup_start <= len(lines) else ''
                    ))
                dup_start = None
                prev_line = stripped

    def _check_magic_numbers(self, filepath: Path, lines: List[str]) -> None:
        """Check for magic numbers (unnamed constants)."""
        magic_pattern = re.compile(r'(?<!\w)([0-9]{3,})(?!\w)')  # Numbers with 3+ digits
        for i, line in enumerate(lines, 1):
            if not line.strip().startswith('#'):
                matches = magic_pattern.findall(line)
                for m in matches:
                    self.issues.append(EntropyIssue(
                        file=str(filepath),
                        line=i,
                        severity='low',
                        category='magic_number',
                        description=f'Magic number: {m}',
                        detail=line.strip()[:60]
                    ))

    def run_audit(self) -> None:
        """Run audit on all Python files in src/."""
        py_files = list(self.src_dir.glob("*.py"))
        print(f"Auditing {len(py_files)} Python files in {self.src_dir}...")

        for filepath in sorted(py_files):
            if filepath.name == "__init__.py":
                continue
            try:
                self.analyze_file(filepath)
                self.files_analyzed += 1
            except Exception as e:
                print(f"Error analyzing {filepath}: {e}")

    def generate_report(self) -> str:
        """Generate a markdown report of findings."""
        by_severity = {'high': [], 'medium': [], 'low': []}
        by_category = {}

        for issue in self.issues:
            by_severity[issue.severity].append(issue)
            if issue.category not in by_category:
                by_category[issue.category] = []
            by_category[issue.category].append(issue)

        report = [
            "# Entropy Audit Report: src/",
            "",
            f"**Files Analyzed:** {self.files_analyzed}",
            f"**Total Lines:** {self.total_lines:,}",
            f"**Issues Found:** {len(self.issues)}",
            "",
            "## Summary by Severity",
            "",
            f"- **High:** {len(by_severity['high'])}",
            f"- **Medium:** {len(by_severity['medium'])}",
            f"- **Low:** {len(by_severity['low'])}",
            "",
            "## Summary by Category",
            ""
        ]

        for cat, issues in sorted(by_category.items()):
            report.append(f"- **{cat}:** {len(issues)}")

        report.extend(["", "## High Severity Issues", ""])
        for issue in sorted(by_severity['high'], key=lambda x: (x.file, x.line)):
            report.append(f"### {issue.file}")
            report.append(f"- **Line {issue.line}:** {issue.description}")
            if issue.detail:
                report.append(f"  - Detail: `{issue.detail[:80]}`")
            report.append("")

        report.extend(["", "## Medium Severity Issues", ""])
        for issue in sorted(by_severity['medium'], key=lambda x: (x.file, x.line))[:50]:
            report.append(f"- **Line {issue.line}** ({issue.file}): {issue.description}")

        if len(by_severity['medium']) > 50:
            report.append(f"\n_... and {len(by_severity['medium']) - 50} more medium issues_")

        return "\n".join(report)


def main():
    auditor = EntropyAuditor("src")
    auditor.run_audit()
    report = auditor.generate_report()
    print(report)

    # Also write to file
    report_path = "conductor/tracks/data_oriented_optimization_20260312/entropy_audit_report.md"
    with open(report_path, 'w') as f:
        f.write(report)
    print(f"\nReport written to {report_path}")


if __name__ == "__main__":
    main()