manual_slop/scripts/ai_style_formatter.py

import tokenize
import io

def format_code(source: str) -> str:
    """
    Formats Python code to use exactly 1 space for indentation (including continuations),
    max 1 blank line between top-level definitions, and 0 blank lines inside
    function/method bodies.

    Args:
        source: The Python source code to format.

    Returns:
        The formatted source code.
    """
    if not source:
        return ""

    tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
    lines = source.splitlines(keepends=True)
    num_lines = len(lines)

    block_level = 0
    paren_level = 0
    in_function_stack = []
    expecting_function_indent = False

    line_indent = {}
    line_is_blank = {i: True for i in range(1, num_lines + 2)}
    line_is_string_interior = {i: False for i in range(1, num_lines + 2)}

    line_seen = set()
    pending_blank_lines = []

    for tok in tokens:
        t_type = tok.type
        t_string = tok.string
        start_line, _ = tok.start
        end_line, _ = tok.end

        if t_type == tokenize.STRING:
            for l in range(start_line + 1, end_line + 1):
                line_is_string_interior[l] = True

        if t_type not in (tokenize.NL, tokenize.NEWLINE, tokenize.INDENT, tokenize.DEDENT, tokenize.ENDMARKER):
            for l in range(start_line, end_line + 1):
                line_is_blank[l] = False
            pending_blank_lines = [] # Real content seen, clear pending blanks

        # State updates that affect CURRENT line
        if t_type == tokenize.INDENT:
            block_level += 1
            if expecting_function_indent:
                in_function_stack.append(block_level)
                expecting_function_indent = False
        elif t_type == tokenize.DEDENT:
            block_level -= 1
            if in_function_stack and block_level < in_function_stack[-1]:
                in_function_stack.pop()
            # Retroactively update pending blank lines to the current (outer) level
            for l in pending_blank_lines:
                line_indent[l] = block_level + paren_level

        if t_string in (')', ']', '}'):
            paren_level -= 1

        if start_line not in line_seen:
            line_indent[start_line] = block_level + paren_level
            if t_type not in (tokenize.INDENT, tokenize.DEDENT):
                line_seen.add(start_line)
                if t_type in (tokenize.NL, tokenize.NEWLINE):
                    pending_blank_lines.append(start_line)

        # State updates that affect FUTURE lines/tokens
        if t_type == tokenize.NAME and t_string == 'def':
            expecting_function_indent = True
        if t_string in ('(', '[', '{'):
            paren_level += 1

    output = []
    consecutive_blanks = 0

    for i in range(1, num_lines + 1):
        if line_is_string_interior[i]:
            output.append(lines[i-1])
            continue

        if line_is_blank[i]:
            indent = line_indent.get(i, 0)
            if indent > 0:
                continue
            else:
                if consecutive_blanks < 1:
                    output.append("\n")
                    consecutive_blanks += 1
                continue

        consecutive_blanks = 0
        original_line = lines[i-1]
        indent = line_indent.get(i, 0)
        stripped = original_line.lstrip()

        output.append(" " * indent + stripped)
        if not stripped.endswith('\n') and i < num_lines:
            output[-1] += '\n'

    if output and not output[-1].endswith('\n'):
        output[-1] += '\n'

    return "".join(output)

if __name__ == "__main__":
    import sys
    import os
    if len(sys.argv) > 1:
        file_path = sys.argv[1]
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
        formatted = format_code(content)
        if len(sys.argv) > 2 and sys.argv[2] == "--write":
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(formatted)
        else:
            sys.stdout.reconfigure(encoding='utf-8')
            sys.stdout.write(formatted)