"""Whitespace-minimising Python source formatter.

Re-indents Python code with one space per nesting level and collapses
blank lines, using the ``tokenize`` module to discover structure.
"""

import io
import os
import sys
import tokenize

# Tokens that carry layout only; a line holding nothing else is "blank".
_LAYOUT_TOKENS = frozenset({
    tokenize.NL,
    tokenize.NEWLINE,
    tokenize.INDENT,
    tokenize.DEDENT,
    tokenize.ENDMARKER,
})
_BLOCK_TOKENS = frozenset({tokenize.INDENT, tokenize.DEDENT})
_OPENERS = frozenset("([{")
_CLOSERS = frozenset(")]}")

# Python 3.12+ tokenizes f-strings (and 3.14+ t-strings) as START/MIDDLE/END
# pieces instead of a single STRING token; older versions lack these names.
_TEMPLATE_STARTS = frozenset(
    getattr(tokenize, name)
    for name in ("FSTRING_START", "TSTRING_START")
    if hasattr(tokenize, name)
)
_TEMPLATE_ENDS = frozenset(
    getattr(tokenize, name)
    for name in ("FSTRING_END", "TSTRING_END")
    if hasattr(tokenize, name)
)

# Line prefixes that trigger insertion of a separating blank line.
_DEF_PREFIXES = ('def ', 'class ', 'async def ', '@')


def _scan_layout(tokens):
    """Walk the token stream and collect per-line layout facts.

    Returns a 4-tuple ``(line_indent, code_lines, string_interior,
    statement_starts)`` where ``line_indent`` maps a 1-based line number to
    its nesting depth (block depth + open-bracket depth) and the other three
    are sets of 1-based line numbers.
    """
    block_level = 0
    paren_level = 0
    line_indent = {}
    code_lines = set()
    string_interior = set()
    statement_starts = set()
    seen_lines = set()
    pending_blank = []
    template_open_lines = []
    at_statement_start = True

    for tok in tokens:
        t_type = tok.type
        start_line = tok.start[0]
        end_line = tok.end[0]

        # Lines after the first line of a multi-line string literal belong to
        # the literal's value and must be emitted verbatim.
        if t_type == tokenize.STRING:
            string_interior.update(range(start_line + 1, end_line + 1))
        elif t_type in _TEMPLATE_STARTS:
            template_open_lines.append(start_line)
        elif t_type in _TEMPLATE_ENDS and template_open_lines:
            opened_at = template_open_lines.pop()
            string_interior.update(range(opened_at + 1, end_line + 1))

        if t_type not in _LAYOUT_TOKENS:
            code_lines.update(range(start_line, end_line + 1))
            pending_blank = []

        # Remember which physical lines begin a logical statement, so that
        # continuation lines are never mistaken for definitions.
        if t_type == tokenize.NEWLINE:
            at_statement_start = True
        elif (at_statement_start and t_type not in _LAYOUT_TOKENS
                and t_type != tokenize.COMMENT):
            statement_starts.add(start_line)
            at_statement_start = False

        if t_type == tokenize.INDENT:
            block_level += 1
        elif t_type == tokenize.DEDENT:
            block_level -= 1
            # Blank lines seen just before a dedent belong to the outer
            # level, not to the block that has just ended.
            for blank_line in pending_blank:
                line_indent[blank_line] = block_level + paren_level

        # Only real operator tokens count as brackets: on 3.12+ an
        # FSTRING_MIDDLE token can have the text "{" or "}".
        is_op = t_type == tokenize.OP
        if is_op and tok.string in _CLOSERS:
            # Decrement first so a closing bracket that leads its line is
            # aligned with the line that opened it.
            paren_level -= 1

        if start_line not in seen_lines:
            line_indent[start_line] = block_level + paren_level
            if t_type not in _BLOCK_TOKENS:
                seen_lines.add(start_line)
                if t_type in (tokenize.NL, tokenize.NEWLINE):
                    pending_blank.append(start_line)

        if is_op and tok.string in _OPENERS:
            paren_level += 1

    return line_indent, code_lines, string_interior, statement_starts


def _render(lines, line_indent, code_lines, string_interior, statement_starts):
    """Build the formatted text from the physical lines and layout facts."""
    output = []
    blank_run = 0

    for number, original in enumerate(lines, start=1):
        if number in string_interior:
            output.append(original)
            continue

        depth = line_indent.get(number, 0)

        if number not in code_lines:
            # Blank line: kept only at depth 0, and at most one in a row.
            if depth == 0 and blank_run < 1:
                output.append("\n")
                blank_run += 1
            continue

        stripped = original.lstrip()
        starts_definition = (
            number in statement_starts and stripped.startswith(_DEF_PREFIXES)
        )
        if starts_definition and output and blank_run == 0:
            previous = output[-1].strip()
            # No separator directly after a block opener or a decorator.
            if (previous and not previous.endswith(':')
                    and not previous.startswith('@')):
                output.append("\n")

        blank_run = 0
        output.append(" " * depth + stripped)

    if output and not output[-1].endswith('\n'):
        output[-1] += '\n'
    return "".join(output)


def format_code(source: str) -> str:
    """Reformat Python source with minimal whitespace.

    Every code line is re-indented with one space per nesting level, where
    the level is the block depth plus the number of open brackets. Blank
    lines inside an indented block or inside brackets are dropped, and runs
    of blank lines at depth 0 collapse to a single one. A blank line is
    inserted before a ``def``/``class``/``async def``/decorator line unless
    the preceding output line is blank, ends with ``:`` or is a decorator.
    Interior lines of multi-line string literals are emitted unchanged.

    Args:
        source: Python source text.

    Returns:
        The formatted text, ending in a newline. An empty ``source`` yields
        ``""``; text the tokenizer rejects is returned unchanged.
    """
    if not source:
        return ""

    try:
        tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
    except (tokenize.TokenError, SyntaxError):
        # SyntaxError also covers IndentationError, which the tokenizer
        # raises for inconsistent dedents. Not valid Python (e.g. a template
        # file): leave it untouched.
        return source

    # Split exactly as the tokenizer's reader does. str.splitlines() also
    # breaks on \x0b, \x0c, \u2028 etc., which would desynchronise these
    # physical lines from the tokenizer's line numbers.
    lines = io.StringIO(source).readlines()
    return _render(lines, *_scan_layout(tokens))


def process_file(file_path: str, write: bool) -> None:
    """Format a single file.

    Args:
        file_path: Path of the file to read as UTF-8.
        write: When true, overwrite the file if formatting changed it and
            print a ``Formatted:`` notice; otherwise write the formatted
            text to standard output.

    Any exception is reported on standard error and swallowed, so one bad
    file does not abort a multi-file run.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
        formatted = format_code(content)
        if write:
            if formatted != content:
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write(formatted)
                print(f"Formatted: {file_path}")
        else:
            # stdout may have been replaced by an object without
            # reconfigure(); only switch the encoding when it is supported.
            reconfigure = getattr(sys.stdout, "reconfigure", None)
            if reconfigure is not None:
                reconfigure(encoding='utf-8')
            sys.stdout.write(formatted)
    except Exception as e:
        # Diagnostics go to stderr so they never mix with formatted output.
        print(f"Error processing {file_path}: {e}", file=sys.stderr)


def main() -> None:
    """Command-line entry point: format the given files and directories."""
    import argparse

    parser = argparse.ArgumentParser(description="AI-optimized Python code formatter.")
    parser.add_argument("paths", nargs="+", help="Files or directories to format.")
    parser.add_argument("--write", action="store_true", help="Write changes back to files.")
    parser.add_argument("--exclude", nargs="*", default=[".venv", "__pycache__", ".git"], help="Directories to exclude.")
    args = parser.parse_args()

    for path in args.paths:
        if os.path.isfile(path):
            process_file(path, args.write)
        elif os.path.isdir(path):
            for root, dirs, files in os.walk(path):
                # Prune in place so os.walk does not descend into excluded
                # directories.
                dirs[:] = [d for d in dirs if d not in args.exclude]
                for file in files:
                    if file.endswith(".py"):
                        process_file(os.path.join(root, file), args.write)
        else:
            print(f"Skipping {path}: not a file or directory", file=sys.stderr)


if __name__ == "__main__":
    main()