import tokenize
import io


def format_code(source: str) -> str:
    """Reformat Python source with a rigid whitespace policy.

    The output uses exactly one space per indentation level (continuation
    lines inside brackets count one extra level per open bracket), keeps at
    most one blank line between top-level definitions, and removes all blank
    lines inside indented bodies.  Interior lines of multi-line strings are
    preserved verbatim.

    Args:
        source: The Python source code to format.  Must be tokenizable;
            tokenizer errors for malformed input propagate to the caller.

    Returns:
        The formatted source code; non-empty output always ends in a newline.
    """
    if not source:
        return ""

    tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
    lines = source.splitlines(keepends=True)
    num_lines = len(lines)

    block_level = 0   # current INDENT/DEDENT nesting depth
    paren_level = 0   # current (), [], {} bracket nesting depth

    # Per-line bookkeeping, all keyed by 1-based physical line number.
    # (Sized num_lines + 2 because trailing DEDENT/ENDMARKER tokens can
    # report a start line one past the last physical line.)
    line_indent = {}
    line_is_blank = {i: True for i in range(1, num_lines + 2)}
    line_is_string_interior = {i: False for i in range(1, num_lines + 2)}
    line_seen = set()           # lines whose indent level is final
    pending_blank_lines = []    # blank lines whose owning block is not yet known

    for tok in tokens:
        t_type = tok.type
        t_string = tok.string
        start_line, _ = tok.start
        end_line, _ = tok.end

        # Interior lines of a multi-line string must be emitted verbatim;
        # only the string's first line gets re-indented.
        if t_type == tokenize.STRING:
            for l in range(start_line + 1, end_line + 1):
                line_is_string_interior[l] = True

        if t_type not in (tokenize.NL, tokenize.NEWLINE, tokenize.INDENT,
                          tokenize.DEDENT, tokenize.ENDMARKER):
            for l in range(start_line, end_line + 1):
                line_is_blank[l] = False
            pending_blank_lines = []  # real content seen, clear pending blanks

        # --- State updates that affect the CURRENT line ---
        if t_type == tokenize.INDENT:
            block_level += 1
            # Blank lines directly after a block header (tokenized before
            # this INDENT) belong to the new, deeper block, so assign them
            # the inner level: the output pass will then drop them.
            for l in pending_blank_lines:
                line_indent[l] = block_level + paren_level
        elif t_type == tokenize.DEDENT:
            block_level -= 1
            # Blank lines directly before a dedent belong to the outer block.
            for l in pending_blank_lines:
                line_indent[l] = block_level + paren_level

        # A closing bracket de-indents the line it starts on (decrement
        # before recording this line's level).
        if t_string in (')', ']', '}'):
            paren_level -= 1

        if start_line not in line_seen:
            line_indent[start_line] = block_level + paren_level
            # INDENT/DEDENT only give a provisional level; the first real
            # token on the line finalizes it.
            if t_type not in (tokenize.INDENT, tokenize.DEDENT):
                line_seen.add(start_line)

        # Remember genuinely blank lines: their level is only known once the
        # next INDENT/DEDENT (or content token) arrives.  NOTE: only NL on a
        # blank line qualifies — enqueueing NEWLINE (end of a content line)
        # would let a later DEDENT clobber the indent of the last statement
        # of a block.
        if t_type == tokenize.NL and line_is_blank[start_line]:
            pending_blank_lines.append(start_line)

        # --- State updates that affect FUTURE lines/tokens ---
        if t_string in ('(', '[', '{'):
            paren_level += 1

    output = []
    consecutive_blanks = 0
    for i in range(1, num_lines + 1):
        if line_is_string_interior[i]:
            output.append(lines[i - 1])
            continue
        if line_is_blank[i]:
            if line_indent.get(i, 0) > 0:
                continue  # blank line inside a body: drop it
            if consecutive_blanks < 1:
                output.append("\n")  # keep at most one top-level blank line
            consecutive_blanks += 1
            continue
        consecutive_blanks = 0
        indent = line_indent.get(i, 0)
        stripped = lines[i - 1].lstrip()
        output.append(" " * indent + stripped)
        if not stripped.endswith('\n') and i < num_lines:
            output[-1] += '\n'
    # Guarantee a trailing newline on non-empty output.
    if output and not output[-1].endswith('\n'):
        output[-1] += '\n'
    return "".join(output)


if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1:
        file_path = sys.argv[1]
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
        formatted = format_code(content)
        if len(sys.argv) > 2 and sys.argv[2] == "--write":
            # In-place rewrite when invoked as: script.py FILE --write
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(formatted)
        else:
            sys.stdout.reconfigure(encoding='utf-8')
            sys.stdout.write(formatted)