updates to tools and mma skills
This commit is contained in:
@@ -1,125 +1,130 @@
|
||||
import tokenize
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
|
||||
def format_code(source: str) -> str:
|
||||
"""
|
||||
"""
|
||||
Formats Python code to use exactly 1 space for indentation (including continuations),
|
||||
max 1 blank line between top-level definitions, and 0 blank lines inside
|
||||
function/method bodies.
|
||||
|
||||
Args:
|
||||
source: The Python source code to format.
|
||||
|
||||
Returns:
|
||||
The formatted source code.
|
||||
"""
|
||||
if not source:
|
||||
return ""
|
||||
|
||||
tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
|
||||
lines = source.splitlines(keepends=True)
|
||||
num_lines = len(lines)
|
||||
|
||||
block_level = 0
|
||||
paren_level = 0
|
||||
in_function_stack = []
|
||||
if not source:
|
||||
return ""
|
||||
try:
|
||||
tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
|
||||
except tokenize.TokenError:
|
||||
return source # Return as-is if it's not valid python (e.g. template files)
|
||||
lines = source.splitlines(keepends=True)
|
||||
num_lines = len(lines)
|
||||
block_level = 0
|
||||
paren_level = 0
|
||||
in_function_stack = []
|
||||
expecting_function_indent = False
|
||||
line_indent = {}
|
||||
line_is_blank = {i: True for i in range(1, num_lines + 2)}
|
||||
line_is_string_interior = {i: False for i in range(1, num_lines + 2)}
|
||||
line_seen = set()
|
||||
pending_blank_lines = []
|
||||
for tok in tokens:
|
||||
t_type = tok.type
|
||||
t_string = tok.string
|
||||
start_line, _ = tok.start
|
||||
end_line, _ = tok.end
|
||||
if t_type == tokenize.STRING:
|
||||
for l in range(start_line + 1, end_line + 1):
|
||||
line_is_string_interior[l] = True
|
||||
if t_type not in (tokenize.NL, tokenize.NEWLINE, tokenize.INDENT, tokenize.DEDENT, tokenize.ENDMARKER):
|
||||
for l in range(start_line, end_line + 1):
|
||||
line_is_blank[l] = False
|
||||
pending_blank_lines = []
|
||||
if t_type == tokenize.INDENT:
|
||||
block_level += 1
|
||||
if expecting_function_indent:
|
||||
in_function_stack.append(block_level)
|
||||
expecting_function_indent = False
|
||||
|
||||
line_indent = {}
|
||||
line_is_blank = {i: True for i in range(1, num_lines + 2)}
|
||||
line_is_string_interior = {i: False for i in range(1, num_lines + 2)}
|
||||
|
||||
line_seen = set()
|
||||
pending_blank_lines = []
|
||||
|
||||
for tok in tokens:
|
||||
t_type = tok.type
|
||||
t_string = tok.string
|
||||
start_line, _ = tok.start
|
||||
end_line, _ = tok.end
|
||||
|
||||
if t_type == tokenize.STRING:
|
||||
for l in range(start_line + 1, end_line + 1):
|
||||
line_is_string_interior[l] = True
|
||||
|
||||
if t_type not in (tokenize.NL, tokenize.NEWLINE, tokenize.INDENT, tokenize.DEDENT, tokenize.ENDMARKER):
|
||||
for l in range(start_line, end_line + 1):
|
||||
line_is_blank[l] = False
|
||||
pending_blank_lines = [] # Real content seen, clear pending blanks
|
||||
elif t_type == tokenize.DEDENT:
|
||||
block_level -= 1
|
||||
if in_function_stack and block_level < in_function_stack[-1]:
|
||||
in_function_stack.pop()
|
||||
for l in pending_blank_lines:
|
||||
line_indent[l] = block_level + paren_level
|
||||
if t_string in (')', ']', '}'):
|
||||
paren_level -= 1
|
||||
if start_line not in line_seen:
|
||||
line_indent[start_line] = block_level + paren_level
|
||||
if t_type not in (tokenize.INDENT, tokenize.DEDENT):
|
||||
line_seen.add(start_line)
|
||||
if t_type in (tokenize.NL, tokenize.NEWLINE):
|
||||
pending_blank_lines.append(start_line)
|
||||
if t_type == tokenize.NAME and t_string == 'def':
|
||||
expecting_function_indent = True
|
||||
if t_string in ('(', '[', '{'):
|
||||
paren_level += 1
|
||||
output = []
|
||||
consecutive_blanks = 0
|
||||
for i in range(1, num_lines + 1):
|
||||
if line_is_string_interior[i]:
|
||||
output.append(lines[i-1])
|
||||
continue
|
||||
if line_is_blank[i]:
|
||||
indent = line_indent.get(i, 0)
|
||||
if indent > 0:
|
||||
continue
|
||||
else:
|
||||
if consecutive_blanks < 1:
|
||||
output.append("\n")
|
||||
consecutive_blanks += 1
|
||||
continue
|
||||
original_line = lines[i-1]
|
||||
indent = line_indent.get(i, 0)
|
||||
stripped = original_line.lstrip()
|
||||
is_def_start = stripped.startswith(('def ', 'class ', 'async def ', '@'))
|
||||
if is_def_start and output and consecutive_blanks == 0:
|
||||
prev_line = output[-1].strip()
|
||||
if prev_line and not prev_line.endswith(':') and not prev_line.startswith('@'):
|
||||
output.append("\n")
|
||||
consecutive_blanks += 1
|
||||
consecutive_blanks = 0
|
||||
output.append(" " * indent + stripped)
|
||||
if not stripped.endswith('\n') and i < num_lines:
|
||||
output[-1] += '\n'
|
||||
if output and not output[-1].endswith('\n'):
|
||||
output[-1] += '\n'
|
||||
return "".join(output)
|
||||
|
||||
# State updates that affect CURRENT line
|
||||
if t_type == tokenize.INDENT:
|
||||
block_level += 1
|
||||
if expecting_function_indent:
|
||||
in_function_stack.append(block_level)
|
||||
expecting_function_indent = False
|
||||
elif t_type == tokenize.DEDENT:
|
||||
block_level -= 1
|
||||
if in_function_stack and block_level < in_function_stack[-1]:
|
||||
in_function_stack.pop()
|
||||
# Retroactively update pending blank lines to the current (outer) level
|
||||
for l in pending_blank_lines:
|
||||
line_indent[l] = block_level + paren_level
|
||||
|
||||
if t_string in (')', ']', '}'):
|
||||
paren_level -= 1
|
||||
def process_file(file_path: str, write: bool) -> None:
    """Format one file, either rewriting it in place or echoing it to stdout.

    Args:
        file_path: Path of the file to read; it is decoded as UTF-8.
        write: When true, the file is overwritten only if formatting changed
            its content, and a "Formatted: ..." notice is printed. When false,
            the formatted text is written to standard output instead.

    Any exception raised while reading, formatting or writing is caught and
    reported on standard error. Keeping the diagnostic off stdout matters in
    non-write mode, where stdout carries the formatted code itself.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
        formatted = format_code(content)
        if write:
            # Skip the write when nothing changed so untouched files keep
            # their modification time and no notice is printed for them.
            if formatted != content:
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write(formatted)
                print(f"Formatted: {file_path}")
        else:
            # Replacement streams (e.g. io.StringIO used for capturing output)
            # have no reconfigure(); only call it when it is available.
            reconfigure = getattr(sys.stdout, "reconfigure", None)
            if reconfigure is not None:
                reconfigure(encoding="utf-8")
            sys.stdout.write(formatted)
    except Exception as e:
        # Best-effort boundary: report the failure and return normally so the
        # caller can carry on.
        print(f"Error processing {file_path}: {e}", file=sys.stderr)
|
||||
|
||||
if start_line not in line_seen:
|
||||
line_indent[start_line] = block_level + paren_level
|
||||
if t_type not in (tokenize.INDENT, tokenize.DEDENT):
|
||||
line_seen.add(start_line)
|
||||
if t_type in (tokenize.NL, tokenize.NEWLINE):
|
||||
pending_blank_lines.append(start_line)
|
||||
|
||||
# State updates that affect FUTURE lines/tokens
|
||||
if t_type == tokenize.NAME and t_string == 'def':
|
||||
expecting_function_indent = True
|
||||
if t_string in ('(', '[', '{'):
|
||||
paren_level += 1
|
||||
|
||||
output = []
|
||||
consecutive_blanks = 0
|
||||
|
||||
for i in range(1, num_lines + 1):
|
||||
if line_is_string_interior[i]:
|
||||
output.append(lines[i-1])
|
||||
continue
|
||||
|
||||
if line_is_blank[i]:
|
||||
indent = line_indent.get(i, 0)
|
||||
if indent > 0:
|
||||
continue
|
||||
else:
|
||||
if consecutive_blanks < 1:
|
||||
output.append("\n")
|
||||
consecutive_blanks += 1
|
||||
continue
|
||||
|
||||
consecutive_blanks = 0
|
||||
original_line = lines[i-1]
|
||||
indent = line_indent.get(i, 0)
|
||||
stripped = original_line.lstrip()
|
||||
|
||||
output.append(" " * indent + stripped)
|
||||
if not stripped.endswith('\n') and i < num_lines:
|
||||
output[-1] += '\n'
|
||||
|
||||
if output and not output[-1].endswith('\n'):
|
||||
output[-1] += '\n'
|
||||
|
||||
return "".join(output)
|
||||
def main() -> None:
    """Command-line entry point: format the files and directories given in argv.

    Positional ``paths`` may name files or directories. A file is passed to
    ``process_file`` as-is (whatever its extension). A directory is walked
    recursively and every ``*.py`` file found is processed; sub-directories
    whose name appears in ``--exclude`` are pruned from the walk. ``--write``
    is forwarded to ``process_file`` to select in-place rewriting.

    A path that is neither a file nor a directory is reported on standard
    error and skipped, so a mistyped path is not silently ignored.
    """
    import argparse

    parser = argparse.ArgumentParser(description="AI-optimized Python code formatter.")
    parser.add_argument("paths", nargs="+", help="Files or directories to format.")
    parser.add_argument("--write", action="store_true", help="Write changes back to files.")
    parser.add_argument("--exclude", nargs="*", default=[".venv", "__pycache__", ".git"], help="Directories to exclude.")
    args = parser.parse_args()
    for path in args.paths:
        if os.path.isfile(path):
            process_file(path, args.write)
        elif os.path.isdir(path):
            for root, dirs, files in os.walk(path):
                # Assign through the slice so os.walk sees the pruned list and
                # does not descend into excluded directories.
                dirs[:] = [d for d in dirs if d not in args.exclude]
                for file in files:
                    if file.endswith(".py"):
                        process_file(os.path.join(root, file), args.write)
        else:
            print(f"Skipping {path}: not a file or directory", file=sys.stderr)
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
import os
|
||||
if len(sys.argv) > 1:
|
||||
file_path = sys.argv[1]
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
formatted = format_code(content)
|
||||
if len(sys.argv) > 2 and sys.argv[2] == "--write":
|
||||
with open(file_path, "w", encoding="utf-8") as f:
|
||||
f.write(formatted)
|
||||
else:
|
||||
sys.stdout.reconfigure(encoding='utf-8')
|
||||
sys.stdout.write(formatted)
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user