#!/usr/local/bin/python
# -*- Mode: Python -*-
# Author: Sam Rushing

"""Re-indent a Python source file.

* adjusts *only* the whitespace in front of each line
* adjusts comments if they have whitespace in front
* whitespace-only lines become a single linefeed
* indents multi-line statements (like the definition of 'adjust' below)

The code is written to run unchanged on Python 2.6+ and Python 3.
"""

import sys
import tokenize
from tokenize import COMMENT, DEDENT, ENDMARKER, INDENT, NL, OP

# `test` is intentionally left out: it is the command-line self-check (see
# the `-t` option) and star-importing it into a test module would make test
# runners try to collect it.  It is still reachable as a module attribute.
__all__ = ['adjust', 'redent', 'preprocess', 'go']

# If the first token on a line is found in `adjust`, then the
# indentation level is adjusted by the given value, but only on that
# line.
adjust = {
    # these are not really 'policy' adjustments, but are here because
    # of a fault in the algorithm: the indentation level is set once
    # the operator is seen, but the correct level is that from just
    # *before* it was seen
    OP: {
        '(': -1,
        '[': -1,
        '{': -1,
        ')': +1,
        ']': +1,
        '}': +1,
    },
}

_OPENERS = ('(', '{', '[')
_CLOSERS = (')', '}', ']')


def redent(next, outfile, indent='\t'):
    """Write a re-indented copy of a token stream to `outfile`.

    next    -- zero-argument callable returning the next 5-tuple
               (type, string, start, end, line) in source order; the
               stream must finish with an ENDMARKER token.  `go` passes
               the `pop` method of the reversed list built by `preprocess`.
    outfile -- any object with a `write(str)` method.
    indent  -- the string emitted once per indentation level.

    Only the first token of each line matters: the line is written as
    `indent * level` followed by the line's text from that token onwards.
    Indented comments and the blank lines after them are queued until the
    next statement reveals the level they belong to.
    """

    def adjust_indent(ttype, token, level):
        # Level to use for a line whose first token is (ttype, token).
        if ttype == NL:
            return 0
        return level + adjust.get(ttype, {}).get(token, 0)

    def get_next_token(level):
        # Fetch one token and fold its effect into the running level.
        ttype, token, start, end, line = next()
        if ttype == INDENT:
            level = level + 1
        elif ttype == DEDENT:
            level = level - 1
        elif ttype == OP:
            if token in _OPENERS:
                level = level + 1
            elif token in _CLOSERS:
                level = level - 1
        return level, ttype, token, start, end, line

    def flush(depth):
        # Emit everything queued so far, in source order.
        prefix = depth * indent
        for text, reindent in pending:
            if reindent:
                outfile.write(prefix + text)
            else:
                outfile.write(text)
        del pending[:]

    level = 0
    # Queue of (text, reindent) pairs waiting for the next statement.
    pending = []
    # Level in force when the most recent indented comment was queued;
    # only used if the file ends before another statement shows up.
    comment_level = 0

    # fetch the first token on the line
    level, ttype, token, start, end, line = get_next_token(level)

    while True:
        if ttype in (INDENT, DEDENT):
            level, ttype, token, start, end, line = get_next_token(level)
            continue

        if ttype == ENDMARKER:
            # Trailing comments / blank lines used to be dropped here.
            flush(comment_level)
            return

        if ttype == NL:
            if pending:
                # [fix from Hugh Gibson:]
                # collect blank lines for later indentation
                pending.append(('\n', False))
            else:
                outfile.write('\n')
            level, ttype, token, start, end, line = get_next_token(level)
            continue

        if ttype == COMMENT:
            # if a comment is at the front of a line, leave it there,
            # otherwise we have to wait to write out an adjusted comment
            # because the indentation level won't be known until we see
            # the next non-comment line
            if line[0] != '#':
                pending.append((line[start[1]:], True))
                comment_level = level
            elif pending:
                # keep it behind the comments already queued so that the
                # original order of the lines is preserved
                pending.append((line, False))
            else:
                outfile.write(line)
        else:
            pending.append((line[start[1]:], True))
            flush(adjust_indent(ttype, token, level))

        # Skip the remaining tokens of this (logical) line.
        line_num = end[0]
        while True:
            level, ttype, token, start, end, line = get_next_token(level)
            if ttype == ENDMARKER or start[0] != line_num:
                break
            # subtle: a token may end on a later row than it started
            # (multi-line string), which extends the line being skipped
            line_num = end[0]


def preprocess(tokens):
    """Return `tokens` reversed, with multi-line tokens collapsed.

    Scans the token list to collapse logical lines; this makes the
    `redent` function much less complicated.

    Each token is copied into a mutable list
    [type, string, start, end, line].  When a token spans several rows,
    every earlier token that ends on the row where it starts gets its
    `line` replaced by the spanning token's `line`, so the first token of
    that line carries the complete text.

    The result is in *reverse* source order so that `list.pop` yields the
    tokens front to back.
    """
    tokens = [list(tok) for tok in tokens]
    tokens.reverse()
    count = len(tokens)
    i = 0
    while i < count:
        start, end, line = tokens[i][2], tokens[i][3], tokens[i][4]
        i = i + 1
        if start[0] == end[0]:
            continue
        # token spans multiple lines, collapse into a single logical line
        first_row = start[0]
        while i < count and tokens[i][3][0] == first_row:
            tokens[i][4] = line
            i = i + 1
        # `i` now rests on the first token of a different row; the outer
        # loop examines it next, so a multi-line token there is not missed.
    return tokens


def _open_source(filename):
    """Open `filename` for reading as text."""
    # Python 3's tokenize.open honours the source encoding declaration;
    # Python 2 has no such helper, so fall back to the builtin open.
    opener = getattr(tokenize, 'open', open)
    return opener(filename)


def go(filename, outfile, indent):
    """Reindent <filename> into <outfile> using <indent> as the
    indentation string."""
    with _open_source(filename) as fi:
        tokens = list(tokenize.generate_tokens(fi.readline))
    tokens = preprocess(tokens)
    redent(tokens.pop, outfile, indent)


def test(filename):
    """Make a before/after bytecode comparison to make sure that
    reindentation has no effect on the code.

    Returns a true value when the file passes (or when the original does
    not compile and is therefore skipped), a false value otherwise.
    """
    import marshal
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO

    with _open_source(filename) as fi:
        o_src = fi.read()
    try:
        o_pyc = marshal.dumps(compile(o_src, filename, 'exec'))
    except Exception:
        # best effort: a file that does not compile cannot be compared
        sys.stderr.write('[exception compiling original, skipping...]')
        return 1
    i_file = StringIO()
    go(filename, i_file, ' ')
    i_src = i_file.getvalue()
    i_pyc = marshal.dumps(compile(i_src, filename, 'exec'))
    return o_pyc == i_pyc


if __name__ == '__main__':
    import ast

    USAGE = """Usage: %(arg0)s <filename> <indent>
  re-indent a single file to stdout
  e.g.: %(arg0)s my_module.py '\\t'
%(arg0)s -t <filename> [<filename> ...]
  test reindentation against files
  e.g.: %(arg0)s -t /usr/local/lib/python1.5/*.py"""

    if (len(sys.argv) > 1) and (sys.argv[1] == '-t'):
        for name in sys.argv[2:]:
            sys.stderr.write('%s ' % name)
            if test(name):
                sys.stderr.write('[passed]\n')
            else:
                sys.stderr.write('[failed]\n')
    elif len(sys.argv) == 3:
        # Interpret backslash escapes such as '\t' in the argument.
        # literal_eval only accepts literals, so unlike eval() it cannot
        # execute code supplied on the command line.
        indent = ast.literal_eval('"%s"' % sys.argv[2])
        go(sys.argv[1], sys.stdout, indent)
    else:
        sys.stdout.write(USAGE % {'arg0': sys.argv[0]} + '\n')