#!/usr/local/bin/python
# -*- Mode: Python -*-
# Author: Sam Rushing <rushing@nightmare.com>

"""re-indent a python source file.
  adjusts *only* whitespace in front
  adjusts comments if they have whitespace in front
  whitespace-only lines become a single linefeed
  indents multi-line statements (like the definition of 'adjust' below)"""

from tokenize import *

# If the first token on a line matches an entry in <adjust>, the
# indentation level is adjusted by the given value, but only for that
# line.

adjust = {
    # Maps token type -> {token text -> one-line correction to the level}.
    #
    # These entries are less a matter of 'policy' than a workaround for
    # the algorithm: the nesting level is updated as soon as a bracket
    # is read, but a line that *starts* with that bracket has to be
    # written at the level in effect just *before* it was read.
    OP: {
        '(': -1, '[': -1, '{': -1,
        ')': +1, ']': +1, '}': +1,
        },
    }

def redent (next, outfile, indent='\t'):
    """Write a re-indented copy of a token stream to <outfile>.

    <next> is a callable returning successive 5-tuples
    (type, string, start, end, line) in source order, finishing with an
    ENDMARKER token.  <outfile> only needs a write() method.  <indent>
    is the string written once per nesting level in front of each line.

    Only the first token of every (logical) line is used to produce
    output: the text of <line> from that token's column onwards is
    written after the new indentation.  The remaining tokens on the
    line are read solely to keep track of the nesting level.

    Returns None when ENDMARKER is reached.
    """

    def adjust_indent (ttype, token, level):
        # Level to use for a line whose first token is (ttype, token);
        # applies the one-line corrections listed in <adjust>.
        if ttype == NL:
            return 0
        # dict.get() instead of has_key(): has_key() does not exist on
        # newer interpreters.
        return level + adjust.get (ttype, {}).get (token, 0)

    def get_next_token (level, next=next):
        # Fetch one token and update the nesting level: INDENT and
        # opening brackets deepen it, DEDENT and closing brackets
        # reduce it.
        ttype, token, start, end, line = next()
        if ttype == INDENT:
            level = level + 1
        elif ttype == DEDENT:
            level = level - 1
        elif ttype == OP:
            if token in ('(','{','['):
                level = level + 1
            elif token in (')','}',']'):
                level = level - 1
        return level, ttype, token, start, end, line

    def flush (pending, depth, outfile=outfile, indent=indent):
        # Write every queued (text, reindent) entry and empty the queue.
        # Entries with a false <reindent> (blank lines, column-0
        # comments) are written untouched.
        prefix = depth * indent
        for text, reindent in pending:
            if reindent:
                outfile.write (prefix + text)
            else:
                outfile.write (text)
        del pending[:]

    level = 0
    # queue of (text, reindent) pairs waiting for the next code line
    pending = []

    # fetch the first token on the line
    level, ttype, token, start, end, line = get_next_token (level)

    while 1:
        if ttype in (INDENT, DEDENT):
            level, ttype, token, start, end, line = get_next_token (level)
        elif ttype == NL:
            if pending:
                # [fix from Hugh Gibson:]
                # collect blank lines for later indentation; a
                # whitespace-only line is always emitted as a bare '\n'
                pending.append (('\n', 0))
            else:
                outfile.write ('\n')
            level, ttype, token, start, end, line = get_next_token (level)
        elif ttype == ENDMARKER:
            # comments and blank lines still queued have no following
            # code line; write them at the final level rather than
            # silently dropping them
            flush (pending, level)
            return
        else:
            if ttype == COMMENT:
                # if a comment is at the front of a line, leave it there,
                # otherwise we have to wait to write out an adjusted comment
                # because the indentation level won't be known until we see
                # the next non-comment line
                if line[0] != '#':
                    pending.append ((line[start[1]:], 1))
                elif pending:
                    # keep it behind the lines already queued so that
                    # the original order of the lines is preserved
                    pending.append ((line, 0))
                else:
                    outfile.write (line)
            else:
                pending.append ((line[start[1]:], 1))
                flush (pending, adjust_indent (ttype, token, level))
            line_num = end[0]
            while 1:
                # skip the rest of the tokens on this line
                level, ttype, token, start, end, line = get_next_token (level)
                if start[0] != line_num:
                    break
                # subtle: a token that ends on a later row (e.g. a
                # multi-line string) extends the current line to that row
                line_num = end[0]

def preprocess (tokens):
    """Collapse logical lines and return the tokens in reverse order.

    <tokens> is a sequence of (type, string, start, end, line) tuples in
    source order.  The result is a new list holding one mutable list per
    token, reversed so that its pop() method hands the tokens back in
    source order.

    When a token spans several rows (start row != end row), the <line>
    field of every earlier token that ends on the row where it starts is
    replaced by the multi-row token's <line>.  The first token of such a
    line then carries the whole logical line, which makes the redent
    function much less complicated.
    """
    tokens = list (map (list, tokens))
    tokens.reverse()
    count = len (tokens)
    i = 0
    while i < count:
        start, end, line = tokens[i][2], tokens[i][3], tokens[i][4]
        i = i + 1
        if start[0] != end[0]:
            # token spans multiple lines: walk back (forward in the
            # reversed list) over the tokens sharing its first row
            first_row = start[0]
            while i < count and tokens[i][3][0] == first_row:
                tokens[i][4] = line
                i = i + 1
            # tokens[i], the first token of an earlier row, is left for
            # the outer loop to examine; skipping past it would miss a
            # multi-line token that directly precedes this line
    return tokens

def go (filename, outfile, indent):
    """reindent <filename> into <outfile> using <indent> as the indentation character.

    The file is tokenized completely, the tokens are passed through
    preprocess(), and redent() writes the result to <outfile> (any
    object with a write() method).  <indent> is the string written once
    per indentation level.
    """
    # Imported here because the star-import at the top of the file is
    # not guaranteed to export generate_tokens on every Python version.
    from tokenize import generate_tokens
    fi = open (filename, 'r')
    try:
        # the generator interface yields the same 5-tuples that the
        # older callback-style tokenize() entry point passed along
        tokens = list (generate_tokens (fi.readline))
    finally:
        # close the file even when tokenizing fails
        fi.close()
    tokens = preprocess (tokens)
    # preprocess() returns the tokens reversed, so pop() hands them
    # to redent() in source order
    redent (tokens.pop, outfile, indent)

def test (filename):
    """make a before/after bytecode comparison to make sure that
    reindentation has no effect on the code.

    <filename> is compiled as-is and again after being re-indented with
    two spaces per level; the marshalled code objects are compared.

    Returns a true value when both are equal, and also when the original
    source cannot be compiled (the file is skipped after a note on
    stderr); returns a false value when they differ.  An exception
    raised while compiling the re-indented source is not caught.
    """
    import marshal
    import sys
    try:
        from StringIO import StringIO
    except ImportError:
        # the StringIO module is gone on newer interpreters
        from io import StringIO

    fi = open (filename)
    try:
        o_src = fi.read()
    finally:
        fi.close()

    try:
        o_pyc = marshal.dumps (compile (o_src, filename, 'exec'))
    except Exception:
        # not a bare except: KeyboardInterrupt and SystemExit must
        # still be able to stop a long test run
        sys.stderr.write ('[exception compiling original, skipping...]')
        return 1

    i_file = StringIO()
    go (filename, i_file, '  ')
    i_src = i_file.getvalue()
    i_pyc = marshal.dumps (compile (i_src, filename, 'exec'))
    return o_pyc == i_pyc


if __name__ == '__main__':

    # Command-line driver: either re-indent one file to stdout, or (-t)
    # run the bytecode comparison test over a list of files.

    # '\\t' so that the help text shows the two characters the user is
    # expected to type, not a literal TAB.
    USAGE = """Usage:
   %(arg0)s <filename> <indent-string>
       re-indent a single file to stdout
       e.g.: %(arg0)s my_module.py '\\t'
   %(arg0)s -t <file1> [<file2> ...]
       test reindentation against files
       e.g.: %(arg0)s -t /usr/local/lib/python1.5/*.py"""

    import sys
    if (len(sys.argv) > 1) and (sys.argv[1] == '-t'):
        # progress and results go to stderr, one line per file
        for name in sys.argv[2:]:
            sys.stderr.write ('%s ' % name)
            if test (name):
                sys.stderr.write ('[passed]\n')
            else:
                sys.stderr.write ('[failed]\n')
    elif len(sys.argv) == 3:
        # eval() turns escape sequences such as \t in the argument into
        # the real characters.
        # NOTE(review): this evaluates text taken from the command line,
        # and an argument containing a double quote can run arbitrary
        # code -- only use with trusted arguments.
        indent = eval('"%s"' % sys.argv[2])
        go (sys.argv[1], sys.stdout, indent)
    else:
        # written with write() rather than a print statement so the
        # block also parses on interpreters where print is a function
        sys.stdout.write (USAGE % {'arg0':sys.argv[0]} + '\n')
