# -*- Mode: Python -*- # # removed from automata.py - minimization doesn't do much for a lexer, # since you have to keep the final states separate in order to distinguish # the different kinds of tokens. # # Note: the trick used here seems to work, though. # Instead of starting your partition with final and non-final states, # divide the final set in a way that retains the distinction between # the sub-expressions (wrapped in TOKEN by lexer.py) def minimize (self, collapse_finals=True): # XXX using minimization for a lexer is probably not worth it. n_states = len (self.states) W ('self.finals=%r\n' % (self.finals,)) final = self.finals t = {} for fs, ch, ts in self.dfa: t2 = t.get (fs, {}) t2[ch] = ts t[fs] = t2 W ('minimize - input dfa:\n') pp (t) W ('minimize - finals: %r\n' % (final,)) # make sure final states exist for f in final: if not t.has_key (f): t[f] = {} # partition non_final = set (t.keys()).difference (final) if not non_final or not final: W ('warning: dfa is all-final or all-non-final\n') if collapse_finals: # standard: partition into final and non-final part = [final, non_final] else: # leave each final state distinguished # (useful for lexing) part = [[x] for x in final] + [non_final] last_size = len(part) W ('part=%r\n' % (part,)) # churn while 1: # make state->partition map m = {} for i in range(len(part)): for s in part[i]: m[s] = i W ('----------------------------------------\n') W ('part=%r\n' % (part,)) new_part = [] for sub in part: if len(sub) == 1: # can't refine a one-element partition new_part.append (sub) else: refined = {} # is a map from: # ((sym0, ts0), (sym1, ts1), ...) => [, , ...] # where each is a to-state in the current # partition. # each element of defines a new sub-partition # of the current partition. for fs in sub: key = {} for sym, ts in t[fs].items(): ts2 = m[ts] key[(sym, ts2)] = None key = key.keys() key.sort() key = tuple(key) val = refined.get (key, []) val.append (fs) refined[key] = val #W ('sub %r refined to\n' % (sub,)) #pp (refined) # now we can extra the unique sub-sets of # out of r2 = set() for sub in refined.values(): r2.add (tuple(sub)) # extend the new partition scheme with this newly # newly refined set. new_part.extend ([list(x) for x in r2]) #W ('new_part=%r\n' % (part,)) if len(new_part) == len(part): break else: part = new_part # now holds our final partitioning of DFA states. # collapse those that are equivalent #W ('final partition: %r\n' % (part,)) # we sort the new partition so that state zero (initial) is in front... for p in part: p.sort() part.sort() # make old_state->new_state map m = {} for i in range(len(part)): for s in part[i]: m[s] = i # translate dfa old_dfa = t new_dfa = [] for i in range(len(part)): # pick the first of the equiv set if len(part[i]): equiv = part[i][0] old_trans = old_dfa[equiv] new_trans = [] for (sym, ts) in old_trans.items(): new_trans.append ((sym, m[ts])) new_dfa.append (new_trans) # translate final states new_finals = set() for f in final: new_finals.add (m[f]) # ugh, what a bitch that was! return new_dfa, new_finals