#!/usr/bin/env python

import sys

###################################
# code for the parser
###################################

class Arc:
    """An edge in the chart: an instance of rule spanning input
    positions i..j, with the constituents recognized so far listed
    in completed (a prefix of rule.rhs)."""

    def __init__(self, i, j, rule, completed):
        self.i = i                  # start position in the input
        self.j = j                  # end position in the input
        self.rule = rule            # the Rule this arc instantiates
        self.completed = completed  # rhs constituents recognized so far

    def __repr__(self):
        return self.rule.lhs + ' ' + str(self.completed)

    def __eq__(self, other):
        # NOTE: Rule defines no __eq__, so self.rule == other.rule is
        # identity -- equal arcs must be built from the same Rule object
        return self.i == other.i and self.j == other.j and self.rule == other.rule and self.completed == other.completed

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__; keep them consistent
        return not self.__eq__(other)

    def isCompleted(self):
        """True when every symbol of rule.rhs has been recognized."""
        return len(self.completed) == len(self.rule.rhs)

    def nextIncomplete(self):
        """The next rhs symbol still needed, or '' if the arc is done."""
        if self.isCompleted():
            return ''
        else:
            return self.rule.rhs[len(self.completed)]

class Rule:
    """A rewrite rule: a left-hand-side symbol plus a right-hand-side
    list of symbols (for lexicon entries, a single-word list)."""

    def __init__(self, lhs, rhs):
        self.lhs = lhs  # single category symbol, e.g. 'NP'
        self.rhs = rhs  # list of symbols or words

    def __repr__(self):
        return '%s -> %s' % (self.lhs, self.rhs)
    
# predict new edge(s) that pick up where arc's completion leaves off
def prediction(chart, grammar, arc):
    """Top-down prediction: for every grammar rule whose lhs is the
    symbol arc needs next, add an empty arc at position arc.j to the
    chart (mutated in place) and predict recursively from it."""
    needed = arc.nextIncomplete()
    for rule in grammar:
        if rule.lhs != needed:
            continue
        candidate = Arc(arc.j, arc.j, rule, [])
        # skip arcs already predicted, both to avoid duplicates and so
        # the recursion terminates on cyclic rules like NP -> NP PP
        if candidate in chart:
            continue
        chart.append(candidate)
        assert not candidate.isCompleted()
        prediction(chart, grammar, candidate)

def parse(s, grammar, lexicon):
    """Chart-parse the space-separated sentence s top-down.

    s       -- input sentence, tokens separated by single spaces
    grammar -- list of Rule objects with nonterminal right-hand sides
    lexicon -- list of Rule objects whose rhs is a one-word list

    Returns a pair [constituents, sentences]: the string form of every
    completed constituent on the chart, and the subset that are 'S'
    arcs spanning the whole input.  Exits the process if a token is
    missing from the lexicon.
    """
    # break input up into tokens
    tokens = s.split(' ')
    n = len(tokens)

    # build initial agenda: one completed lexical arc per matching
    # lexicon entry (a word may be ambiguous, so keep scanning after a hit)
    agenda = []
    for i in range(n):
        found = False
        for entry in lexicon:
            if entry.rhs[0] == tokens[i]:
                agenda.append(Arc(i, i + 1, entry, [entry.rhs]))
                found = True
        if not found:
            # print(...) is a no-op change on Python 2 (parenthesized
            # expression) and makes the line valid Python 3 as well
            print('error: could not find \'' + tokens[i] + '\' in lexicon')
            sys.exit()

    # reverse agenda so pop() will pop items in order of input
    agenda.reverse()

    # perform the chart building steps (top-down)
    # note: the chart is just a list, so membership tests in
    # prediction() are O(n) -- fine for inputs this small
    chart = []

    # populate initial arcs by predicting from every sentence rule
    for rule in grammar:
        if rule.lhs == 'S':
            newarc = Arc(0, 0, rule, [])
            chart.append(newarc)
            prediction(chart, grammar, newarc)

    # repeatedly perform extension, prediction, completion
    while len(agenda):
        c = agenda.pop()

        # extension: note the chart grows while we iterate over it, so
        # arcs appended below are themselves tested against c in turn
        for arc in chart:
            if c.rule.lhs == arc.nextIncomplete() and arc.j == c.i:
                # extend arc with the completed constituent c
                newarc = Arc(arc.i, c.j, arc.rule, arc.completed + [c])
                chart.append(newarc)

                if newarc.isCompleted():
                    agenda.append(newarc)
                else:
                    prediction(chart, grammar, newarc)

    # return [completed constituents] and [complete parse sentences];
    # the replaces strip the [['...']] wrapper that lexical arcs carry
    # in their repr
    result = [[], []]
    for arc in chart:
        if arc.isCompleted():
            constituent = str(arc).replace('[[\'', '').replace('\']]', '')
            result[0].append(constituent)
            if arc.rule.lhs == 'S' and arc.i == 0 and arc.j == n:
                result[1].append(constituent)
    return result

###################################
# run the parser
###################################

if __name__ == '__main__':
    # single-letter aliases for the category symbols,
    # to save typing a lot of single quotes
    S = 'S'
    NP = 'NP'
    VP = 'VP'
    PP = 'PP'
    N = 'N'
    D = 'Det'
    A = 'Adj'
    V = 'V'
    P = 'P'

    # build grammar and lexicon; several words are deliberately
    # ambiguous (listed under more than one category)
    grammar = [
        Rule(S, [NP, VP]),
        Rule(NP, [N]),
        Rule(NP, [D, N]),
        Rule(NP, [D, A, N]),
        Rule(NP, [NP, PP]),
        Rule(PP, [P, NP]),
        Rule(VP, [V]),
        Rule(VP, [V, NP])
    ]
    lexicon = [
        Rule(D, ['the']),
        Rule(A, ['best']),
        Rule(N, ['best']),
        Rule(N, ['dogs']),
        Rule(V, ['dogs']),
        Rule(N, ['like']),
        Rule(P, ['like']),
        Rule(V, ['like']),
        Rule(N, ['kids']),
        Rule(V, ['kids']),
        Rule(N, ['love']),
        Rule(V, ['love']),
        Rule(N, ['fish']),
        Rule(V, ['fish'])
    ]

    print 'lexicon:'
    print lexicon,'\n'
    print 'grammar:'
    print grammar,'\n'

    # parse a highly ambiguous demo sentence
    result = parse('the best dogs like kids love fish', grammar, lexicon)

    print 'complete constituents:'
    for constituent in result[0]:
        print constituent

    print '\ncomplete sentence parses:'
    for constituent in result[1]:
        print constituent
