#!/local/bin/X11/python

import sys

###################################
# code for the parser
###################################

class Arc:
    def __init__(self, i, j, rule, completed):
        self.i = i
        self.j = j
        self.rule = rule
        self.completed = completed
    
    def __repr__(self):
        #return str(self.rule) + ' with ' + str(self.completed) + ' completed from ' + str(self.i) + ' to ' + str(self.j)
        return self.rule.lhs + ' ' + str(self.completed)

    def isCompleted(self):
        return (len(self.completed) == len(self.rule.rhs))
    
    def nextIncomplete(self):
        if self.isCompleted():
            return ''
        else:
            return self.rule.rhs[len(self.completed)]

class Rule:
    def __init__(self, lhs, rhs):
        self.lhs = lhs
        self.rhs = rhs
    
    def __repr__(self):
        return str(self.lhs) + ' -> ' + str(self.rhs)

def parse(s, grammar, lexicon):
    # break input up into tokens
    tokens = s.split(' ')
    n = len(tokens)

    # build initial agenda (containing input)
    agenda = []
    for i in range(n):
        found = 0
        for j in range(len(lexicon)):
            if lexicon[j].rhs[0] == tokens[i]:
                agenda.append(Arc(i, i+1, lexicon[j], [lexicon[j].rhs]))
                found = 1
        if not found:
            print 'error: could not find \'' + tokens[i] + '\' in lexicon'
            sys.exit()
    
    # reverse agenda so pop() function will pop items in order of input
    agenda.reverse()
    
    # perform the chart buildings steps (bottom-up)
    # note: the chart is just implemented as a list (stupid--could be improved!)
    chart = []
    while len(agenda):
        c = agenda.pop()
        #print 'popped',c,'from the agenda'
        
        # prediction
        for rule in grammar:
            if c.rule.lhs == rule.rhs[0]:
                # create arc and add it to the chart
                newarc = Arc(c.i, c.j, rule, [c])
                chart.append(newarc)
                
                if newarc.isCompleted():
                    agenda.append(newarc)
        
        # extension
        for arc in chart:
            if c.rule.lhs == arc.nextIncomplete() and arc.j == c.i:
                # create arc and add it to the chart
                newarc = Arc(arc.i, c.j, arc.rule, arc.completed + [c])
                chart.append(newarc)
                
                if newarc.isCompleted():
                    agenda.append(newarc)
    
    # return [completed constituents] and [complete parse sentences]
    result = [[], []]
    for arc in chart:
        if arc.isCompleted():
            constituent = str(arc).replace('[[\'', '').replace('\']]', '')
            result[0].append(constituent)
            if arc.rule.lhs == 'S' and arc.i == 0 and arc.j == n:
                result[1].append(constituent)
    return result

###################################
# run the parser
###################################

if __name__ == '__main__':
    # to save typing a lot of single quotes
    S = 'S'
    NP = 'NP'
    VP = 'VP'
    PP = 'PP'
    N = 'N'
    D = 'Det'
    A = 'Adj'
    V = 'V'
    P = 'P'

    # build grammr and lexicon
    grammar = [
        Rule(S, [NP, VP]),
        Rule(NP, [N]),
        Rule(NP, [D, N]),
        Rule(NP, [D, A, N]),
        Rule(NP, [NP, PP]),
        Rule(PP, [P, NP]),
        Rule(VP, [V]),
        Rule(VP, [V, NP])
    ]
    lexicon = [
        Rule(D, ['the']),
        Rule(A, ['best']),
        Rule(N, ['best']),
        Rule(N, ['dogs']),
        Rule(V, ['dogs']),
        Rule(N, ['like']),
        Rule(P, ['like']),
        Rule(V, ['like']),
        Rule(N, ['kids']),
        Rule(V, ['kids']),
        Rule(N, ['love']),
        Rule(V, ['love']),
        Rule(N, ['fish']),
        Rule(V, ['fish'])
    ]

    print 'lexicon:'
    print lexicon,'\n'
    print 'grammar:'
    print grammar,'\n'

    result = parse('the best dogs like kids love fish', grammar, lexicon) 

    print 'complete constituents:'
    for constituent in result[0]:
        print constituent

    print '\ncomplete sentence parses:'
    for constituent in result[1]:
        print constituent
