1
2
3
4
5
6
7
8 """
9 The lexicon is constructed by calling
10 lexicon.parseLexicon(<lexicon string>).
11
12 In order to construct a parser, you also need a rule set.
13 The standard English rules are provided in chart as
14 chart.DefaultRuleSet
15
16 The parser can then be constructed by calling, for example:
17 parser = chart.CCGChartParser(<lexicon>, <ruleset>)
18
19 Parsing is then performed by running
20 parser.nbest_parse(<sentence>.split())
21
22 While this returns a list of trees, the default representation
23 of the produced trees is not very enlightening, particularly
24 given that it uses the same tree class as the CFG parsers.
25 It is probably better to call:
26 chart.printCCGDerivation(<parse tree extracted from list>)
27 which should print a nice representation of the derivation.
28
29 This entire process is shown far more clearly in the demonstration:
30 python chart.py
31 """
32
33 from nltk.parse.api import *
34 from nltk.parse.chart import AbstractChartRule, EdgeI, Chart
35 from nltk import Tree, defaultdict
36
37 import lexicon
38 from combinator import *
39
40
41
42
48
49
def lhs(self):
    '''Return the category held by this edge (``self._categ``).'''
    category = self._categ
    return category
def span(self):
    '''Return the span stored on this edge (``self._span``), unchanged.'''
    covered = self._span
    return covered
def start(self):
    '''Return the first element of the stored span.'''
    begin = self._span[0]
    return begin
def end(self):
    '''Return the second element of the stored span.'''
    finish = self._span[1]
    return finish
def length(self):
    '''
    Return the width of this edge's span, i.e. ``end - start``.

    Both bounds are read from the ``self._span`` tuple.  The previous
    version subscripted ``self.span`` (the accessor method itself), which
    raises ``TypeError`` instead of producing a number.
    '''
    begin, finish = self._span[0], self._span[1]
    return finish - begin
def rhs(self):
    '''Return an empty tuple; no right-hand side is recorded on this edge.'''
    return tuple()
def dot(self):
    '''Return the dot position, which is always 0 here.'''
    position = 0
    return position
def next(self):
    '''Return ``None``; this edge never reports a next element.'''
    upcoming = None
    return upcoming
60
65
67 if not isinstance(other, CCGEdge): return -1
68 return cmp((self._span,self._categ,self._rule),
69 (other.span(),other.categ(),other.rule()))
70
72 return hash((self._span,self._categ,self._rule))
73
75 '''
76 Class representing leaf edges in a CCG derivation.
77 '''
79 self._pos = pos
80 self._categ = categ
81 self._leaf = leaf
82
83
def lhs(self):
    '''Return the category assigned to this leaf (``self._categ``).'''
    category = self._categ
    return category
def span(self):
    '''Return the one-position span ``(pos, pos + 1)`` for this leaf.'''
    begin = self._pos
    return (begin, begin + 1)
def start(self):
    '''Return the position of this leaf (``self._pos``).'''
    position = self._pos
    return position
def end(self):
    '''Return the position just past this leaf (``self._pos + 1``).'''
    position = self._pos
    return position + 1
def rhs(self):
    '''Return the leaf stored on this edge (``self._leaf``).'''
    token = self._leaf
    return token
def dot(self):
    '''Return the dot position, which is always 0 for this edge.'''
    position = 0
    return position
def next(self):
    '''Return ``None``; this edge never reports a next element.'''
    upcoming = None
    return upcoming
94
97
def leaf(self):
    '''Return the leaf stored on this edge (``self._leaf``).'''
    token = self._leaf
    return token
99
101 if not isinstance(other, CCGLeafEdge): return -1
102 return cmp((self._span,self._categ,self._rule),
103 other.span(),other.categ(),other.rule())
104
106 return hash((self._pos,self._categ,self._leaf))
107
109 '''
110 Class implementing application of a binary combinator to a chart.
111 Takes the directed combinator to apply.
112 '''
113 NUMEDGES = 2
116
117
118 - def apply_iter(self, chart, grammar, left_edge, right_edge):
130
131
133 return str(self._combinator)
134
135
136
138 '''
139 Class for applying forward type raising
140 '''
141 NUMEDGES = 2
142
145 - def apply_iter(self, chart, grammar, left_edge, right_edge):
154 return str(self._combinator)
155
157 '''
158 Class for applying backward type raising.
159 '''
160 NUMEDGES = 2
161
164 - def apply_iter(self, chart, grammar, left_edge, right_edge):
173 return str(self._combinator)
174
175
176
# Rule sets grouped by combinator family.  Each binary entry wraps one
# directed combinator in a BinaryCombinatorRule so it can be applied to
# the chart.
ApplicationRuleSet = [
    BinaryCombinatorRule(ForwardApplication),
    BinaryCombinatorRule(BackwardApplication),
]
CompositionRuleSet = [
    BinaryCombinatorRule(ForwardComposition),
    BinaryCombinatorRule(BackwardComposition),
    BinaryCombinatorRule(BackwardBx),
]
SubstitutionRuleSet = [
    BinaryCombinatorRule(ForwardSubstitution),
    BinaryCombinatorRule(BackwardSx),
]
TypeRaiseRuleSet = [ForwardTypeRaiseRule(), BackwardTypeRaiseRule()]

# The default rule set is the concatenation of the four families above,
# in this order.
DefaultRuleSet = (ApplicationRuleSet + CompositionRuleSet +
                  SubstitutionRuleSet + TypeRaiseRuleSet)
189
191 '''
192 Chart parser for CCGs.
193 Based largely on the ChartParser class from NLTK.
194 '''
195 - def __init__(self, lexicon, rules, trace=0):
199
202
203
205 tokens = list(tokens)
206 chart = CCGChart(list(tokens))
207 lex = self._lexicon
208
209
210 for index in range(chart.num_leaves()):
211 for cat in lex.categories(chart.leaf(index)):
212 new_edge = CCGLeafEdge(index, cat, chart.leaf(index))
213 chart.insert(new_edge, ())
214
215
216
217 for span in range(2,chart.num_leaves()+1):
218 for start in range(0,chart.num_leaves()-span+1):
219
220
221 for part in range(1,span):
222 lstart = start
223 mid = start + part
224 rend = start + span
225
226 for left in chart.select(span=(lstart,mid)):
227 for right in chart.select(span=(mid,rend)):
228
229 for rule in self._rules:
230 edges_added_by_rule = 0
231 for newedge in rule.apply_iter(chart,lex,left,right):
232 edges_added_by_rule += 1
233
234
235 return chart.parses(lex.start())[:n]
236
240
241
242
243
244 - def _trees(self, edge, complete, memo, tree_class):
268
269
270
271
273
274 leafcats = tree.pos()
275 leafstr = ''
276 catstr = ''
277
278
279
280 for (leaf, cat) in leafcats:
281 nextlen = 2 + max(len(leaf),len(str(cat)))
282 lcatlen = (nextlen - len(str(cat)))/2
283 rcatlen = lcatlen + (nextlen - len(str(cat)))%2
284 catstr += ' '*lcatlen + str(cat) + ' '*rcatlen
285 lleaflen = (nextlen - len(leaf))/2
286 rleaflen = lleaflen + (nextlen - len(leaf))%2
287 leafstr += ' '*lleaflen + leaf + ' '*rleaflen
288 print leafstr
289 print catstr
290
291
292 printCCGTree(0,tree)
293
294
296 rwidth = lwidth
297
298
299
300 if not isinstance(tree,Tree):
301 return 2 + lwidth + len(tree)
302
303
304 for child in tree:
305 rwidth = max(rwidth,printCCGTree(rwidth,child))
306
307
308
309 if not isinstance(tree.node,tuple):
310 return max(rwidth,2 + lwidth + len(str(tree.node)),
311 2 + lwidth + len(tree[0]))
312
313 (res,op) = tree.node
314
315
316 print lwidth*' ' + (rwidth-lwidth)*'-' + str(op)
317
318 respadlen = (rwidth - lwidth - len(str(res)))/2 + lwidth
319 print respadlen*' ' + str(res)
320 return rwidth
321
322
323
324
325
# Module-level lexicon built by parsing the inline lexicon string below.
# The '#' remarks inside the string are part of the string passed to
# lexicon.parseLexicon, not Python comments.
lex = lexicon.parseLexicon('''
:- S, NP, N, VP # Primitive categories, S is the target primitive

Det :: NP/N # Family of words
Pro :: NP
TV :: VP/NP
Modal :: (S\\NP)/VP # Backslashes need to be escaped

I => Pro # Word -> Category mapping
you => Pro

the => Det

# Variables have the special keyword 'var'
# '.' prevents permutation
# ',' prevents composition
and => var\\.,var/.,var

which => (N\\N)/(S/NP)

will => Modal # Categories can be either explicit, or families.
might => Modal

cook => TV
eat => TV

mushrooms => N
parsnips => N
bacon => N
''')
356
361
# Run the demonstration only when this file is executed as a script.
if __name__ == "__main__":
    demo()
364