1
2
3
4
5
6
7
8
9
10 """
11 A graphical tool for exploring the recursive descent parser.
12
13 The recursive descent parser maintains a tree, which records the
14 structure of the portion of the text that has been parsed. It uses
15 CFG productions to expand the fringe of the tree, and matches its
16 leaves against the text. Initially, the tree contains the start
17 symbol ("S"). It is shown in the main canvas, to the right of the
18 list of available expansions.
19
20 The parser builds up a tree structure for the text using three
21 operations:
22
23 - "expand" uses a CFG production to add children to a node on the
24 fringe of the tree.
25 - "match" compares a leaf in the tree to a text token.
26 - "backtrack" returns the tree to its state before the most recent
27 expand or match operation.
28
29 The parser maintains a list of tree locations called a "frontier" to
30 remember which nodes have not yet been expanded and which leaves have
31 not yet been matched against the text. The leftmost frontier node is
32 shown in green, and the other frontier nodes are shown in blue. The
33 parser always performs expand and match operations on the leftmost
34 element of the frontier.
35
36 You can control the parser's operation by using the "expand," "match,"
37 and "backtrack" buttons; or you can use the "step" button to let the
38 parser automatically decide which operation to apply. The parser uses
39 the following rules to decide which operation to apply:
40
41 - If the leftmost frontier element is a token, try matching it.
42 - If the leftmost frontier element is a node, try expanding it with
43 the first untried expansion.
44 - Otherwise, backtrack.
45
46 The "expand" button applies the untried expansion whose CFG production
47 is listed earliest in the grammar. To manually choose which expansion
48 to apply, click on a CFG production from the list of available
49 expansions, on the left side of the main window.
50
51 The "autostep" button will let the parser continue applying
52 applications to the tree until it reaches a complete parse. You can
53 cancel an autostep in progress at any time by clicking on the
54 "autostep" button again.
55
56 Keyboard Shortcuts::
57 [Space]\t Perform the next expand, match, or backtrack operation
58 [a]\t Step through operations until the next complete parse
59 [e]\t Perform an expand operation
60 [m]\t Perform a match operation
61 [b]\t Perform a backtrack operation
62 [Delete]\t Reset the parser
63 [g]\t Show/hide available expansions list
64 [h]\t Help
65 [Ctrl-p]\t Print
66 [q]\t Quit
67 """
68
69 import string
70
71 import nltk
72 from nltk.tree import Tree
73 from nltk.util import in_idle
74 from nltk.draw.util import *
75 from nltk.draw.tree import *
76 from nltk.draw.cfg import *
77
79 """
80 A graphical tool for exploring the recursive descent parser. The tool
81 displays the parser's tree and the remaining text, and allows the
82 user to control the parser's operation. In particular, the user
83 can expand subtrees on the frontier, match tokens on the frontier
84 against the text, and backtrack. A "step" button simply steps
85 through the parsing process, performing the operations that
86 C{RecursiveDescentParser} would use.
87 """
88 - def __init__(self, grammar, sent, trace=0):
125
126
127
128
129
131
132 self._sysfont = tkFont.Font(font=Button()["font"])
133 root.option_add("*Font", self._sysfont)
134
135
136 self._size = IntVar(root)
137 self._size.set(self._sysfont.cget('size'))
138
139 self._boldfont = tkFont.Font(family='helvetica', weight='bold',
140 size=self._size.get())
141 self._font = tkFont.Font(family='helvetica',
142 size=self._size.get())
143 if self._size.get() < 0: big = self._size.get()-2
144 else: big = self._size.get()+2
145 self._bigfont = tkFont.Font(family='helvetica', weight='bold',
146 size=big)
147
149
150 self._prodframe = listframe = Frame(parent)
151 self._prodframe.pack(fill='both', side='left', padx=2)
152 self._prodlist_label = Label(self._prodframe, font=self._boldfont,
153 text='Available Expansions')
154 self._prodlist_label.pack()
155 self._prodlist = Listbox(self._prodframe, selectmode='single',
156 relief='groove', background='white',
157 foreground='#909090', font=self._font,
158 selectforeground='#004040',
159 selectbackground='#c0f0c0')
160
161 self._prodlist.pack(side='right', fill='both', expand=1)
162
163 self._productions = list(self._parser.grammar().productions())
164 for production in self._productions:
165 self._prodlist.insert('end', (' %s' % production))
166 self._prodlist.config(height=min(len(self._productions), 25))
167
168
169 if len(self._productions) > 25:
170 listscroll = Scrollbar(self._prodframe,
171 orient='vertical')
172 self._prodlist.config(yscrollcommand = listscroll.set)
173 listscroll.config(command=self._prodlist.yview)
174 listscroll.pack(side='left', fill='y')
175
176
177 self._prodlist.bind('<<ListboxSelect>>', self._prodlist_select)
178
180
181 self._top.bind('<Control-q>', self.destroy)
182 self._top.bind('<Control-x>', self.destroy)
183 self._top.bind('<Escape>', self.destroy)
184 self._top.bind('e', self.expand)
185
186
187 self._top.bind('m', self.match)
188 self._top.bind('<Alt-m>', self.match)
189 self._top.bind('<Control-m>', self.match)
190 self._top.bind('b', self.backtrack)
191 self._top.bind('<Alt-b>', self.backtrack)
192 self._top.bind('<Control-b>', self.backtrack)
193 self._top.bind('<Control-z>', self.backtrack)
194 self._top.bind('<BackSpace>', self.backtrack)
195 self._top.bind('a', self.autostep)
196
197 self._top.bind('<Control-space>', self.autostep)
198 self._top.bind('<Control-c>', self.cancel_autostep)
199 self._top.bind('<space>', self.step)
200 self._top.bind('<Delete>', self.reset)
201 self._top.bind('<Control-p>', self.postscript)
202
203
204 self._top.bind('<Control-h>', self.help)
205 self._top.bind('<F1>', self.help)
206
207
208
209 self._top.bind('<Control-g>', self.edit_grammar)
210 self._top.bind('<Control-t>', self.edit_sentence)
211
231
232
233
234
235
242
244 self._feedbackframe = feedbackframe = Frame(parent)
245 feedbackframe.pack(fill='x', side='bottom', padx=3, pady=3)
246 self._lastoper_label = Label(feedbackframe, text='Last Operation:',
247 font=self._font)
248 self._lastoper_label.pack(side='left')
249 lastoperframe = Frame(feedbackframe, relief='sunken', border=1)
250 lastoperframe.pack(fill='x', side='right', expand=1, padx=5)
251 self._lastoper1 = Label(lastoperframe, foreground='#007070',
252 background='#f0f0f0', font=self._font)
253 self._lastoper2 = Label(lastoperframe, anchor='w', width=30,
254 foreground='#004040', background='#f0f0f0',
255 font=self._font)
256 self._lastoper1.pack(side='left')
257 self._lastoper2.pack(side='left', fill='x', expand=1)
258
260 self._cframe = CanvasFrame(parent, background='white',
261
262 closeenough=10,
263 border=2, relief='sunken')
264 self._cframe.pack(expand=1, fill='both', side='top', pady=2)
265 canvas = self._canvas = self._cframe.canvas()
266
267
268 self._tree = None
269 self._textwidgets = []
270 self._textline = None
271
273 menubar = Menu(parent)
274
275 filemenu = Menu(menubar, tearoff=0)
276 filemenu.add_command(label='Reset Parser', underline=0,
277 command=self.reset, accelerator='Del')
278 filemenu.add_command(label='Print to Postscript', underline=0,
279 command=self.postscript, accelerator='Ctrl-p')
280 filemenu.add_command(label='Exit', underline=1,
281 command=self.destroy, accelerator='Ctrl-x')
282 menubar.add_cascade(label='File', underline=0, menu=filemenu)
283
284 editmenu = Menu(menubar, tearoff=0)
285 editmenu.add_command(label='Edit Grammar', underline=5,
286 command=self.edit_grammar,
287 accelerator='Ctrl-g')
288 editmenu.add_command(label='Edit Text', underline=5,
289 command=self.edit_sentence,
290 accelerator='Ctrl-t')
291 menubar.add_cascade(label='Edit', underline=0, menu=editmenu)
292
293 rulemenu = Menu(menubar, tearoff=0)
294 rulemenu.add_command(label='Step', underline=1,
295 command=self.step, accelerator='Space')
296 rulemenu.add_separator()
297 rulemenu.add_command(label='Match', underline=0,
298 command=self.match, accelerator='Ctrl-m')
299 rulemenu.add_command(label='Expand', underline=0,
300 command=self.expand, accelerator='Ctrl-e')
301 rulemenu.add_separator()
302 rulemenu.add_command(label='Backtrack', underline=0,
303 command=self.backtrack, accelerator='Ctrl-b')
304 menubar.add_cascade(label='Apply', underline=0, menu=rulemenu)
305
306 viewmenu = Menu(menubar, tearoff=0)
307 viewmenu.add_checkbutton(label="Show Grammar", underline=0,
308 variable=self._show_grammar,
309 command=self._toggle_grammar)
310 viewmenu.add_separator()
311 viewmenu.add_radiobutton(label='Tiny', variable=self._size,
312 underline=0, value=10, command=self.resize)
313 viewmenu.add_radiobutton(label='Small', variable=self._size,
314 underline=0, value=12, command=self.resize)
315 viewmenu.add_radiobutton(label='Medium', variable=self._size,
316 underline=0, value=14, command=self.resize)
317 viewmenu.add_radiobutton(label='Large', variable=self._size,
318 underline=0, value=18, command=self.resize)
319 viewmenu.add_radiobutton(label='Huge', variable=self._size,
320 underline=0, value=24, command=self.resize)
321 menubar.add_cascade(label='View', underline=0, menu=viewmenu)
322
323 animatemenu = Menu(menubar, tearoff=0)
324 animatemenu.add_radiobutton(label="No Animation", underline=0,
325 variable=self._animation_frames,
326 value=0)
327 animatemenu.add_radiobutton(label="Slow Animation", underline=0,
328 variable=self._animation_frames,
329 value=10, accelerator='-')
330 animatemenu.add_radiobutton(label="Normal Animation", underline=0,
331 variable=self._animation_frames,
332 value=5, accelerator='=')
333 animatemenu.add_radiobutton(label="Fast Animation", underline=0,
334 variable=self._animation_frames,
335 value=2, accelerator='+')
336 menubar.add_cascade(label="Animate", underline=1, menu=animatemenu)
337
338
339 helpmenu = Menu(menubar, tearoff=0)
340 helpmenu.add_command(label='About', underline=0,
341 command=self.about)
342 helpmenu.add_command(label='Instructions', underline=0,
343 command=self.help, accelerator='F1')
344 menubar.add_cascade(label='Help', underline=0, menu=helpmenu)
345
346 parent.config(menu=menubar)
347
348
349
350
351
352 - def _get(self, widget, treeloc):
357
358
359
360
361
363 canvas = self._canvas
364
365
366 if self._tree is not None:
367 self._cframe.destroy_widget(self._tree)
368 for twidget in self._textwidgets:
369 self._cframe.destroy_widget(twidget)
370 if self._textline is not None:
371 self._canvas.delete(self._textline)
372
373
374 helv = ('helvetica', -self._size.get())
375 bold = ('helvetica', -self._size.get(), 'bold')
376 attribs = {'tree_color': '#000000', 'tree_width': 2,
377 'node_font': bold, 'leaf_font': helv,}
378 tree = self._parser.tree()
379 self._tree = tree_to_treesegment(canvas, tree, **attribs)
380 self._cframe.add_widget(self._tree, 30, 5)
381
382
383 helv = ('helvetica', -self._size.get())
384 bottom = y = self._cframe.scrollregion()[3]
385 self._textwidgets = [TextWidget(canvas, word, font=self._font)
386 for word in self._sent]
387 for twidget in self._textwidgets:
388 self._cframe.add_widget(twidget, 0, 0)
389 twidget.move(0, bottom-twidget.bbox()[3]-5)
390 y = min(y, twidget.bbox()[1])
391
392
393 self._textline = canvas.create_line(-5000, y-5, 5000, y-5, dash='.')
394
395
396 self._highlight_nodes()
397 self._highlight_prodlist()
398
399
400 self._position_text()
401
402
408
410
411 bold = ('helvetica', -self._size.get(), 'bold')
412 for treeloc in self._parser.frontier()[:1]:
413 self._get(self._tree, treeloc)['color'] = '#20a050'
414 self._get(self._tree, treeloc)['font'] = bold
415 for treeloc in self._parser.frontier()[1:]:
416 self._get(self._tree, treeloc)['color'] = '#008080'
417
436
437 - def _position_text(self):
438
439 numwords = len(self._sent)
440 num_matched = numwords - len(self._parser.remaining_text())
441 leaves = self._tree_leaves()[:num_matched]
442 xmax = self._tree.bbox()[0]
443 for i in range(0, len(leaves)):
444 widget = self._textwidgets[i]
445 leaf = leaves[i]
446 widget['color'] = '#006040'
447 leaf['color'] = '#006040'
448 widget.move(leaf.bbox()[0] - widget.bbox()[0], 0)
449 xmax = widget.bbox()[2] + 10
450
451
452 for i in range(len(leaves), numwords):
453 widget = self._textwidgets[i]
454 widget['color'] = '#a0a0a0'
455 widget.move(xmax - widget.bbox()[0], 0)
456 xmax = widget.bbox()[2] + 10
457
458
459 if self._parser.currently_complete():
460 for twidget in self._textwidgets:
461 twidget['color'] = '#00a000'
462
463
464 for i in range(0, len(leaves)):
465 widget = self._textwidgets[i]
466 leaf = leaves[i]
467 dy = widget.bbox()[1] - leaf.bbox()[3] - 10.0
468 dy = max(dy, leaf.parent().node().bbox()[3] - leaf.bbox()[3] + 10)
469 leaf.move(0, dy)
470
479
480
481
482
483
485 self._autostep = 0
486 if self._top is None: return
487 self._top.destroy()
488 self._top = None
489
491 self._autostep = 0
492 self._parser.initialize(self._sent)
493 self._lastoper1['text'] = 'Reset Application'
494 self._lastoper2['text'] = ''
495 self._redraw()
496
498 if self._animation_frames.get() == 0:
499 self._animation_frames.set(2)
500 if self._autostep:
501 self._autostep = 0
502 else:
503 self._autostep = 1
504 self._step()
505
507
508 self._autostep = 0
509
510
511 - def step(self, *e): self._autostep = 0; self._step()
512 - def match(self, *e): self._autostep = 0; self._match()
515
517 if self._animating_lock: return
518
519
520 if self._expand(): pass
521 elif self._parser.untried_match() and self._match(): pass
522 elif self._backtrack(): pass
523 else:
524 self._lastoper1['text'] = 'Finished'
525 self._lastoper2['text'] = ''
526 self._autostep = 0
527
528
529 if self._parser.currently_complete():
530 self._autostep = 0
531 self._lastoper2['text'] += ' [COMPLETE PARSE]'
532
534 if self._animating_lock: return
535 old_frontier = self._parser.frontier()
536 rv = self._parser.expand()
537 if rv is not None:
538 self._lastoper1['text'] = 'Expand:'
539 self._lastoper2['text'] = rv
540 self._prodlist.selection_clear(0, 'end')
541 index = self._productions.index(rv)
542 self._prodlist.selection_set(index)
543 self._animate_expand(old_frontier[0])
544 return 1
545 else:
546 self._lastoper1['text'] = 'Expand:'
547 self._lastoper2['text'] = '(all expansions tried)'
548 return 0
549
551 if self._animating_lock: return
552 old_frontier = self._parser.frontier()
553 rv = self._parser.match()
554 if rv is not None:
555 self._lastoper1['text'] = 'Match:'
556 self._lastoper2['text'] = rv
557 self._animate_match(old_frontier[0])
558 return 1
559 else:
560 self._lastoper1['text'] = 'Match:'
561 self._lastoper2['text'] = '(failed)'
562 return 0
563
565 if self._animating_lock: return
566 if self._parser.backtrack():
567 elt = self._parser.tree()
568 for i in self._parser.frontier()[0]:
569 elt = elt[i]
570 self._lastoper1['text'] = 'Backtrack'
571 self._lastoper2['text'] = ''
572 if isinstance(elt, Tree):
573 self._animate_backtrack(self._parser.frontier()[0])
574 else:
575 self._animate_match_backtrack(self._parser.frontier()[0])
576 return 1
577 else:
578 self._autostep = 0
579 self._lastoper1['text'] = 'Finished'
580 self._lastoper2['text'] = ''
581 return 0
582
584 ABOUT = ("NLTK Recursive Descent Parser Application\n"+
585 "Written by Edward Loper")
586 TITLE = 'About: Recursive Descent Parser Application'
587 try:
588 from tkMessageBox import Message
589 Message(message=ABOUT, title=TITLE).show()
590 except:
591 ShowText(self._top, TITLE, ABOUT)
592
593 - def help(self, *e):
594 self._autostep = 0
595
596 try:
597 ShowText(self._top, 'Help: Recursive Descent Parser Application',
598 (__doc__).strip(), width=75, font='fixed')
599 except:
600 ShowText(self._top, 'Help: Recursive Descent Parser Application',
601 (__doc__).strip(), width=75)
602
603 - def postscript(self, *e):
604 self._autostep = 0
605 self._cframe.print_to_file()
606
607 - def mainloop(self, *args, **kwargs):
608 """
609 Enter the Tkinter mainloop. This function must be called if
610 this demo is created from a non-interactive program (e.g.
611 from a secript); otherwise, the demo will close as soon as
612 the script completes.
613 """
614 if in_idle(): return
615 self._top.mainloop(*args, **kwargs)
616
625
626
627
628
629
631 if self._show_grammar.get():
632 self._prodframe.pack(fill='both', side='left', padx=2,
633 after=self._feedbackframe)
634 self._lastoper1['text'] = 'Show Grammar'
635 else:
636 self._prodframe.pack_forget()
637 self._lastoper1['text'] = 'Hide Grammar'
638 self._lastoper2['text'] = ''
639
640
641
642
643
644
645
646
647
648
649
650
670
671
672
673
674
676 oldwidget = self._get(self._tree, treeloc)
677 oldtree = oldwidget.parent()
678 top = not isinstance(oldtree.parent(), TreeSegmentWidget)
679
680 tree = self._parser.tree()
681 for i in treeloc:
682 tree = tree[i]
683
684 widget = tree_to_treesegment(self._canvas, tree,
685 node_font=self._boldfont,
686 leaf_color='white',
687 tree_width=2, tree_color='white',
688 node_color='white',
689 leaf_font=self._font)
690 widget.node()['color'] = '#20a050'
691
692 (oldx, oldy) = oldtree.node().bbox()[:2]
693 (newx, newy) = widget.node().bbox()[:2]
694 widget.move(oldx-newx, oldy-newy)
695
696 if top:
697 self._cframe.add_widget(widget, 0, 5)
698 widget.move(30-widget.node().bbox()[0], 0)
699 self._tree = widget
700 else:
701 oldtree.parent().replace_child(oldtree, widget)
702
703
704
705 if widget.subtrees():
706 dx = (oldx + widget.node().width()/2 -
707 widget.subtrees()[0].bbox()[0]/2 -
708 widget.subtrees()[0].bbox()[2]/2)
709 for subtree in widget.subtrees(): subtree.move(dx, 0)
710
711 self._makeroom(widget)
712
713 if top:
714 self._cframe.destroy_widget(oldtree)
715 else:
716 oldtree.destroy()
717
718 colors = ['gray%d' % (10*int(10*x/self._animation_frames.get()))
719 for x in range(self._animation_frames.get(),0,-1)]
720
721
722 dy = widget.bbox()[3] + 30 - self._canvas.coords(self._textline)[1]
723 if dy > 0:
724 for twidget in self._textwidgets: twidget.move(0, dy)
725 self._canvas.move(self._textline, 0, dy)
726
727 self._animate_expand_frame(widget, colors)
728
752
754 if len(colors) > 0:
755 self._animating_lock = 1
756 widget['color'] = colors[0]
757 for subtree in widget.subtrees():
758 if isinstance(subtree, TreeSegmentWidget):
759 subtree.node()['color'] = colors[0]
760 else:
761 subtree['color'] = colors[0]
762 self._top.after(50, self._animate_expand_frame,
763 widget, colors[1:])
764 else:
765 widget['color'] = 'black'
766 for subtree in widget.subtrees():
767 if isinstance(subtree, TreeSegmentWidget):
768 subtree.node()['color'] = 'black'
769 else:
770 subtree['color'] = 'black'
771 self._redraw_quick()
772 widget.node()['color'] = 'black'
773 self._animating_lock = 0
774 if self._autostep: self._step()
775
791
805
813
815 widget = self._get(self._tree, treeloc)
816
817 dy = ((self._textwidgets[0].bbox()[1] - widget.bbox()[3] - 10.0) /
818 max(1, self._animation_frames.get()))
819 self._animate_match_frame(self._animation_frames.get(), widget, dy)
820
822 if frame > 0:
823 self._animating_lock = 1
824 widget.move(0, dy)
825 self._top.after(10, self._animate_match_frame,
826 frame-1, widget, dy)
827 else:
828 widget['color'] = '#006040'
829 self._redraw_quick()
830 self._animating_lock = 0
831 if self._autostep: self._step()
832
844
847
854
860
862 self._sent = sentence.split()
863 self.reset()
864
866 """
867 Create a recursive descent parser demo, using a simple grammar and
868 text.
869 """
870 from nltk.grammar import parse_cfg
871 grammar = parse_cfg("""
872 # Grammatical productions.
873 S -> NP VP
874 NP -> Det N PP | Det N
875 VP -> V NP PP | V NP | V
876 PP -> P NP
877 # Lexical productions.
878 NP -> 'I'
879 Det -> 'the' | 'a'
880 N -> 'man' | 'park' | 'dog' | 'telescope'
881 V -> 'ate' | 'saw'
882 P -> 'in' | 'under' | 'with'
883 """)
884
885 sent = 'the dog saw a man in the park'.split()
886
887 RecursiveDescentApp(grammar, sent).mainloop()
888
889 if __name__ == '__main__':
890 app()
891
892 __all__ = ['app']
893