Parsing

1   Unit tests for the Context Free Grammar class

 
>>> from nltk import Nonterminal, nonterminals, Production, parse_cfg, ContextFreeGrammar
 
>>> nt1 = Nonterminal('NP')
>>> nt2 = Nonterminal('VP')
 
>>> nt1.symbol()
'NP'
 
>>> nt1 == Nonterminal('NP')
True
 
>>> nt1 == nt2
False
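
Nonterminals can also be combined with the division operator, which builds a new nonterminal with a slashed symbol (a small example using the two nonterminals above):

 
>>> nt1 / nt2
NP/VP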
 
>>> S, NP, VP, PP = nonterminals('S, NP, VP, PP')
>>> N, V, P, DT = nonterminals('N, V, P, DT')
 
>>> prod1 = Production(S, [NP, VP])
>>> prod2 = Production(NP, [DT, NP])
 
>>> prod1.lhs()
S
 
>>> prod1.rhs()
(NP, VP)
 
>>> prod1 == Production(S, [NP, VP])
True
 
>>> prod1 == prod2
False
 
>>> grammar = parse_cfg("""
... S -> NP VP
... PP -> P NP
... NP -> 'the' N | N PP | 'the' N PP
... VP -> V NP | V PP | V NP PP
... N -> 'cat'
... N -> 'dog'
... N -> 'rug'
... V -> 'chased'
... V -> 'sat'
... P -> 'in'
... P -> 'on'
... """)

2   Unit tests for the Recursive Descent Parser class

Create and run a recursive descent parser over both a syntactically ambiguous and an unambiguous sentence.

 
>>> from nltk.parse import RecursiveDescentParser
>>> rd = RecursiveDescentParser(grammar)
 
>>> sentence1 = 'the cat chased the dog'.split()
>>> sentence2 = 'the cat chased the dog on the rug'.split()
 
>>> for t in rd.nbest_parse(sentence1):
...     print t
(S (NP the (N cat)) (VP (V chased) (NP the (N dog))))
 
>>> for t in rd.nbest_parse(sentence2):
...     print t
(S
  (NP the (N cat))
  (VP (V chased) (NP the (N dog) (PP (P on) (NP the (N rug))))))
(S
  (NP the (N cat))
  (VP (V chased) (NP the (N dog)) (PP (P on) (NP the (N rug)))))

3   Unit tests for the Shift Reduce Parser class

Create and run a shift-reduce parser over both a syntactically ambiguous and an unambiguous sentence. Note that, unlike the recursive descent parser, it returns at most one parse.

 
>>> from nltk.parse import ShiftReduceParser
>>> sr = ShiftReduceParser(grammar)
 
>>> sentence1 = 'the cat chased the dog'.split()
>>> sentence2 = 'the cat chased the dog on the rug'.split()
 
>>> for t in sr.nbest_parse(sentence1):
...     print t
(S (NP the (N cat)) (VP (V chased) (NP the (N dog))))

The shift-reduce parser uses heuristics to decide what to do when more than one shift or reduce operation is available. For the supplied grammar it clearly selects the wrong operation, so the ambiguous sentence receives no parse at all.

 
>>> for t in sr.nbest_parse(sentence2):
...     print t
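
To inspect the heuristic decisions, the parser can be constructed with a trace level; calling parse() on the traced parser then prints each shift and reduce step (the lengthy trace output is not reproduced here):

 
>>> sr_traced = ShiftReduceParser(grammar, trace=2)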

4   Unit tests for the Chart Parser class

We use the demo() function for testing. We must turn off the printing of times.

 
>>> import nltk

Top-down

 
>>> nltk.parse.chart.demo(1, should_print_times=False, trace=1,
...                       sent='I saw John with a dog', numparses=2)
* Sentence:
I saw John with a dog
['I', 'saw', 'John', 'with', 'a', 'dog']

* Strategy: Top-down

|.  I   . saw  . John . with .  a   . dog  .|
|[------]      .      .      .      .      .| [0:1] 'I'
|.      [------]      .      .      .      .| [1:2] 'saw'
|.      .      [------]      .      .      .| [2:3] 'John'
|.      .      .      [------]      .      .| [3:4] 'with'
|.      .      .      .      [------]      .| [4:5] 'a'
|.      .      .      .      .      [------]| [5:6] 'dog'
|>      .      .      .      .      .      .| [0:0] S  -> * NP VP
|>      .      .      .      .      .      .| [0:0] NP -> * NP PP
|>      .      .      .      .      .      .| [0:0] NP -> * Det Noun
|>      .      .      .      .      .      .| [0:0] NP -> * 'I'
|[------]      .      .      .      .      .| [0:1] NP -> 'I' *
|[------>      .      .      .      .      .| [0:1] S  -> NP * VP
|[------>      .      .      .      .      .| [0:1] NP -> NP * PP
|.      >      .      .      .      .      .| [1:1] VP -> * VP PP
|.      >      .      .      .      .      .| [1:1] VP -> * Verb NP
|.      >      .      .      .      .      .| [1:1] VP -> * Verb
|.      >      .      .      .      .      .| [1:1] Verb -> * 'saw'
|.      [------]      .      .      .      .| [1:2] Verb -> 'saw' *
|.      [------>      .      .      .      .| [1:2] VP -> Verb * NP
|.      [------]      .      .      .      .| [1:2] VP -> Verb *
|[-------------]      .      .      .      .| [0:2] S  -> NP VP *
|.      [------>      .      .      .      .| [1:2] VP -> VP * PP
|.      .      >      .      .      .      .| [2:2] NP -> * NP PP
|.      .      >      .      .      .      .| [2:2] NP -> * Det Noun
|.      .      >      .      .      .      .| [2:2] NP -> * 'John'
|.      .      [------]      .      .      .| [2:3] NP -> 'John' *
|.      [-------------]      .      .      .| [1:3] VP -> Verb NP *
|.      .      [------>      .      .      .| [2:3] NP -> NP * PP
|.      .      .      >      .      .      .| [3:3] PP -> * 'with' NP
|.      .      .      [------>      .      .| [3:4] PP -> 'with' * NP
|.      .      .      .      >      .      .| [4:4] NP -> * NP PP
|.      .      .      .      >      .      .| [4:4] NP -> * Det Noun
|.      .      .      .      >      .      .| [4:4] Det -> * 'a'
|.      .      .      .      [------]      .| [4:5] Det -> 'a' *
|.      .      .      .      [------>      .| [4:5] NP -> Det * Noun
|.      .      .      .      .      >      .| [5:5] Noun -> * 'dog'
|.      .      .      .      .      [------]| [5:6] Noun -> 'dog' *
|.      .      .      .      [-------------]| [4:6] NP -> Det Noun *
|.      .      .      [--------------------]| [3:6] PP -> 'with' NP *
|.      .      .      .      [------------->| [4:6] NP -> NP * PP
|.      .      [---------------------------]| [2:6] NP -> NP PP *
|.      [----------------------------------]| [1:6] VP -> Verb NP *
|.      .      [--------------------------->| [2:6] NP -> NP * PP
|[=========================================]| [0:6] S  -> NP VP *
|.      [---------------------------------->| [1:6] VP -> VP * PP
|[--------------------]      .      .      .| [0:3] S  -> NP VP *
|.      [------------->      .      .      .| [1:3] VP -> VP * PP
|.      [----------------------------------]| [1:6] VP -> VP PP *
|[=========================================]| [0:6] S  -> NP VP *
|.      [---------------------------------->| [1:6] VP -> VP * PP
(S
  (NP I)
  (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
(S
  (NP I)
  (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))

Bottom-up

 
>>> nltk.parse.chart.demo(2, should_print_times=False, trace=1,
...                       sent='I saw John with a dog', numparses=2)
* Sentence:
I saw John with a dog
['I', 'saw', 'John', 'with', 'a', 'dog']

* Strategy: Bottom-up

|.  I   . saw  . John . with .  a   . dog  .|
|[------]      .      .      .      .      .| [0:1] 'I'
|.      [------]      .      .      .      .| [1:2] 'saw'
|.      .      [------]      .      .      .| [2:3] 'John'
|.      .      .      [------]      .      .| [3:4] 'with'
|.      .      .      .      [------]      .| [4:5] 'a'
|.      .      .      .      .      [------]| [5:6] 'dog'
|>      .      .      .      .      .      .| [0:0] NP -> * 'I'
|[------]      .      .      .      .      .| [0:1] NP -> 'I' *
|>      .      .      .      .      .      .| [0:0] S  -> * NP VP
|>      .      .      .      .      .      .| [0:0] NP -> * NP PP
|[------>      .      .      .      .      .| [0:1] S  -> NP * VP
|[------>      .      .      .      .      .| [0:1] NP -> NP * PP
|.      >      .      .      .      .      .| [1:1] Verb -> * 'saw'
|.      [------]      .      .      .      .| [1:2] Verb -> 'saw' *
|.      >      .      .      .      .      .| [1:1] VP -> * Verb NP
|.      >      .      .      .      .      .| [1:1] VP -> * Verb
|.      [------>      .      .      .      .| [1:2] VP -> Verb * NP
|.      [------]      .      .      .      .| [1:2] VP -> Verb *
|.      >      .      .      .      .      .| [1:1] VP -> * VP PP
|[-------------]      .      .      .      .| [0:2] S  -> NP VP *
|.      [------>      .      .      .      .| [1:2] VP -> VP * PP
|.      .      >      .      .      .      .| [2:2] NP -> * 'John'
|.      .      [------]      .      .      .| [2:3] NP -> 'John' *
|.      .      >      .      .      .      .| [2:2] S  -> * NP VP
|.      .      >      .      .      .      .| [2:2] NP -> * NP PP
|.      [-------------]      .      .      .| [1:3] VP -> Verb NP *
|.      .      [------>      .      .      .| [2:3] S  -> NP * VP
|.      .      [------>      .      .      .| [2:3] NP -> NP * PP
|[--------------------]      .      .      .| [0:3] S  -> NP VP *
|.      [------------->      .      .      .| [1:3] VP -> VP * PP
|.      .      .      >      .      .      .| [3:3] PP -> * 'with' NP
|.      .      .      >      .      .      .| [3:3] Prep -> * 'with'
|.      .      .      [------>      .      .| [3:4] PP -> 'with' * NP
|.      .      .      [------]      .      .| [3:4] Prep -> 'with' *
|.      .      .      .      >      .      .| [4:4] Det -> * 'a'
|.      .      .      .      [------]      .| [4:5] Det -> 'a' *
|.      .      .      .      >      .      .| [4:4] NP -> * Det Noun
|.      .      .      .      [------>      .| [4:5] NP -> Det * Noun
|.      .      .      .      .      >      .| [5:5] Noun -> * 'dog'
|.      .      .      .      .      [------]| [5:6] Noun -> 'dog' *
|.      .      .      .      [-------------]| [4:6] NP -> Det Noun *
|.      .      .      .      >      .      .| [4:4] S  -> * NP VP
|.      .      .      .      >      .      .| [4:4] NP -> * NP PP
|.      .      .      [--------------------]| [3:6] PP -> 'with' NP *
|.      .      .      .      [------------->| [4:6] S  -> NP * VP
|.      .      .      .      [------------->| [4:6] NP -> NP * PP
|.      .      [---------------------------]| [2:6] NP -> NP PP *
|.      [----------------------------------]| [1:6] VP -> VP PP *
|[=========================================]| [0:6] S  -> NP VP *
|.      [---------------------------------->| [1:6] VP -> VP * PP
|.      [----------------------------------]| [1:6] VP -> Verb NP *
|.      .      [--------------------------->| [2:6] S  -> NP * VP
|.      .      [--------------------------->| [2:6] NP -> NP * PP
|[=========================================]| [0:6] S  -> NP VP *
|.      [---------------------------------->| [1:6] VP -> VP * PP
(S
  (NP I)
  (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
(S
  (NP I)
  (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))

Bottom-up Left-Corner

 
>>> nltk.parse.chart.demo(3, should_print_times=False, trace=1,
...                       sent='I saw John with a dog', numparses=2)
* Sentence:
I saw John with a dog
['I', 'saw', 'John', 'with', 'a', 'dog']

* Strategy: Bottom-up left-corner

|.  I   . saw  . John . with .  a   . dog  .|
|[------]      .      .      .      .      .| [0:1] 'I'
|.      [------]      .      .      .      .| [1:2] 'saw'
|.      .      [------]      .      .      .| [2:3] 'John'
|.      .      .      [------]      .      .| [3:4] 'with'
|.      .      .      .      [------]      .| [4:5] 'a'
|.      .      .      .      .      [------]| [5:6] 'dog'
|[------]      .      .      .      .      .| [0:1] NP -> 'I' *
|[------>      .      .      .      .      .| [0:1] S  -> NP * VP
|[------>      .      .      .      .      .| [0:1] NP -> NP * PP
|.      [------]      .      .      .      .| [1:2] Verb -> 'saw' *
|.      [------>      .      .      .      .| [1:2] VP -> Verb * NP
|.      [------]      .      .      .      .| [1:2] VP -> Verb *
|.      [------>      .      .      .      .| [1:2] VP -> VP * PP
|[-------------]      .      .      .      .| [0:2] S  -> NP VP *
|.      .      [------]      .      .      .| [2:3] NP -> 'John' *
|.      .      [------>      .      .      .| [2:3] S  -> NP * VP
|.      .      [------>      .      .      .| [2:3] NP -> NP * PP
|.      [-------------]      .      .      .| [1:3] VP -> Verb NP *
|.      [------------->      .      .      .| [1:3] VP -> VP * PP
|[--------------------]      .      .      .| [0:3] S  -> NP VP *
|.      .      .      [------>      .      .| [3:4] PP -> 'with' * NP
|.      .      .      [------]      .      .| [3:4] Prep -> 'with' *
|.      .      .      .      [------]      .| [4:5] Det -> 'a' *
|.      .      .      .      [------>      .| [4:5] NP -> Det * Noun
|.      .      .      .      .      [------]| [5:6] Noun -> 'dog' *
|.      .      .      .      [-------------]| [4:6] NP -> Det Noun *
|.      .      .      .      [------------->| [4:6] S  -> NP * VP
|.      .      .      .      [------------->| [4:6] NP -> NP * PP
|.      .      .      [--------------------]| [3:6] PP -> 'with' NP *
|.      .      [---------------------------]| [2:6] NP -> NP PP *
|.      [----------------------------------]| [1:6] VP -> VP PP *
|.      [---------------------------------->| [1:6] VP -> VP * PP
|[=========================================]| [0:6] S  -> NP VP *
|.      .      [--------------------------->| [2:6] S  -> NP * VP
|.      .      [--------------------------->| [2:6] NP -> NP * PP
|.      [----------------------------------]| [1:6] VP -> Verb NP *
|.      [---------------------------------->| [1:6] VP -> VP * PP
|[=========================================]| [0:6] S  -> NP VP *
(S
  (NP I)
  (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
(S
  (NP I)
  (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))

The stepping chart parser

 
>>> nltk.parse.chart.demo(4, should_print_times=False, trace=1,
...                       sent='I saw John with a dog', numparses=2)
* Sentence:
I saw John with a dog
['I', 'saw', 'John', 'with', 'a', 'dog']

* Strategy: Stepping (top-down vs bottom-up)

*** SWITCH TO TOP DOWN
|[------]      .      .      .      .      .| [0:1] 'I'
|.      [------]      .      .      .      .| [1:2] 'saw'
|.      .      [------]      .      .      .| [2:3] 'John'
|.      .      .      [------]      .      .| [3:4] 'with'
|.      .      .      .      [------]      .| [4:5] 'a'
|.      .      .      .      .      [------]| [5:6] 'dog'
|>      .      .      .      .      .      .| [0:0] S  -> * NP VP
|>      .      .      .      .      .      .| [0:0] NP -> * NP PP
|>      .      .      .      .      .      .| [0:0] NP -> * Det Noun
|>      .      .      .      .      .      .| [0:0] NP -> * 'I'
|[------]      .      .      .      .      .| [0:1] NP -> 'I' *
|[------>      .      .      .      .      .| [0:1] S  -> NP * VP
|[------>      .      .      .      .      .| [0:1] NP -> NP * PP
|.      >      .      .      .      .      .| [1:1] VP -> * VP PP
|.      >      .      .      .      .      .| [1:1] VP -> * Verb NP
|.      >      .      .      .      .      .| [1:1] VP -> * Verb
|.      >      .      .      .      .      .| [1:1] Verb -> * 'saw'
|.      [------]      .      .      .      .| [1:2] Verb -> 'saw' *
|.      [------>      .      .      .      .| [1:2] VP -> Verb * NP
|.      [------]      .      .      .      .| [1:2] VP -> Verb *
|[-------------]      .      .      .      .| [0:2] S  -> NP VP *
|.      [------>      .      .      .      .| [1:2] VP -> VP * PP
*** SWITCH TO BOTTOM UP
|.      .      >      .      .      .      .| [2:2] NP -> * 'John'
|.      .      .      >      .      .      .| [3:3] PP -> * 'with' NP
|.      .      .      >      .      .      .| [3:3] Prep -> * 'with'
|.      .      .      .      >      .      .| [4:4] Det -> * 'a'
|.      .      .      .      .      >      .| [5:5] Noun -> * 'dog'
|.      .      [------]      .      .      .| [2:3] NP -> 'John' *
|.      .      .      [------>      .      .| [3:4] PP -> 'with' * NP
|.      .      .      [------]      .      .| [3:4] Prep -> 'with' *
|.      .      .      .      [------]      .| [4:5] Det -> 'a' *
|.      .      .      .      .      [------]| [5:6] Noun -> 'dog' *
|.      [-------------]      .      .      .| [1:3] VP -> Verb NP *
|[--------------------]      .      .      .| [0:3] S  -> NP VP *
|.      [------------->      .      .      .| [1:3] VP -> VP * PP
|.      .      >      .      .      .      .| [2:2] S  -> * NP VP
|.      .      >      .      .      .      .| [2:2] NP -> * NP PP
|.      .      .      .      >      .      .| [4:4] NP -> * Det Noun
|.      .      [------>      .      .      .| [2:3] S  -> NP * VP
|.      .      [------>      .      .      .| [2:3] NP -> NP * PP
|.      .      .      .      [------>      .| [4:5] NP -> Det * Noun
|.      .      .      .      [-------------]| [4:6] NP -> Det Noun *
|.      .      .      [--------------------]| [3:6] PP -> 'with' NP *
|.      [----------------------------------]| [1:6] VP -> VP PP *
*** SWITCH TO TOP DOWN
|.      .      >      .      .      .      .| [2:2] NP -> * Det Noun
|.      .      .      .      >      .      .| [4:4] NP -> * NP PP
|.      .      .      >      .      .      .| [3:3] VP -> * VP PP
|.      .      .      >      .      .      .| [3:3] VP -> * Verb NP
|.      .      .      >      .      .      .| [3:3] VP -> * Verb
|[=========================================]| [0:6] S  -> NP VP *
|.      [---------------------------------->| [1:6] VP -> VP * PP
|.      .      [---------------------------]| [2:6] NP -> NP PP *
|.      .      .      .      [------------->| [4:6] NP -> NP * PP
|.      [----------------------------------]| [1:6] VP -> Verb NP *
|.      .      [--------------------------->| [2:6] S  -> NP * VP
|.      .      [--------------------------->| [2:6] NP -> NP * PP
|[=========================================]| [0:6] S  -> NP VP *
|.      [---------------------------------->| [1:6] VP -> VP * PP
|.      .      .      .      .      .      >| [6:6] VP -> * VP PP
|.      .      .      .      .      .      >| [6:6] VP -> * Verb NP
|.      .      .      .      .      .      >| [6:6] VP -> * Verb
*** SWITCH TO BOTTOM UP
|.      .      .      .      >      .      .| [4:4] S  -> * NP VP
|.      .      .      .      [------------->| [4:6] S  -> NP * VP
*** SWITCH TO TOP DOWN
*** SWITCH TO BOTTOM UP
*** SWITCH TO TOP DOWN
*** SWITCH TO BOTTOM UP
*** SWITCH TO TOP DOWN
*** SWITCH TO BOTTOM UP
(S
  (NP I)
  (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
(S
  (NP I)
  (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))

5   Unit tests for the Incremental Chart Parser class

The incremental chart parsers are defined in earleychart.py. We use the demo() function for testing. We must turn off the printing of times.

 
>>> import nltk

Earley Chart Parser

 
>>> nltk.parse.earleychart.demo(should_print_times=False, trace=1,
...                             sent='I saw John with a dog', numparses=2)
* Sentence:
I saw John with a dog
['I', 'saw', 'John', 'with', 'a', 'dog']

|.  I   . saw  . John . with .  a   . dog  .|
|[------]      .      .      .      .      .| [0:1] 'I'
|.      [------]      .      .      .      .| [1:2] 'saw'
|.      .      [------]      .      .      .| [2:3] 'John'
|.      .      .      [------]      .      .| [3:4] 'with'
|.      .      .      .      [------]      .| [4:5] 'a'
|.      .      .      .      .      [------]| [5:6] 'dog'
|>      .      .      .      .      .      .| [0:0] S  -> * NP VP
|>      .      .      .      .      .      .| [0:0] NP -> * NP PP
|>      .      .      .      .      .      .| [0:0] NP -> * Det Noun
|>      .      .      .      .      .      .| [0:0] NP -> * 'I'
|[------]      .      .      .      .      .| [0:1] NP -> 'I' *
|[------>      .      .      .      .      .| [0:1] S  -> NP * VP
|[------>      .      .      .      .      .| [0:1] NP -> NP * PP
|.      >      .      .      .      .      .| [1:1] VP -> * VP PP
|.      >      .      .      .      .      .| [1:1] VP -> * Verb NP
|.      >      .      .      .      .      .| [1:1] VP -> * Verb
|.      >      .      .      .      .      .| [1:1] Verb -> * 'saw'
|.      [------]      .      .      .      .| [1:2] Verb -> 'saw' *
|.      [------>      .      .      .      .| [1:2] VP -> Verb * NP
|.      [------]      .      .      .      .| [1:2] VP -> Verb *
|[-------------]      .      .      .      .| [0:2] S  -> NP VP *
|.      [------>      .      .      .      .| [1:2] VP -> VP * PP
|.      .      >      .      .      .      .| [2:2] NP -> * NP PP
|.      .      >      .      .      .      .| [2:2] NP -> * Det Noun
|.      .      >      .      .      .      .| [2:2] NP -> * 'John'
|.      .      [------]      .      .      .| [2:3] NP -> 'John' *
|.      [-------------]      .      .      .| [1:3] VP -> Verb NP *
|.      .      [------>      .      .      .| [2:3] NP -> NP * PP
|.      .      .      >      .      .      .| [3:3] PP -> * 'with' NP
|[--------------------]      .      .      .| [0:3] S  -> NP VP *
|.      [------------->      .      .      .| [1:3] VP -> VP * PP
|.      .      .      [------>      .      .| [3:4] PP -> 'with' * NP
|.      .      .      .      >      .      .| [4:4] NP -> * NP PP
|.      .      .      .      >      .      .| [4:4] NP -> * Det Noun
|.      .      .      .      >      .      .| [4:4] Det -> * 'a'
|.      .      .      .      [------]      .| [4:5] Det -> 'a' *
|.      .      .      .      [------>      .| [4:5] NP -> Det * Noun
|.      .      .      .      .      >      .| [5:5] Noun -> * 'dog'
|.      .      .      .      .      [------]| [5:6] Noun -> 'dog' *
|.      .      .      .      [-------------]| [4:6] NP -> Det Noun *
|.      .      .      [--------------------]| [3:6] PP -> 'with' NP *
|.      .      .      .      [------------->| [4:6] NP -> NP * PP
|.      .      [---------------------------]| [2:6] NP -> NP PP *
|.      [----------------------------------]| [1:6] VP -> VP PP *
|[=========================================]| [0:6] S  -> NP VP *
|.      [---------------------------------->| [1:6] VP -> VP * PP
|.      [----------------------------------]| [1:6] VP -> Verb NP *
|.      .      [--------------------------->| [2:6] NP -> NP * PP
|[=========================================]| [0:6] S  -> NP VP *
|.      [---------------------------------->| [1:6] VP -> VP * PP
(S
  (NP I)
  (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
(S
  (NP I)
  (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))

6   Unit tests for LARGE context-free grammars

Reading the ATIS grammar.

 
>>> grammar = nltk.data.load('grammars/large_grammars/atis.cfg')
>>> grammar
<Grammar with 5517 productions>

Reading the test sentences. Each extracted test sentence is paired with the number of parse trees it is expected to receive.

 
>>> sentences = nltk.data.load('grammars/large_grammars/atis_sentences.txt', format='raw')
>>> sentences = nltk.parse.util.extract_test_sentences(sentences)
>>> len(sentences)
98
>>> testsentence = sentences[22]
>>> testsentence[0]
['show', 'me', 'northwest', 'flights', 'to', 'detroit', '.']
>>> testsentence[1]
17
>>> sentence = testsentence[0]

Bottom-up parsing.

 
>>> parser = nltk.parse.BottomUpChartParser(grammar)
>>> chart = parser.chart_parse(sentence)
>>> print chart.num_edges()
7661
>>> print len(chart.parses(grammar.start()))
17

Bottom-up Left-corner parsing.

 
>>> parser = nltk.parse.BottomUpLeftCornerChartParser(grammar)
>>> chart = parser.chart_parse(sentence)
>>> print chart.num_edges()
4986
>>> print len(chart.parses(grammar.start()))
17

Top-down parsing.

 
>>> parser = nltk.parse.TopDownChartParser(grammar)
>>> chart = parser.chart_parse(sentence)
>>> print chart.num_edges()
28352
>>> print len(chart.parses(grammar.start()))
17

Earley parsing.

 
>>> parser = nltk.parse.EarleyChartParser(grammar)
>>> chart = parser.chart_parse(sentence)
>>> print chart.num_edges()
28352
>>> print len(chart.parses(grammar.start()))
17

7   Unit tests for the Probabilistic CFG class

 
>>> from nltk.corpus import treebank
>>> from itertools import islice
>>> from nltk import parse_pcfg, induce_pcfg, toy_pcfg1, toy_pcfg2

Create a set of probabilistic CFG productions.

 
>>> grammar = parse_pcfg("""
... A -> B B [.3] | C B C [.7]
... B -> B D [.5] | C [.5]
... C -> 'a' [.1] | 'b' [0.9]
... D -> 'b' [1.0]
... """)
>>> prod = grammar.productions()[0]
>>> prod
A -> B B [0.3]
 
>>> prod.lhs()
A
 
>>> prod.rhs()
(B, B)
 
>>> prod.prob()
0.29999999999999999
 
>>> grammar.start()
A
 
>>> grammar.productions()
[A -> B B [0.3], A -> C B C [0.7], B -> B D [0.5], B -> C [0.5], C -> 'a' [0.1], C -> 'b' [0.9], D -> 'b' [1.0]]

Induce some productions using parsed Treebank data.

 
>>> productions = []
>>> for fileid in treebank.fileids()[:2]:
...     for t in treebank.parsed_sents(fileid):
...         productions += t.productions()
 
>>> grammar = induce_pcfg(S, productions)
>>> grammar
<Grammar with 71 productions>
 
>>> grammar.productions()[:5]
[PP -> IN NP [1.0], NNP -> 'Nov.' [0.0714285714286], NNP -> 'Agnew' [0.0714285714286], JJ -> 'industrial' [0.142857142857], NP -> CD NNS [0.133333333333]]
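
Since the induced grammar is a proper PCFG, the probabilities of all productions sharing a left-hand side sum to one; PP, for instance, has a single production here:

 
>>> from nltk import Nonterminal
>>> sum(p.prob() for p in grammar.productions(Nonterminal('PP')))
1.0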

8   Unit tests for the Probabilistic Chart Parser classes

 
>>> tokens = "Jack saw Bob with my cookie".split()
>>> grammar = toy_pcfg2
>>> print grammar
Grammar with 23 productions (start state = S)
    S -> NP VP [1.0]
    VP -> V NP [0.59]
    VP -> V [0.4]
    VP -> VP PP [0.01]
    NP -> Det N [0.41]
    NP -> Name [0.28]
    NP -> NP PP [0.31]
    PP -> P NP [1.0]
    V -> 'saw' [0.21]
    V -> 'ate' [0.51]
    V -> 'ran' [0.28]
    N -> 'boy' [0.11]
    N -> 'cookie' [0.12]
    N -> 'table' [0.13]
    N -> 'telescope' [0.14]
    N -> 'hill' [0.5]
    Name -> 'Jack' [0.52]
    Name -> 'Bob' [0.48]
    P -> 'with' [0.61]
    P -> 'under' [0.39]
    Det -> 'the' [0.41]
    Det -> 'a' [0.31]
    Det -> 'my' [0.28]

Create several parsers using different queuing strategies and show the resulting parses.

 
>>> from nltk.parse import pchart
 
>>> parser = pchart.InsideChartParser(grammar)
>>> for t in parser.nbest_parse(tokens):
...     print t
(S
  (NP (Name Jack))
  (VP
    (V saw)
    (NP
      (NP (Name Bob))
      (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31606532355e-06)
(S
  (NP (Name Jack))
  (VP
    (VP (V saw) (NP (Name Bob)))
    (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744042695e-07)
 
>>> parser = pchart.RandomChartParser(grammar)
>>> for t in parser.nbest_parse(tokens):
...     print t
(S
  (NP (Name Jack))
  (VP
    (V saw)
    (NP
      (NP (Name Bob))
      (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31606532355e-06)
(S
  (NP (Name Jack))
  (VP
    (VP (V saw) (NP (Name Bob)))
    (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744042695e-07)
 
>>> parser = pchart.UnsortedChartParser(grammar)
>>> for t in parser.nbest_parse(tokens):
...     print t
(S
  (NP (Name Jack))
  (VP
    (V saw)
    (NP
      (NP (Name Bob))
      (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31606532355e-06)
(S
  (NP (Name Jack))
  (VP
    (VP (V saw) (NP (Name Bob)))
    (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744042695e-07)
 
>>> parser = pchart.LongestChartParser(grammar)
>>> for t in parser.nbest_parse(tokens):
...     print t
(S
  (NP (Name Jack))
  (VP
    (V saw)
    (NP
      (NP (Name Bob))
      (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31606532355e-06)
(S
  (NP (Name Jack))
  (VP
    (VP (V saw) (NP (Name Bob)))
    (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744042695e-07)
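
With a very small beam, pruning can discard the edges needed to complete a parse; below, a beam of len(tokens)+1 leaves the parser with no parses at all.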
 
>>> parser = pchart.InsideChartParser(grammar, beam_size = len(tokens)+1)
>>> for t in parser.nbest_parse(tokens):
...     print t

9   Unit tests for the Viterbi Parser class

 
>>> from nltk.parse import ViterbiParser
>>> tokens = "Jack saw Bob with my cookie".split()
>>> grammar = toy_pcfg2

Parse the tokenized sentence.

 
>>> parser = ViterbiParser(grammar)
>>> for t in parser.nbest_parse(tokens):
...     print t
(S
  (NP (Name Jack))
  (VP
    (V saw)
    (NP
      (NP (Name Bob))
      (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31606532355e-06)

10   Unit tests for the FeatStructNonterminal class

 
>>> from nltk.parse import FeatStructNonterminal
>>> FeatStructNonterminal(
...     pos='n', agr=FeatStructNonterminal(number='pl', gender='f'))
[agr=[gender='f', number='pl'], pos='n']
 
>>> FeatStructNonterminal('VP[+fin]/NP[+pl]')
VP[+fin]/NP[+pl]
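
Feature values can be looked up like dictionary entries (using the same feature structure as above):

 
>>> fs = FeatStructNonterminal(
...     pos='n', agr=FeatStructNonterminal(number='pl', gender='f'))
>>> fs['pos']
'n'
>>> fs['agr']['number']
'pl'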

11   Unit tests for the Feature Chart Parser classes

Let's use the demo() function as a unit test.

Top-down.

 
>>> nltk.parse.featurechart.demo(should_print_times=False,
...                              should_print_grammar=False, trace=1,
...                              parser=nltk.FeatureTopDownChartParser,
...                              sent='I saw John with a dog')

* FeatureTopDownChartParser
Sentence: I saw John with a dog
|.I.s.J.w.a.d.|
|[-] . . . . .| [0:1] 'I'
|. [-] . . . .| [1:2] 'saw'
|. . [-] . . .| [2:3] 'John'
|. . . [-] . .| [3:4] 'with'
|. . . . [-] .| [4:5] 'a'
|. . . . . [-]| [5:6] 'dog'
|> . . . . . .| [0:0] [INIT][] -> * S[] {}
|> . . . . . .| [0:0] S[] -> * NP[] VP[] {}
|> . . . . . .| [0:0] NP[] -> * NP[] PP[] {}
|> . . . . . .| [0:0] NP[] -> * Det[pl=?x] Noun[pl=?x] {}
|> . . . . . .| [0:0] NP[] -> * 'John' {}
|> . . . . . .| [0:0] NP[] -> * 'I' {}
|[-] . . . . .| [0:1] NP[] -> 'I' *
|[-> . . . . .| [0:1] S[] -> NP[] * VP[] {}
|[-> . . . . .| [0:1] NP[] -> NP[] * PP[] {}
|. > . . . . .| [1:1] PP[] -> * Prep[] NP[] {}
|. > . . . . .| [1:1] Prep[] -> * 'with' {}
|. > . . . . .| [1:1] Prep[] -> * 'under' {}
|. > . . . . .| [1:1] VP[] -> * VP[] PP[] {}
|. > . . . . .| [1:1] VP[] -> * Verb[] NP[] {}
|. > . . . . .| [1:1] VP[] -> * Verb[] {}
|. > . . . . .| [1:1] Verb[] -> * 'ate' {}
|. > . . . . .| [1:1] Verb[] -> * 'saw' {}
|. [-] . . . .| [1:2] Verb[] -> 'saw' *
|. [-> . . . .| [1:2] VP[] -> Verb[] * NP[] {}
|. [-] . . . .| [1:2] VP[] -> Verb[] *
|[---] . . . .| [0:2] S[] -> NP[] VP[] *
|. [-> . . . .| [1:2] VP[] -> VP[] * PP[] {}
|. . > . . . .| [2:2] PP[] -> * Prep[] NP[] {}
|. . > . . . .| [2:2] Prep[] -> * 'with' {}
|. . > . . . .| [2:2] Prep[] -> * 'under' {}
|[---] . . . .| [0:2] [INIT][] -> S[] *
|. . > . . . .| [2:2] NP[] -> * NP[] PP[] {}
|. . > . . . .| [2:2] NP[] -> * Det[pl=?x] Noun[pl=?x] {}
|. . > . . . .| [2:2] NP[] -> * 'John' {}
|. . > . . . .| [2:2] NP[] -> * 'I' {}
|. . [-] . . .| [2:3] NP[] -> 'John' *
|. [---] . . .| [1:3] VP[] -> Verb[] NP[] *
|. . [-> . . .| [2:3] NP[] -> NP[] * PP[] {}
|. . . > . . .| [3:3] PP[] -> * Prep[] NP[] {}
|. . . > . . .| [3:3] Prep[] -> * 'with' {}
|. . . > . . .| [3:3] Prep[] -> * 'under' {}
|. . . [-] . .| [3:4] Prep[] -> 'with' *
|. . . [-> . .| [3:4] PP[] -> Prep[] * NP[] {}
|. . . . > . .| [4:4] NP[] -> * NP[] PP[] {}
|. . . . > . .| [4:4] NP[] -> * Det[pl=?x] Noun[pl=?x] {}
|. . . . > . .| [4:4] NP[] -> * 'John' {}
|. . . . > . .| [4:4] NP[] -> * 'I' {}
|. . . . > . .| [4:4] Det[] -> * 'the' {}
|. . . . > . .| [4:4] Det[] -> * 'my' {}
|. . . . > . .| [4:4] Det[-pl] -> * 'a' {}
|. . . . [-] .| [4:5] Det[-pl] -> 'a' *
|. . . . [-> .| [4:5] NP[] -> Det[pl=?x] * Noun[pl=?x] {?x: False}
|. . . . . > .| [5:5] Noun[-pl] -> * 'dog' {}
|. . . . . > .| [5:5] Noun[-pl] -> * 'cookie' {}
|. . . . . [-]| [5:6] Noun[-pl] -> 'dog' *
|. . . . [---]| [4:6] NP[] -> Det[-pl] Noun[-pl] *
|. . . [-----]| [3:6] PP[] -> Prep[] NP[] *
|. . . . [--->| [4:6] NP[] -> NP[] * PP[] {}
|. . . . . . >| [6:6] PP[] -> * Prep[] NP[] {}
|. . . . . . >| [6:6] Prep[] -> * 'with' {}
|. . . . . . >| [6:6] Prep[] -> * 'under' {}
|. . [-------]| [2:6] NP[] -> NP[] PP[] *
|. [---------]| [1:6] VP[] -> Verb[] NP[] *
|. . [------->| [2:6] NP[] -> NP[] * PP[] {}
|[===========]| [0:6] S[] -> NP[] VP[] *
|. [--------->| [1:6] VP[] -> VP[] * PP[] {}
|[===========]| [0:6] [INIT][] -> S[] *
|[-----] . . .| [0:3] S[] -> NP[] VP[] *
|. [---> . . .| [1:3] VP[] -> VP[] * PP[] {}
|. [---------]| [1:6] VP[] -> VP[] PP[] *
|[===========]| [0:6] S[] -> NP[] VP[] *
|. [--------->| [1:6] VP[] -> VP[] * PP[] {}
|[-----] . . .| [0:3] [INIT][] -> S[] *
|. . > . . . .| [2:2] Det[] -> * 'the' {}
|. . > . . . .| [2:2] Det[] -> * 'my' {}
|. . > . . . .| [2:2] Det[-pl] -> * 'a' {}
|> . . . . . .| [0:0] Det[] -> * 'the' {}
|> . . . . . .| [0:0] Det[] -> * 'my' {}
|> . . . . . .| [0:0] Det[-pl] -> * 'a' {}
(S[]
  (NP[] I)
  (VP[]
    (Verb[] saw)
    (NP[]
      (NP[] John)
      (PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog))))))
(S[]
  (NP[] I)
  (VP[]
    (VP[] (Verb[] saw) (NP[] John))
    (PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog)))))

Bottom-Up Left-Corner.

 
>>> nltk.parse.featurechart.demo(should_print_times=False,
...                              should_print_grammar=False, trace=1,
...                              parser=nltk.FeatureBottomUpLeftCornerChartParser,
...                              sent='I saw John with a dog')

* FeatureBottomUpLeftCornerChartParser
Sentence: I saw John with a dog
|.I.s.J.w.a.d.|
|[-] . . . . .| [0:1] 'I'
|. [-] . . . .| [1:2] 'saw'
|. . [-] . . .| [2:3] 'John'
|. . . [-] . .| [3:4] 'with'
|. . . . [-] .| [4:5] 'a'
|. . . . . [-]| [5:6] 'dog'
|[-] . . . . .| [0:1] NP[] -> 'I' *
|[-> . . . . .| [0:1] S[] -> NP[] * VP[] {}
|[-> . . . . .| [0:1] NP[] -> NP[] * PP[] {}
|. [-] . . . .| [1:2] Verb[] -> 'saw' *
|. [-> . . . .| [1:2] VP[] -> Verb[] * NP[] {}
|. [-] . . . .| [1:2] VP[] -> Verb[] *
|. [-> . . . .| [1:2] VP[] -> VP[] * PP[] {}
|[---] . . . .| [0:2] S[] -> NP[] VP[] *
|. . [-] . . .| [2:3] NP[] -> 'John' *
|. . [-> . . .| [2:3] S[] -> NP[] * VP[] {}
|. . [-> . . .| [2:3] NP[] -> NP[] * PP[] {}
|. [---] . . .| [1:3] VP[] -> Verb[] NP[] *
|. [---> . . .| [1:3] VP[] -> VP[] * PP[] {}
|[-----] . . .| [0:3] S[] -> NP[] VP[] *
|. . . [-] . .| [3:4] Prep[] -> 'with' *
|. . . [-> . .| [3:4] PP[] -> Prep[] * NP[] {}
|. . . . [-] .| [4:5] Det[-pl] -> 'a' *
|. . . . [-> .| [4:5] NP[] -> Det[pl=?x] * Noun[pl=?x] {?x: False}
|. . . . . [-]| [5:6] Noun[-pl] -> 'dog' *
|. . . . [---]| [4:6] NP[] -> Det[-pl] Noun[-pl] *
|. . . . [--->| [4:6] S[] -> NP[] * VP[] {}
|. . . . [--->| [4:6] NP[] -> NP[] * PP[] {}
|. . . [-----]| [3:6] PP[] -> Prep[] NP[] *
|. . [-------]| [2:6] NP[] -> NP[] PP[] *
|. [---------]| [1:6] VP[] -> VP[] PP[] *
|. [--------->| [1:6] VP[] -> VP[] * PP[] {}
|[===========]| [0:6] S[] -> NP[] VP[] *
|. . [------->| [2:6] S[] -> NP[] * VP[] {}
|. . [------->| [2:6] NP[] -> NP[] * PP[] {}
|. [---------]| [1:6] VP[] -> Verb[] NP[] *
|. [--------->| [1:6] VP[] -> VP[] * PP[] {}
|[===========]| [0:6] S[] -> NP[] VP[] *
(S[]
  (NP[] I)
  (VP[]
    (Verb[] saw)
    (NP[]
      (NP[] John)
      (PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog))))))
(S[]
  (NP[] I)
  (VP[]
    (VP[] (Verb[] saw) (NP[] John))
    (PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog)))))

Earley.

 
>>> nltk.parse.featurechart.demo(should_print_times=False,
...                              should_print_grammar=False, trace=1,
...                              parser=nltk.FeatureEarleyChartParser,
...                              sent='I saw John with a dog')

* FeatureEarleyChartParser
Sentence: I saw John with a dog
|.I.s.J.w.a.d.|
|[-] . . . . .| [0:1] 'I'
|. [-] . . . .| [1:2] 'saw'
|. . [-] . . .| [2:3] 'John'
|. . . [-] . .| [3:4] 'with'
|. . . . [-] .| [4:5] 'a'
|. . . . . [-]| [5:6] 'dog'
|> . . . . . .| [0:0] [INIT][] -> * S[] {}
|> . . . . . .| [0:0] S[] -> * NP[] VP[] {}
|> . . . . . .| [0:0] NP[] -> * NP[] PP[] {}
|> . . . . . .| [0:0] NP[] -> * Det[pl=?x] Noun[pl=?x] {}
|> . . . . . .| [0:0] NP[] -> * 'John' {}
|> . . . . . .| [0:0] NP[] -> * 'I' {}
|> . . . . . .| [0:0] Det[] -> * 'the' {}
|> . . . . . .| [0:0] Det[] -> * 'my' {}
|> . . . . . .| [0:0] Det[-pl] -> * 'a' {}
|[-] . . . . .| [0:1] NP[] -> 'I' *
|[-> . . . . .| [0:1] S[] -> NP[] * VP[] {}
|[-> . . . . .| [0:1] NP[] -> NP[] * PP[] {}
|. > . . . . .| [1:1] PP[] -> * Prep[] NP[] {}
|. > . . . . .| [1:1] Prep[] -> * 'with' {}
|. > . . . . .| [1:1] Prep[] -> * 'under' {}
|. > . . . . .| [1:1] VP[] -> * VP[] PP[] {}
|. > . . . . .| [1:1] VP[] -> * Verb[] NP[] {}
|. > . . . . .| [1:1] VP[] -> * Verb[] {}
|. > . . . . .| [1:1] Verb[] -> * 'ate' {}
|. > . . . . .| [1:1] Verb[] -> * 'saw' {}
|. [-] . . . .| [1:2] Verb[] -> 'saw' *
|. [-> . . . .| [1:2] VP[] -> Verb[] * NP[] {}
|. [-] . . . .| [1:2] VP[] -> Verb[] *
|[---] . . . .| [0:2] S[] -> NP[] VP[] *
|. [-> . . . .| [1:2] VP[] -> VP[] * PP[] {}
|. . > . . . .| [2:2] PP[] -> * Prep[] NP[] {}
|. . > . . . .| [2:2] Prep[] -> * 'with' {}
|. . > . . . .| [2:2] Prep[] -> * 'under' {}
|[---] . . . .| [0:2] [INIT][] -> S[] *
|. . > . . . .| [2:2] NP[] -> * NP[] PP[] {}
|. . > . . . .| [2:2] NP[] -> * Det[pl=?x] Noun[pl=?x] {}
|. . > . . . .| [2:2] NP[] -> * 'John' {}
|. . > . . . .| [2:2] NP[] -> * 'I' {}
|. . > . . . .| [2:2] Det[] -> * 'the' {}
|. . > . . . .| [2:2] Det[] -> * 'my' {}
|. . > . . . .| [2:2] Det[-pl] -> * 'a' {}
|. . [-] . . .| [2:3] NP[] -> 'John' *
|. [---] . . .| [1:3] VP[] -> Verb[] NP[] *
|. . [-> . . .| [2:3] NP[] -> NP[] * PP[] {}
|. . . > . . .| [3:3] PP[] -> * Prep[] NP[] {}
|. . . > . . .| [3:3] Prep[] -> * 'with' {}
|. . . > . . .| [3:3] Prep[] -> * 'under' {}
|[-----] . . .| [0:3] S[] -> NP[] VP[] *
|. [---> . . .| [1:3] VP[] -> VP[] * PP[] {}
|[-----] . . .| [0:3] [INIT][] -> S[] *
|. . . [-] . .| [3:4] Prep[] -> 'with' *
|. . . [-> . .| [3:4] PP[] -> Prep[] * NP[] {}
|. . . . > . .| [4:4] NP[] -> * NP[] PP[] {}
|. . . . > . .| [4:4] NP[] -> * Det[pl=?x] Noun[pl=?x] {}
|. . . . > . .| [4:4] NP[] -> * 'John' {}
|. . . . > . .| [4:4] NP[] -> * 'I' {}
|. . . . > . .| [4:4] Det[] -> * 'the' {}
|. . . . > . .| [4:4] Det[] -> * 'my' {}
|. . . . > . .| [4:4] Det[-pl] -> * 'a' {}
|. . . . [-] .| [4:5] Det[-pl] -> 'a' *
|. . . . [-> .| [4:5] NP[] -> Det[pl=?x] * Noun[pl=?x] {?x: False}
|. . . . . > .| [5:5] Noun[-pl] -> * 'dog' {}
|. . . . . > .| [5:5] Noun[-pl] -> * 'cookie' {}
|. . . . . [-]| [5:6] Noun[-pl] -> 'dog' *
|. . . . [---]| [4:6] NP[] -> Det[-pl] Noun[-pl] *
|. . . [-----]| [3:6] PP[] -> Prep[] NP[] *
|. . . . [--->| [4:6] NP[] -> NP[] * PP[] {}
|. . . . . . >| [6:6] PP[] -> * Prep[] NP[] {}
|. . . . . . >| [6:6] Prep[] -> * 'with' {}
|. . . . . . >| [6:6] Prep[] -> * 'under' {}
|. . [-------]| [2:6] NP[] -> NP[] PP[] *
|. [---------]| [1:6] VP[] -> VP[] PP[] *
|[===========]| [0:6] S[] -> NP[] VP[] *
|. [--------->| [1:6] VP[] -> VP[] * PP[] {}
|[===========]| [0:6] [INIT][] -> S[] *
|. [---------]| [1:6] VP[] -> Verb[] NP[] *
|. . [------->| [2:6] NP[] -> NP[] * PP[] {}
|[===========]| [0:6] S[] -> NP[] VP[] *
|. [--------->| [1:6] VP[] -> VP[] * PP[] {}
(S[]
  (NP[] I)
  (VP[]
    (Verb[] saw)
    (NP[]
      (NP[] John)
      (PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog))))))
(S[]
  (NP[] I)
  (VP[]
    (VP[] (Verb[] saw) (NP[] John))
    (PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog)))))

12   Tests for loading grammar files

 
>>> from nltk.parse import FeatureEarleyChartParser
>>> from nltk import data
>>> feat_cfg = data.load('grammars/book_grammars/feat0.fcfg')
>>> fcp = FeatureEarleyChartParser(feat_cfg)