1
2
3
4
5
6
7
8
9 import re
10 import string
11
12 from nltk.tree import Tree
13 import nltk.tag.util
14
15 from api import *
16
17
18
19
20
21 from nltk.metrics import accuracy as _accuracy
23 """
24 Score the accuracy of the chunker against the gold standard.
25 Strip the chunk information from the gold standard and rechunk it using
26 the chunker, then compute the accuracy score.
27
28 @type chunker: C{ChunkParserI}
29 @param chunker: The chunker being evaluated.
30 @type gold: C{tree}
31 @param gold: The chunk structures to score the chunker on.
32 @rtype: C{float}
33 """
34
35 gold_tags = []
36 test_tags = []
37 for gold_tree in gold:
38 test_tree = chunker.parse(gold_tree.flatten())
39 gold_tags += tree2conlltags(gold_tree)
40 test_tags += tree2conlltags(test_tree)
41
42
43
44 return _accuracy(gold_tags, test_tags)
45
46
47
48
49
50
51
53 """
54 A utility class for scoring chunk parsers. C{ChunkScore} can
55 evaluate a chunk parser's output, based on a number of statistics
56 (precision, recall, f-measure, misssed chunks, incorrect chunks).
57 It can also combine the scores from the parsing of multiple texts;
58 this makes it signifigantly easier to evaluate a chunk parser that
59 operates one sentence at a time.
60
61 Texts are evaluated with the C{score} method. The results of
62 evaluation can be accessed via a number of accessor methods, such
63 as C{precision} and C{f_measure}. A typical use of the
64 C{ChunkScore} class is::
65
66 >>> chunkscore = ChunkScore()
67 >>> for correct in correct_sentences:
68 ... guess = chunkparser.parse(correct.leaves())
69 ... chunkscore.score(correct, guess)
70 >>> print 'F Measure:', chunkscore.f_measure()
71 F Measure: 0.823
72
73 @ivar kwargs: Keyword arguments:
74
75 - max_tp_examples: The maximum number actual examples of true
76 positives to record. This affects the C{correct} member
77 function: C{correct} will not return more than this number
78 of true positive examples. This does *not* affect any of
79 the numerical metrics (precision, recall, or f-measure)
80
81 - max_fp_examples: The maximum number actual examples of false
82 positives to record. This affects the C{incorrect} member
83 function and the C{guessed} member function: C{incorrect}
84 will not return more than this number of examples, and
85 C{guessed} will not return more than this number of true
86 positive examples. This does *not* affect any of the
87 numerical metrics (precision, recall, or f-measure)
88
89 - max_fn_examples: The maximum number actual examples of false
90 negatives to record. This affects the C{missed} member
91 function and the C{correct} member function: C{missed}
92 will not return more than this number of examples, and
93 C{correct} will not return more than this number of true
94 negative examples. This does *not* affect any of the
95 numerical metrics (precision, recall, or f-measure)
96
97 - chunk_node: A regular expression indicating which chunks
98 should be compared. Defaults to C{'.*'} (i.e., all chunks).
99
100 @type _tp: C{list} of C{Token}
101 @ivar _tp: List of true positives
102 @type _fp: C{list} of C{Token}
103 @ivar _fp: List of false positives
104 @type _fn: C{list} of C{Token}
105 @ivar _fn: List of false negatives
106
107 @type _tp_num: C{int}
108 @ivar _tp_num: Number of true positives
109 @type _fp_num: C{int}
110 @ivar _fp_num: Number of false positives
111 @type _fn_num: C{int}
112 @ivar _fn_num: Number of false negatives.
113 """
115 self._correct = set()
116 self._guessed = set()
117 self._tp = set()
118 self._fp = set()
119 self._fn = set()
120 self._max_tp = kwargs.get('max_tp_examples', 100)
121 self._max_fp = kwargs.get('max_fp_examples', 100)
122 self._max_fn = kwargs.get('max_fn_examples', 100)
123 self._chunk_node = kwargs.get('chunk_node', '.*')
124 self._tp_num = 0
125 self._fp_num = 0
126 self._fn_num = 0
127 self._count = 0
128 self._tags_correct = 0.0
129 self._tags_total = 0.0
130
131 self._measuresNeedUpdate = False
132
134 if (self._measuresNeedUpdate):
135 self._tp = self._guessed & self._correct
136 self._fn = self._correct - self._guessed
137 self._fp = self._guessed - self._correct
138 self._tp_num = len(self._tp)
139 self._fp_num = len(self._fp)
140 self._fn_num = len(self._fn)
141 self._measuresNeedUpdate = False
142
143 - def score(self, correct, guessed):
144 """
145 Given a correctly chunked sentence, score another chunked
146 version of the same sentence.
147
148 @type correct: chunk structure
149 @param correct: The known-correct ("gold standard") chunked
150 sentence.
151 @type guessed: chunk structure
152 @param guessed: The chunked sentence to be scored.
153 """
154 self._correct |= _chunksets(correct, self._count, self._chunk_node)
155 self._guessed |= _chunksets(guessed, self._count, self._chunk_node)
156 self._count += 1
157 self._measuresNeedUpdate = True
158
159 try:
160 correct_tags = tree2conlltags(correct)
161 guessed_tags = tree2conlltags(guessed)
162 except ValueError:
163
164
165
166 correct_tags = guessed_tags = ()
167 self._tags_total += len(correct_tags)
168 self._tags_correct += sum(1 for (t,g) in zip(guessed_tags,
169 correct_tags)
170 if t==g)
171
173 """
174 @return: The overall tag-based accuracy for all text that have
175 been scored by this C{ChunkScore}, using the IOB (conll2000)
176 tag encoding.
177 """
178 if self._tags_total == 0: return 1
179 return self._tags_correct/self._tags_total
180
182 """
183 @return: the overall precision for all texts that have been
184 scored by this C{ChunkScore}.
185 @rtype: C{float}
186 """
187 self._updateMeasures()
188 div = self._tp_num + self._fp_num
189 if div == 0: return 0
190 else: return float(self._tp_num) / div
191
193 """
194 @return: the overall recall for all texts that have been
195 scored by this C{ChunkScore}.
196 @rtype: C{float}
197 """
198 self._updateMeasures()
199 div = self._tp_num + self._fn_num
200 if div == 0: return 0
201 else: return float(self._tp_num) / div
202
204 """
205 @return: the overall F measure for all texts that have been
206 scored by this C{ChunkScore}.
207 @rtype: C{float}
208
209 @param alpha: the relative weighting of precision and recall.
210 Larger alpha biases the score towards the precision value,
211 while smaller alpha biases the score towards the recall
212 value. C{alpha} should have a value in the range [0,1].
213 @type alpha: C{float}
214 """
215 self._updateMeasures()
216 p = self.precision()
217 r = self.recall()
218 if p == 0 or r == 0:
219 return 0
220 return 1/(alpha/p + (1-alpha)/r)
221
223 """
224 @rtype: C{list} of chunks
225 @return: the chunks which were included in the
226 correct chunk structures, but not in the guessed chunk
227 structures, listed in input order.
228 """
229 self._updateMeasures()
230 chunks = list(self._fn)
231 return [c[1] for c in chunks]
232
234 """
235 @rtype: C{list} of chunks
236 @return: the chunks which were included in the
237 guessed chunk structures, but not in the correct chunk
238 structures, listed in input order.
239 """
240 self._updateMeasures()
241 chunks = list(self._fp)
242 return [c[1] for c in chunks]
243
245 """
246 @rtype: C{list} of chunks
247 @return: the chunks which were included in the correct
248 chunk structures, listed in input order.
249 """
250 chunks = list(self._correct)
251 return [c[1] for c in chunks]
252
254 """
255 @rtype: C{list} of chunks
256 @return: the chunks which were included in the guessed
257 chunk structures, listed in input order.
258 """
259 chunks = list(self._guessed)
260 return [c[1] for c in chunks]
261
265
267 """
268 @rtype: C{String}
269 @return: a concise representation of this C{ChunkScoring}.
270 """
271 return '<ChunkScoring of '+`len(self)`+' chunks>'
272
274 """
275 @rtype: C{String}
276 @return: a verbose representation of this C{ChunkScoring}.
277 This representation includes the precision, recall, and
278 f-measure scores. For other information about the score,
279 use the accessor methods (e.g., C{missed()} and
280 C{incorrect()}).
281 """
282 return ("ChunkParse score:\n" +
283 (" IOB Accuracy: %5.1f%%\n" % (self.accuracy()*100)) +
284 (" Precision: %5.1f%%\n" % (self.precision()*100)) +
285 (" Recall: %5.1f%%\n" % (self.recall()*100))+
286 (" F-Measure: %5.1f%%" % (self.f_measure()*100)))
287
288
289
301
302
346
347
348
349 _LINE_RE = re.compile('(\S+)\s+(\S+)\s+([IOB])-?(\S+)?')
350 -def conllstr2tree(s, chunk_types=('NP', 'PP', 'VP'), top_node="S"):
351 """
352 Convert a CoNLL IOB string into a tree. Uses the specified chunk types
353 (defaults to NP, PP and VP), and creates a tree rooted at a node
354 labeled S (by default).
355
356 @param s: The CoNLL string to be converted.
357 @type s: C{string}
358 @param chunk_types: The chunk types to be converted.
359 @type chunk_types: C{tuple}
360 @param top_node: The node label to use for the root.
361 @type top_node: C{string}
362 @return: A chunk structure for a single sentence
363 encoded in the given CONLL 2000 style string.
364 @rtype: L{Tree}
365 """
366
367 stack = [Tree(top_node, [])]
368
369 for lineno, line in enumerate(s.split('\n')):
370 if not line.strip(): continue
371
372
373 match = _LINE_RE.match(line)
374 if match is None:
375 raise ValueError, 'Error on line %d' % lineno
376 (word, tag, state, chunk_type) = match.groups()
377
378
379 if (chunk_types is not None and
380 chunk_type not in chunk_types):
381 state = 'O'
382
383
384
385 mismatch_I = state == 'I' and chunk_type != stack[-1].node
386 if state in 'BO' or mismatch_I:
387 if len(stack) == 2: stack.pop()
388
389
390 if state == 'B' or mismatch_I:
391 chunk = Tree(chunk_type, [])
392 stack[-1].append(chunk)
393 stack.append(chunk)
394
395
396 stack[-1].append((word, tag))
397
398 return stack[0]
399
423
454
456 """
457 Convert a tree to the CoNLL IOB string format
458
459 @param t: The tree to be converted.
460 @type t: C{Tree}
461 @return: A multiline string where each line contains a word, tag and IOB tag.
462 @rtype: C{string}
463 """
464 lines = [string.join(token) for token in tree2conlltags(t)]
465 return '\n'.join(lines)
466
467
468
469 _IEER_DOC_RE = re.compile(r'<DOC>\s*'
470 r'(<DOCNO>\s*(?P<docno>.+?)\s*</DOCNO>\s*)?'
471 r'(<DOCTYPE>\s*(?P<doctype>.+?)\s*</DOCTYPE>\s*)?'
472 r'(<DATE_TIME>\s*(?P<date_time>.+?)\s*</DATE_TIME>\s*)?'
473 r'<BODY>\s*'
474 r'(<HEADLINE>\s*(?P<headline>.+?)\s*</HEADLINE>\s*)?'
475 r'<TEXT>(?P<text>.*?)</TEXT>\s*'
476 r'</BODY>\s*</DOC>\s*', re.DOTALL)
477
478 _IEER_TYPE_RE = re.compile('<b_\w+\s+[^>]*?type="(?P<type>\w+)"')
479
480 -def _ieer_read_text(s, top_node):
481 stack = [Tree(top_node, [])]
482
483
484 if s is None:
485 return []
486 for piece_m in re.finditer('<[^>]+>|[^\s<]+', s):
487 piece = piece_m.group()
488 try:
489 if piece.startswith('<b_'):
490 m = _IEER_TYPE_RE.match(piece)
491 if m is None: print 'XXXX', piece
492 chunk = Tree(m.group('type'), [])
493 stack[-1].append(chunk)
494 stack.append(chunk)
495 elif piece.startswith('<e_'):
496 stack.pop()
497
498
499
500 else:
501 stack[-1].append(piece)
502 except (IndexError, ValueError):
503 raise ValueError('Bad IEER string (error at character %d)' %
504 piece_m.start())
505 if len(stack) != 1:
506 raise ValueError('Bad IEER string')
507 return stack[0]
508
509 -def ieerstr2tree(s, chunk_types = ['LOCATION', 'ORGANIZATION', 'PERSON', 'DURATION',
510 'DATE', 'CARDINAL', 'PERCENT', 'MONEY', 'MEASURE'], top_node="S"):
511 """
512 Convert a string of chunked tagged text in the IEER named
513 entity format into a chunk structure. Chunks are of several
514 types, LOCATION, ORGANIZATION, PERSON, DURATION, DATE, CARDINAL,
515 PERCENT, MONEY, and MEASURE.
516
517 @return: A chunk structure containing the chunked tagged text that is
518 encoded in the given IEER style string.
519 @rtype: L{Tree}
520 """
521
522
523
524 m = _IEER_DOC_RE.match(s)
525 if m:
526 return {
527 'text': _ieer_read_text(m.group('text'), top_node),
528 'docno': m.group('docno'),
529 'doctype': m.group('doctype'),
530 'date_time': m.group('date_time'),
531
532
533 'headline': _ieer_read_text(m.group('headline'), top_node),
534 }
535 else:
536 return _ieer_read_text(s, top_node)
537
538
540
541 s = "[ Pierre/NNP Vinken/NNP ] ,/, [ 61/CD years/NNS ] old/JJ ,/, will/MD join/VB [ the/DT board/NN ] ./."
542 import nltk
543 t = nltk.chunk.tagstr2tree(s, chunk_node='NP')
544 print t.pprint()
545 print
546
547 s = """
548 These DT B-NP
549 research NN I-NP
550 protocols NNS I-NP
551 offer VBP B-VP
552 to TO B-PP
553 the DT B-NP
554 patient NN I-NP
555 not RB O
556 only RB O
557 the DT B-NP
558 very RB I-NP
559 best JJS I-NP
560 therapy NN I-NP
561 which WDT B-NP
562 we PRP B-NP
563 have VBP B-VP
564 established VBN I-VP
565 today NN B-NP
566 but CC B-NP
567 also RB I-NP
568 the DT B-NP
569 hope NN I-NP
570 of IN B-PP
571 something NN B-NP
572 still RB B-ADJP
573 better JJR I-ADJP
574 . . O
575 """
576
577 conll_tree = conllstr2tree(s, chunk_types=('NP', 'PP'))
578 print conll_tree.pprint()
579
580
581 print "CoNLL output:"
582 print nltk.chunk.tree2conllstr(conll_tree)
583 print
584
585
586 if __name__ == '__main__':
587 demo()
588