1
2
3
4
5
6
7
8
9 """
10 Classes and interfaces for tagging each token of a sentence with
11 supplementary information, such as its part of speech. This task,
12 which is known as X{tagging}, is defined by the L{TaggerI} interface.
13 """
14
15 from api import *
16 from util import *
17 from simplify import *
18 from sequential import *
19 from brill import *
20 from tnt import *
21 from hunpos import *
22 from stanford import *
23 import nltk
24
# Public names exported by a star-import of this package.  The entries are
# grouped by theme and kept in their original order.
__all__ = [
    # Tagger interface
    'TaggerI',

    # Helpers built on the recommended part-of-speech tagger
    'pos_tag',
    'batch_pos_tag',

    # Default, n-gram, affix and regexp taggers
    'DefaultTagger',
    'UnigramTagger',
    'BigramTagger',
    'TrigramTagger',
    'NgramTagger',
    'AffixTagger',
    'RegexpTagger',

    # Brill tagger and its trainers
    'BrillTagger',
    'BrillTaggerTrainer',
    'FastBrillTaggerTrainer',

    # Helper function
    'untag',
]
47
48
# The HMM tagger is optional: numpy is imported first (presumably because
# the hmm module depends on it -- confirm), and if either import fails the
# HMM names are simply left out of the public API.
try:
    import numpy
    from hmm import *
except ImportError:
    pass
else:
    # Both imports succeeded, so advertise the HMM classes as well.
    __all__ += [
        'HiddenMarkovModelTagger',
        'HiddenMarkovModelTrainer',
    ]
56
57
# Resource path, handed to nltk.data.load(), of the pickled tagger used by
# the convenience functions in this module.
_POS_TAGGER = 'taggers/maxent_treebank_pos_tagger/english.pickle'


def pos_tag(tokens):
    """
    Use NLTK's currently recommended part of speech tagger to
    tag the given list of tokens.

    @param tokens: The list of tokens to tag.
    @return: The result of calling the loaded tagger's C{tag} method
        on C{tokens}.
    """
    # The tagger is fetched through nltk.data.load() on every call.
    tagger = nltk.data.load(_POS_TAGGER)
    return tagger.tag(tokens)
66
def batch_pos_tag(sentences):
    """
    Use NLTK's currently recommended part of speech tagger to tag the
    given list of sentences, each consisting of a list of tokens.

    @param sentences: The list of sentences to tag, where each sentence
        is itself a list of tokens.
    @return: The result of calling the loaded tagger's C{batch_tag}
        method on C{sentences}.
    """
    # The tagger is fetched through nltk.data.load() on every call.
    tagger = nltk.data.load(_POS_TAGGER)
    return tagger.batch_tag(sentences)
74