Package nltk :: Package tag
[hide private]
[frames | no frames]

Source Code for Package nltk.tag

 1  # Natural Language Toolkit: Taggers 
 2  # 
 3  # Copyright (C) 2001-2011 NLTK Project 
 4  # Author: Edward Loper <edloper@gradient.cis.upenn.edu> 
 5  #         Steven Bird <sb@csse.unimelb.edu.au> (minor additions) 
 6  # URL: <http://www.nltk.org/> 
 7  # For license information, see LICENSE.TXT 
 8   
 9  """ 
10  Classes and interfaces for tagging each token of a sentence with 
11  supplementary information, such as its part of speech.  This task, 
12  which is known as X{tagging}, is defined by the L{TaggerI} interface. 
13  """ 
14   
15  from api import * 
16  from util import * 
17  from simplify import * 
18  from sequential import * 
19  from brill import * 
20  from tnt import * 
21  from hunpos import * 
22  from stanford import * 
23  import nltk 
24   
# Names exported by ``from nltk.tag import *``.
__all__ = [
    # The tagger interface.
    'TaggerI',

    # The standard part-of-speech tagging functions.
    'pos_tag',
    'batch_pos_tag',

    # Open question: should these be exported as well?
    #'SequentialBackoffTagger', 'ContextTagger',

    # Sequential backoff taggers.
    'DefaultTagger',
    'UnigramTagger',
    'BigramTagger',
    'TrigramTagger',
    'NgramTagger',
    'AffixTagger',
    'RegexpTagger',

    # Brill tagger (open question: are the trainer names right?).
    'BrillTagger',
    'BrillTaggerTrainer',
    'FastBrillTaggerTrainer',

    # Utilities.  The x2y conversion functions are deliberately not
    # listed; access them as nltk.tag.x2y().  The same applies to
    # nltk.tag.accuracy.
    'untag',
]
47   
# The hmm module is only imported (and its taggers only exported) when
# numpy can be imported; otherwise the package is left without it.
try:
    import numpy
    from hmm import *
except ImportError:
    pass
else:
    # Both imports succeeded, so advertise the HMM classes as well.
    # [xx] deprecated HiddenMarkovModel etc objects?
    __all__.extend(['HiddenMarkovModelTagger', 'HiddenMarkovModelTrainer'])
56   
# Resource name of the standard treebank POS tagger, as handed to
# nltk.data.load().
_POS_TAGGER = 'taggers/maxent_treebank_pos_tagger/english.pickle'


def pos_tag(tokens):
    """
    Tag the given list of tokens using NLTK's currently recommended
    part of speech tagger.

    @param tokens: The list of tokens to tag.
    @return: The result of calling the loaded tagger's C{tag} method
        on C{tokens}.
    """
    return nltk.data.load(_POS_TAGGER).tag(tokens)
66
def batch_pos_tag(sentences):
    """
    Tag the given list of sentences, each consisting of a list of
    tokens, using NLTK's currently recommended part of speech tagger.

    @param sentences: The list of sentences to tag, each one a list
        of tokens.
    @return: The result of calling the loaded tagger's C{batch_tag}
        method on C{sentences}.
    """
    return nltk.data.load(_POS_TAGGER).batch_tag(sentences)
74