Package nltk :: Package tag :: Module simplify
[hide private]
[frames | no frames]

Source Code for Module nltk.tag.simplify

# Natural Language Toolkit: POS Tag Simplification
#
# Copyright (C) 2001-2011 NLTK Project
# Author: Steven Bird <sb@csse.unimelb.edu.au>
# URL: <http://www.nltk.org/>
# For license information, see LICENSE.TXT


######################################################################
#{ Brown
######################################################################

# http://khnt.hit.uib.no/icame/manuals/brown/INDEX.HTM

 15  brown_mapping1 = { 
 16      'j': 'ADJ', 'p': 'PRO', 'm': 'MOD', 'q': 'DET', 
 17      'w': 'WH', 'r': 'ADV', 'i': 'P', 
 18      'u': 'UH', 'e': 'EX', 'o': 'NUM', 'b': 'V', 
 19      'h': 'V', 'f': 'FW', 'a': 'DET', 't': 'TO', 
 20      'cc': 'CNJ', 'cs': 'CNJ', 'cd': 'NUM', 
 21      'do': 'V', 'dt': 'DET', 
 22      'nn': 'N', 'nr': 'N', 'np': 'NP', 'nc': 'N' 
 23      } 
 24  brown_mapping2 = { 
 25      'vb': 'V', 'vbd': 'VD', 'vbg': 'VG', 'vbn': 'VN' 
 26      } 
 27   
def simplify_brown_tag(tag):
    """
    Map a Brown corpus POS tag to a simplified tag.

    The tag is lower-cased and matched against C{brown_mapping1}, first by
    its initial character and then by its first two characters.  If neither
    prefix is known, any hyphenated suffix is dropped and the remainder is
    looked up in C{brown_mapping2}; a tag that is still unknown is returned
    upper-cased (without its suffix).

    An empty tag is returned as the empty string instead of raising
    C{IndexError}, and a tag that begins with a hyphen (e.g. C{--}) is kept
    whole rather than being reduced to the empty string.

    @param tag: the Brown tag to simplify
    @type tag: C{str}
    @return: the simplified tag
    @rtype: C{str}
    """
    tag = tag.lower()
    if not tag:
        # Nothing to simplify; tag[0] below would raise IndexError.
        return tag
    if tag[0] in brown_mapping1:
        return brown_mapping1[tag[0]]
    if tag[:2] in brown_mapping1:   # still doesn't handle DOD tag correctly
        return brown_mapping1[tag[:2]]
    # Drop a hyphenated suffix, but only when something precedes the first
    # hyphen: splitting a tag such as '--' would otherwise leave ''.
    head = tag.split('-')[0]
    if head:
        tag = head
    return brown_mapping2.get(tag, tag.upper())
######################################################################
#{ Wall Street Journal tags (Penn Treebank)
######################################################################

# Lower-cased Penn Treebank tags and their simplified equivalents.
wsj_mapping = {
    # Bracket tokens: every opening kind becomes '(' and every closing ')'.
    '-lrb-': '(',
    '-rrb-': ')',
    '-lsb-': '(',
    '-rsb-': ')',
    '-lcb-': '(',
    '-rcb-': ')',
    '-none-': '',
    'cc': 'CNJ',
    'cd': 'NUM',
    'dt': 'DET',
    'ex': 'EX',       # existential "there"
    'fw': 'FW',       # foreign word
    'in': 'P',
    'jj': 'ADJ',
    'jjr': 'ADJ',
    'jjs': 'ADJ',
    'ls': 'L',        # list item marker
    'md': 'MOD',
    'nn': 'N',
    'nnp': 'NP',
    'nnps': 'NP',
    'nns': 'N',
    'pdt': 'DET',
    'pos': '',
    'prp': 'PRO',
    'prp$': 'PRO',
    'rb': 'ADV',
    'rbr': 'ADV',
    'rbs': 'ADV',
    # NOTE(review): 'rp' maps to the same value as the pronoun tags;
    # confirm that 'PRO' is the intended simplified tag here.
    'rp': 'PRO',
    'sym': 'S',
    'to': 'TO',
    'uh': 'UH',
    'vb': 'V',
    'vbd': 'VD',
    'vbg': 'VG',
    'vbn': 'VN',
    'vbp': 'V',
    'vbz': 'V',
    'wdt': 'WH',
    'wp': 'WH',
    'wp$': 'WH',
    'wrb': 'WH',
    # Additions for the NPS Chat corpus.
    'bes': 'V',
    'hvs': 'V',
    'prp^vbp': 'PRO',
}
def simplify_wsj_tag(tag):
    """
    Map a Wall Street Journal (Penn Treebank) POS tag to a simplified tag.

    One leading caret, if present, is removed.  The rest is lower-cased and
    looked up in C{wsj_mapping}; a tag with no entry is passed through.
    Either way the result is upper-cased before it is returned.

    @param tag: the tag to simplify
    @type tag: C{str}
    @return: the simplified tag, in upper case
    @rtype: C{str}
    """
    if tag.startswith('^'):
        tag = tag[1:]
    return wsj_mapping.get(tag.lower(), tag).upper()
# Lower-cased tags from the Indian-language corpora and their simplified
# equivalents.  A few keys contain stray punctuation or raw bytes; they are
# reproduced exactly as found.
indian_mapping = {
    'nn': 'N',
    'vm': 'MOD',
    'jj': 'ADJ',
    'nnp': 'NP',
    'prp': 'PRO',
    'prep': 'PRE',
    'vaux': 'V',
    'vfm': 'V',
    'cc': 'CNJ',
    'nnpc': 'NP',
    'nnc': 'N',
    'qc': 'QC',
    'dem': 'DET',
    'vrb': 'V',
    'qfnum': 'NUM',
    'rb': 'ADV',
    'qf': 'DET',
    'punc': '.',
    'rp': 'PRT',
    'psp': 'PSP',
    'nst': 'N',
    'nvb': 'N',
    'vjj': 'V',
    'neg': 'NEG',
    'vnn': 'V',
    'xc': 'XC',
    'intf': 'INTF',
    'nloc': 'N',
    'jvb': 'ADJ',
    'wq': 'WH',
    'qw': 'WH',
    # simplify_indian_tag cuts the tag at the first ':' before looking it
    # up, so it reaches the 'jj' entry rather than this one.
    'jj:?': 'ADJ',
    '"cc': 'CNJ',
    'nnp,': 'NP',
    'sym\xc0\xa7\xb7': 'SYM',
    'symc': 'SYM',
}
def simplify_indian_tag(tag):
    """
    Map a POS tag from the Indian-language corpora to a simplified tag.

    Anything from the first colon onwards is discarded.  What remains is
    lower-cased and looked up in C{indian_mapping}; a tag with no entry is
    passed through.  Either way the result is upper-cased before it is
    returned.

    @param tag: the tag to simplify
    @type tag: C{str}
    @return: the simplified tag, in upper case
    @rtype: C{str}
    """
    if ':' in tag:
        tag = tag[:tag.index(':')]
    return indian_mapping.get(tag.lower(), tag).upper()
######################################################################
#{ Alpino tags
######################################################################

# Alpino tags and their simplified equivalents.
alpino_mapping = {
    'noun': 'N',
    'name': 'NP',
    'vg': 'VG',
    'punct': '.',
    'verb': 'V',
    'pron': 'PRO',
    'prep': 'P',
}
def simplify_alpino_tag(tag):
    """
    Map an Alpino POS tag to a simplified tag.

    The tag is looked up in C{alpino_mapping} exactly as given (the lookup
    is case-sensitive); a tag with no entry is passed through.  Either way
    the result is upper-cased before it is returned.

    @param tag: the tag to simplify
    @type tag: C{str}
    @return: the simplified tag, in upper case
    @rtype: C{str}
    """
    return alpino_mapping.get(tag, tag).upper()

######################################################################
#{ Default tag simplification
######################################################################

def simplify_tag(tag):
    """
    Simplify a POS tag to its first character, upper-cased.

    An empty tag yields the empty string instead of raising C{IndexError}.

    @param tag: the tag to simplify
    @type tag: C{str}
    @return: the upper-cased first character of C{tag}, or C{''} if
        C{tag} is empty
    @rtype: C{str}
    """
    # Slicing, unlike indexing with tag[0], tolerates an empty string.
    return tag[:1].upper()
117