Package nltk :: Package wordnet :: Module synset
[hide private]
[frames | no frames]

Source Code for Module nltk.wordnet.synset

  1  # Natural Language Toolkit: Wordnet Interface: Wordnet Module 
  2  # 
  3  # Copyright (C) 2001-2008 NLTK Project 
  4  # Author: Oliver Steele <steele@osteele.com> 
  5  #         David Ormiston Smith <daosmith@csse.unimelb.edu.au> 
  6  #         Steven Bird <sb@csse.unimelb.edu.au> 
  7  # URL: <http://nltk.org> 
  8  # For license information, see LICENSE.TXT 
  9   
 10  import math 
 11  import pickle 
 12  import string 
 13  import re 
 14   
 15  from nltk import defaultdict 
 16  from nltk.util import binary_search_file 
 17  from nltk.internals import deprecated 
 18   
 19  from util import * 
 20  import dictionary 
 21  import similarity 
 22  from frequency import * 
 23  from lexname import Lexname 
class Word(object):
    """
    One entry of a WordNet part-of-speech index file: a word form, its
    part of speech, and the synsets it belongs to.

    Indexing, slicing, iteration, C{in} and C{len()} all operate on the
    list returned by L{synsets()}.
    """

    @deprecated("Use nltk.corpus.wordnet.Lemma() instead.")
    def __init__(self, line):
        """
        Build a word from one line of a WordNet POS index file.

        @type  line: C{string}
        @param line: The appropriate line taken from the Wordnet data files.
        """
        fields = line.split()

        # fields[3] is a count of entries to skip; the numeric tail
        # starts 4 + that count positions in.
        numeric_start = int(fields[3]) + 4
        numbers = [int(field) for field in fields[numeric_start:]]
        synset_count = numbers[0]

        # orthography
        self.form = fields[0].replace('_', ' ')
        # NOUN, VERB, ADJECTIVE, ADVERB
        self.pos = normalizePOS(fields[1])
        # Number of senses tagged
        self.taggedSenseCount = numbers[1]
        # Offsets of this word's synsets
        self._synsetOffsets = numbers[2:synset_count + 2]

    def synsets(self):
        """
        Get a sequence of the L{synsets}s of this word.

        >>> from nltk.wordnet import *
        >>> N['dog'].synsets()
        [{noun: dog, domestic dog, Canis familiaris}, {noun: frump, dog}, {noun: dog}, {noun: cad, bounder, blackguard, dog, hound, heel}, {noun: frank, frankfurter, hotdog, hot dog, dog, wiener, wienerwurst, weenie}, {noun: pawl, detent, click, dog}, {noun: andiron, firedog, dog, dog-iron}]

        @return: A list of this L{Word}'s L{Synset}s
        """
        if not hasattr(self, '_synsets'):
            # First call: resolve every offset through the dictionary
            # module, cache the result, and drop the raw offsets.
            loaded = []
            for offset in self._synsetOffsets:
                loaded.append(dictionary.synset(self.pos, offset))
            self._synsets = loaded
            del self._synsetOffsets
        return self._synsets

    def senses(self):
        """
        Return a list of WordSense objects corresponding to this word's
        L{synset}s.
        """
        form = self.form
        return [synset.wordSense(form) for synset in self.synsets()]

    def senseCounts(self):
        """
        Return the frequencies of each sense of this word in a tagged
        concordance.
        """
        return [sense.count() for sense in self.senses()]

    def isTagged(self):
        """
        >>> from nltk.wordnet import *
        >>> N['dog'].isTagged()
        True

        @return: True/false (1/0) if one of this L{Word}'s senses is tagged.
        """
        return 0 < self.taggedSenseCount

    # Broken
    # def getAdjectivePositions(self):
    #     """
    #     >>> from nltk.wordnet import *
    #     >>> ADJ['clear'].getAdjectivePositions()
    #     [None, 'predicative']
    #
    #     @return: Return a list of adjective positions that this word can
    #     appear in. These are elements of ADJECTIVE_POSITIONS.
    #     """
    #
    #     return list(set(synset.position for synset in self))

    # --- sequence protocol: everything delegates to synsets() ---

    def __getitem__(self, idx):
        members = self.synsets()
        return members[idx]

    def __iter__(self):
        members = self.synsets()
        return iter(members)

    def __contains__(self, item):
        members = self.synsets()
        return item in members

    def __getslice__(self, i, j):
        members = self.synsets()
        return members[i:j]

    def __len__(self):
        members = self.synsets()
        return len(members)

    # --- display, ordering and hashing ---

    def __repr__(self):
        # return "<Word:" + self.form + '/' + self.pos + ">"
        return self.__str__()

    def __str__(self):
        return ''.join([self.form, ' (', self.pos, ')'])

    def __cmp__(self, other):
        # Ordered by word form first, then by part of speech.
        return _compareInstances(self, other, ('form', 'pos'))

    def __hash__(self):
        # Same key as __cmp__ so that equal words hash alike.
        key = (self.form, self.pos)
        return hash(key)
124
class WordSense(object):
    """
    A single word-sense pairing, indicated in WordNet by a sense key of
    the form::
        lemma%ss_type:lex_filenum:lex_id:head_word:head_id

    @ivar senseKey: the complete sense key string.
    @ivar lemma: the part of the key before the first C{'%'}.
    @ivar ssType: one-letter synset type ('n', 'v', 'a', 'r' or 's').
    @ivar lexFilenum: lexicographer file number, as an int.
    @ivar lexId: lex id, as an int.
    @ivar headWord: head word field of the key (may be empty).
    @ivar headId: head id as an int, or C{None} when the field is not
        numeric (for example, empty).
    """

    # Numeric codes used for the ss_type field of a sense key, and the
    # reverse mapping used when parsing a key.
    _ssTypeMap = {'n': 1, 'v': 2, 'a': 3, 'r': 4, 's': 5}
    _ssTypeRevMap = dict((v, k) for k, v in _ssTypeMap.items())

    @deprecated("Use nltk.corpus.wordnet.Lemma() instead.")
    def __init__(self, senseKey):
        """
        Parse a sense key into its components.

        @type  senseKey: C{string}
        @param senseKey: a key of the form described in the class docstring.
        @raise ValueError: if the key lacks a C{'%'} or does not have
            exactly five colon-separated fields after it.
        @raise KeyError: if the ss_type number is not one of 1-5.
        """
        self.senseKey = senseKey
        self.lemma, remainder = senseKey.split('%', 1)
        (ssType, lexFilenum, lexId,
         self.headWord, headId) = remainder.split(':')

        self.ssType = self._ssTypeRevMap[int(ssType)]
        self.lexFilenum = int(lexFilenum)
        self.lexId = int(lexId)
        try:
            self.headId = int(headId)
        except ValueError:
            # Non-numeric (typically empty) head id.
            self.headId = None

    def count(self):
        """Return C{senseCount()} for this sense key."""
        return senseCount(self.senseKey)

    def _senseIndexLine(self):
        """
        Return the line of C{corpora/wordnet/index.sense} for this sense key.

        The index file is opened on first use and the handle is kept on
        the class, so all instances share it and it is never closed here.

        @raise ValueError: if the binary search finds no line for the key.
        """
        try:
            WordSense._index
        except AttributeError:
            # No visible module-level import binds the name ``nltk``
            # (only ``from nltk import ...`` forms), so import it here;
            # this is harmless if a star-import already provides it.
            import nltk.data
            path = nltk.data.find('corpora/wordnet/index.sense')
            WordSense._index = open(path, FILE_OPEN_MODE)

        res = binary_search_file(WordSense._index, self.senseKey)
        if res:
            return res
        raise ValueError("Could not find data for sense '%s'. "
                         "Is the key wrong?" % self.senseKey)

    def synset(self):
        """
        Return the synset for this sense, using the second
        whitespace-separated field of the index line as the offset.
        """
        line = self._senseIndexLine()
        return dictionary.synset(self.ssType, int(line.split()[1]))

    def word(self):
        """Return C{dictionary.word()} for this sense's lemma and type."""
        return dictionary.word(self.lemma, self.ssType)

    def senseNo(self):
        """
        Return the third whitespace-separated field of the index line,
        as an int.
        """
        line = self._senseIndexLine()
        return int(line.split()[2])

    def lexname(self):
        """Return the L{Lexname} registered under this sense's file number."""
        return Lexname.lexnames[self.lexFilenum]

    def __str__(self):
        # Note: senseNo() reads the sense index file.
        return ('%s (%s) %d'
                % (self.lemma, normalizePOS(self.ssType), self.senseNo()))

    def __cmp__(self, other):
        return _compareInstances(self, other, ('senseKey',))

    def __hash__(self):
        return hash(self.senseKey)

    __repr__ = __str__

    @staticmethod
    def fromSynset(synset, lemma, lex_id):
        """
        Build the WordSense for C{lemma} as a member of C{synset}.

        @param synset: the synset; its C{ssType} and C{lexname.id} are
            read, plus C{headSynset} when the type is 's'.
        @param lemma: the word form; it is lower-cased for the key.
        @param lex_id: integer lex id of the word within the synset.
        """
        ss_type = WordSense._ssTypeMap[synset.ssType]
        lex_filenum = synset.lexname.id
        head_word = ''
        head_id = ''
        if synset.ssType == 's':
            # Satellite adjectives are treated specially: their key also
            # carries the first word and lex id of the head synset.
            head_word = synset.headSynset.words[0]
            head_id = synset.headSynset.wordSenses[0].lexId

        return WordSense.fromKeyParams(
            lemma.lower(), ss_type, lex_filenum, lex_id, head_word, head_id)

    @staticmethod
    def fromKeyParams(lemma, ss_type, lex_filenum, lex_id,
                      head_word='', head_id=''):
        """
        Assemble a sense key from its parts and wrap it in a WordSense.

        C{head_id} is formatted as a two-digit number only when
        C{head_word} is non-empty; otherwise it is inserted as given.
        """
        if head_word:
            head_id = '%02d' % head_id

        return WordSense('%s%%%d:%02d:%02d:%s:%s'
                         % (lemma, ss_type, lex_filenum, lex_id,
                            head_word, head_id))
217
class Synset(object):
    """
    A set of synonyms.

    Each synset holds the word forms of its members (C{words}) and the
    matching L{WordSense} objects (C{wordSenses}).  Indexing with an
    integer, slicing, iteration, C{in} and C{len()} operate on C{words};
    indexing with any other key (e.g. C{synset[HYPERNYM]}) returns the
    list stored for that relation.  All relations are available via
    synset.relations().

    >>> from nltk.wordnet import *
    >>> N['dog'][0]
    {noun: dog, domestic_dog, Canis_familiaris}
    >>> N['dog'][0][HYPERNYM]
    [{noun: canine, canid}, {noun: domestic_animal, domesticated_animal}]
    >>> V['think'][0].verbFrameStrings
    ['Something think something Adjective/Noun', 'Somebody think somebody']

    @type pos: C{string}
    @ivar pos: The part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB.

    @type offset: C{int}
    @ivar offset: An integer offset into the part-of-speech file. Together
        with pos, this can be used as a unique id.

    @type gloss: C{string}
    @ivar gloss: A gloss (dictionary definition) for the sense.

    @type verbFrames: C{list} of C{integer}
    @ivar verbFrames: A sequence of integers that index into
        VERB_FRAME_STRINGS. These list the verb frames that any
        Sense in this synset participates in.  (See also
        Sense.verbFrames.) Defined only for verbs.
    """

    @deprecated("Use nltk.corpus.wordnet.Synset() instead.")
    def __init__(self, pos, offset, line):
        """
        Initialize the synset from a line in a WordNet lexicographer file.

        The line has the form::
            synset_offset lex_filenum ss_type w_cnt word lex_id
            [word lex_id...] p_cnt [ptr...] [frames...] | gloss

        @param pos: part of speech of the file the line came from.
        @param offset: offset of the line within that file.
        @param line: the raw line.
        @raise ValueError: if the line contains no C{'|'} divider.
        """
        # Part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB.
        self.pos = pos

        # Integer offset into the part-of-speech file.  Together with pos,
        # this can be used as a unique id.
        self.offset = offset

        # cache min and max depth (None means "not computed yet")
        self._min_depth = self._max_depth = None

        # The synset entry can be broadly divided into two parts: the
        # synset and relational data, and its human readable description,
        # or gloss.  The '|' character separates these.
        dividerIndex = line.index('|')
        tokens = line[:dividerIndex].split()
        self.ssType = tokens[2]
        self.gloss = line[dividerIndex + 1:].strip()
        self.lexname = Lexname.lexnames[int(tokens[1])]

        # w_cnt: hex integer giving the number of words in the synset
        synset_cnt = int(tokens[3], 16)

        # extract all pairs of the form (word, lex_id), plus a remainder
        (senseTuples, remainder1) = _partition(tokens[4:], 2, synset_cnt)
        self.words = [form for form, lex_id in senseTuples]

        # extract all pointer quadruples, plus a remainder; remainder1[0]
        # is the pointer count
        (self._pointerTuples, remainder2) = _partition(
            remainder1[1:], 4, int(remainder1[0]))

        # Find word senses (via sense keys) from lemma and lex_id
        if self.ssType == 's':
            # need head synset available for finding sense_keys
            # NOTE(review): the literal 'similar' is presumably the value
            # of the SIMILAR constant used in _RELATION_TABLE -- confirm.
            self.headSynset = self.relation('similar')[0]
        self.wordSenses = [WordSense.fromSynset(self, form, int(lex_id, 16))
                           for form, lex_id in senseTuples]

        # frames: In data.verb only, a list of numbers corresponding to the
        # generic verb sentence frames for words in the synset.  frames is
        # of the form:
        #     f_cnt + f_num w_num [ + f_num w_num...]
        # where f_cnt is a two digit decimal integer indicating the number
        # of generic frames listed, f_num is a two digit decimal integer
        # frame number, and w_num is a two digit hexadecimal integer
        # indicating the word in the synset that the frame applies to.  As
        # with pointers, if this number is 00, f_num applies to all words
        # in the synset.  If non-zero, it is applicable only to the word
        # indicated.  Word numbers are assigned as described for pointers.
        if pos == VERB:
            (vfTuples, _unused) = _partition(
                remainder2[1:], 3, int(remainder2[0]))

            # now only used for senseVerbFrames
            def extractVerbFrames(index, vfTuples):
                # Frame numbers whose w_num is 00 (all words) or `index`.
                return tuple(int(t[1]) for t in vfTuples
                             if int(t[2], 16) in (0, index))

            # One tuple of frame numbers per word, words numbered from 1.
            self._senseVerbFrames = [
                extractVerbFrames(index, vfTuples)
                for index in range(1, len(self.words) + 1)]

            # A sequence of integers that index into VERB_FRAME_STRINGS.
            # These list the verb frames that any Sense in this synset
            # participates in (see also Sense.verbFrames).  Defined only
            # for verbs.
            self.verbFrames = tuple(extractVerbFrames(None, vfTuples))

            # A list of verb frame strings for this synset
            self.verbFrameStrings = self.extractVerbFrameStrings(vfTuples)

    def wordSense(self, word):
        """
        Return the WordSense object for the given word in this synset.

        Spaces in C{word} are replaced by underscores before the lookup;
        if that fails, the title-cased form is tried (proper nouns).

        @raise ValueError: if neither form is a member of this synset.
        """
        word = word.replace(' ', '_')
        try:
            index = self.words.index(word)
        except ValueError:
            try:
                # Try for proper noun
                index = self.words.index(word.title())
            except ValueError:
                raise ValueError(
                    "Could not find word '%s' for this synset." % word)

        return self.wordSenses[index]

    def extractVerbFrameStrings(self, vfTuples):
        """
        Return a list of verb frame strings for this synset.

        Only frames whose word number is 00 (applies to every word) are
        used.  Each frame template is filled in with the first word of
        the synset.  If any frame number is out of range for
        VERB_FRAME_STRINGS an empty list is returned.
        """
        # extract a frame index if 3rd item is 00
        frame_indices = [int(t[1]) for t in vfTuples if int(t[2], 16) == 0]
        try:
            verbFrames = [VERB_FRAME_STRINGS[i] for i in frame_indices]
        except IndexError:
            return []
        # ideally we should build 3rd person morphology for this form
        form = self[0]
        return [vf % form for vf in verbFrames]

    def relations(self):
        """
        Return a dictionary mapping each lexical relation to a list of
        its targets.

        The pointers parsed in the constructor are resolved on the first
        call and the result is cached.  A pointer whose target word
        number is zero contributes the target synset itself; otherwise
        it contributes the target synset's word form at that
        (1-based) position.

        @return: relations defined on this L{Synset}.
        """
        # Load the pointers from the Wordnet files if necessary.
        if not hasattr(self, '_relations'):
            relations = defaultdict(list)

            for (symbol, offset, pos, indices) in self._pointerTuples:
                rel = _RELATION_TABLE[symbol]
                # low byte of the hex source/target field = target word no.
                idx = int(indices, 16) & 255
                pos = normalizePOS(pos)
                offset = int(offset)

                synset = dictionary.synset(pos, offset)
                if idx:
                    relations[rel].append(synset[idx - 1])
                else:
                    relations[rel].append(synset)
            del self._pointerTuples
            self._relations = dict(relations)

        return self._relations

    def relation(self, rel):
        """Return the list stored for C{rel}, or C{[]} if there is none."""
        return self.relations().get(rel, [])

    def isTagged(self):
        """
        >>> from nltk.wordnet import *
        >>> N['dog'][0].isTagged()
        True

        >>> N['dog'][1].isTagged()
        False

        @return: True if at least one of this L{Synset}'s word senses has
            a non-zero C{count()}, else False.
        """
        # The previous implementation applied Word.isTagged to the plain
        # strings in self.words, which cannot work.  The doctests above
        # describe a per-sense check, so use the senses' own counts.
        for sense in self.wordSenses:
            if sense.count() > 0:
                return True
        return False

    def __str__(self):
        """
        Return a human-readable representation.

        >>> from nltk.wordnet import *
        >>> str(N['dog'][0].synset)
        '{noun: dog, domestic dog, Canis familiaris}'
        """
        return "{" + self.pos + ": " + ", ".join(self.words) + "}"

    def __repr__(self):
        return "{" + self.pos + ": " + ", ".join(self.words) + "}"

    def __cmp__(self, other):
        return _compareInstances(self, other, ('pos', 'offset'))

    def __hash__(self):
        return hash((self.pos, self.offset))

    def __ne__(self, other):
        return not (self == other)

    def __getitem__(self, idx):
        try:
            return self.words[idx]       # integer key
        except TypeError:
            return self.relation(idx)    # string key

    def __iter__(self):
        return iter(self.words)

    def __contains__(self, item):
        return item in self.words

    def __getslice__(self, i, j):
        return self.words[i:j]

    def __nonzero__(self):
        # Always true, so truth-testing never falls back to __len__.
        return 1

    def __len__(self):
        """
        >>> from nltk.wordnet import *
        >>> len(N['dog'][0].synset)
        3
        """
        return len(self.words)

    def max_depth(self):
        """
        @return: The length of the longest hypernym path from this synset
            to the root.
        """
        # Compare against None: a cached depth of 0 is a valid result.
        if self._max_depth is None:
            hypernyms = self[HYPERNYM]
            if not hypernyms:
                self._max_depth = 0
            else:
                self._max_depth = 1 + max(h.max_depth() for h in hypernyms)
        return self._max_depth

    def min_depth(self):
        """
        @return: The length of the shortest hypernym path from this synset
            to the root.
        """
        # Compare against None: a cached depth of 0 is a valid result.
        if self._min_depth is None:
            hypernyms = self[HYPERNYM]
            if not hypernyms:
                self._min_depth = 0
            else:
                self._min_depth = 1 + min(h.min_depth() for h in hypernyms)
        return self._min_depth

    def closure(self, rel, depth=-1):
        """
        Generate the transitive closure of this synset under the C{rel}
        relationship, breadth-first.

        This is a generator; wrap it in C{list()} to get a list.  This
        synset itself is not yielded, and each offset is yielded at most
        once.

        >>> dog = N['dog'][0]
        >>> hypernyms = list(dog.closure(HYPERNYM))

        @param rel: the relation to follow.
        @param depth: passed through to C{nltk.util.breadth_first}.
        """
        from nltk.util import breadth_first
        seen_offsets = set()
        for synset in breadth_first(self, lambda s: s[rel], depth):
            if (synset.offset != self.offset
                    and synset.offset not in seen_offsets):
                seen_offsets.add(synset.offset)
                yield synset

    def hypernym_paths(self):
        """
        Get the path(s) from this synset to the root, where each path is a
        list of the synset nodes traversed on the way to the root.

        @return: A list of lists, where each list gives the node sequence
            connecting a root node and the initial L{Synset} node
            (root first, this synset last).
        """
        paths = []

        hypernyms = self[HYPERNYM]
        if len(hypernyms) == 0:
            paths = [[self]]

        for hypernym in hypernyms:
            for ancestor_list in hypernym.hypernym_paths():
                ancestor_list.append(self)
                paths.append(ancestor_list)
        return paths

    def hypernym_distances(self, distance, verbose=False):
        """
        Collect this synset and all of its hypernym ancestors together
        with their distance from the initial node.

        @type  distance: C{int}
        @param distance: the distance (number of edges) from this hypernym
            to the original hypernym L{Synset} on which this method was
            called.
        @param verbose: if true, print the collected distances.  The flag
            is not passed on to the recursive calls.
        @return: A set of (L{Synset}, int) tuples where each L{Synset} is
            this synset or one of its hypernym ancestors.
        """
        distances = set([(self, distance)])

        for hypernym in self[HYPERNYM]:
            distances |= hypernym.hypernym_distances(distance + 1,
                                                     verbose=False)
        if verbose:
            print("> Hypernym Distances: %s %s" % (
                self,
                " ".join("%s:%r" % (synset, dist)
                         for synset, dist in distances)))
        return distances

    def shortest_path_distance(self, other):
        """
        Returns the distance of the shortest path linking the two synsets
        (if one exists).  For each synset, all the ancestor nodes and
        their distances are recorded and compared.  The ancestor node
        common to both synsets that can be reached with the minimum
        number of traversals is used.  If no ancestor nodes are common,
        -1 is returned.  If a node is compared with itself 0 is returned.

        @type  other: L{Synset}
        @param other: The Synset to which the shortest path will be found.
        @return: The number of edges in the shortest path connecting the
            two nodes, or -1 if no path exists.
        """
        if self == other:
            return 0

        def nearest(pairs):
            # Collapse (synset, distance) pairs into a dictionary.  Where
            # a synset occurs more than once (multiple paths to the
            # root) keep the shortest distance.
            table = {}
            for synset, dist in pairs:
                if synset not in table or dist < table[synset]:
                    table[synset] = dist
            return table

        own = nearest(self.hypernym_distances(0))
        theirs = nearest(other.hypernym_distances(0))

        # For each ancestor common to both synsets, the connecting path
        # length is the sum of the two distances; keep the smallest.
        # Synsets hash and compare on (pos, offset), so a dictionary
        # lookup finds the same matches as a pairwise comparison.
        path_distance = -1
        for synset, dist in own.items():
            if synset in theirs:
                new_distance = dist + theirs[synset]
                if path_distance < 0 or new_distance < path_distance:
                    path_distance = new_distance

        return path_distance

    def tree(self, rel, depth=-1, cut_mark=None):
        """
        Return the C{rel} relation below this synset as a nested list:
        the first element is this synset and each following element is
        the tree of one of its C{rel} targets.

        >>> dog = N['dog'][0]
        >>> from pprint import pprint
        >>> pprint(dog.tree(HYPERNYM, depth=1))

        @param rel: the relation to follow.
        @param depth: number of levels to expand; a negative value never
            reaches zero, so expansion is unlimited.
        @param cut_mark: if given, appended to a node's list where
            expansion stopped because C{depth} reached zero.
        """
        tree = [self]
        if depth != 0:
            tree += [x.tree(rel, depth - 1, cut_mark) for x in self[rel]]
        elif cut_mark:
            tree += [cut_mark]
        return tree

    # interface to similarity methods (each delegates to the
    # same-named function of the similarity module)

    def path_similarity(self, other, verbose=False):
        return similarity.path_similarity(self, other, verbose)

    def lch_similarity(self, other, verbose=False):
        return similarity.lch_similarity(self, other, verbose)

    def wup_similarity(self, other, verbose=False):
        return similarity.wup_similarity(self, other, verbose)

    def res_similarity(self, other, ic, verbose=False):
        return similarity.res_similarity(self, other, ic, verbose)

    def jcn_similarity(self, other, ic, verbose=False):
        return similarity.jcn_similarity(self, other, ic, verbose)

    def lin_similarity(self, other, ic, verbose=False):
        return similarity.lin_similarity(self, other, ic, verbose)


# Lexical Relations

# Maps the pointer symbols found in the WordNet data files to the
# relation constants used as keys of Synset.relations().
_RELATION_TABLE = {
    '!': ANTONYM, '@': HYPERNYM, '~': HYPONYM, '=': ATTRIBUTE,
    '^': ALSO_SEE, '*': ENTAILMENT, '>': CAUSE, '$': VERB_GROUP,
    '#m': MEMBER_MERONYM, '#s': SUBSTANCE_MERONYM, '#p': PART_MERONYM,
    '%m': MEMBER_HOLONYM, '%s': SUBSTANCE_HOLONYM, '%p': PART_HOLONYM,
    '&': SIMILAR, '<': PARTICIPLE_OF, '\\': PERTAINYM, '+': FRAMES,
    ';c': CLASSIF_CATEGORY, ';u': CLASSIF_USAGE, ';r': CLASSIF_REGIONAL,
    '-c': CLASS_CATEGORY, '-u': CLASS_USAGE, '-r': CLASS_REGIONAL,
    '@i': INSTANCE_HYPERNYM,'~i': INSTANCE_HYPONYM,
    }
643 644 # Private Utility Functions 645 646 -def _index(key, sequence, testfn=None, keyfn=None):
647 """ 648 Return the index of key within sequence, using testfn for 649 comparison and transforming items of sequence by keyfn first. 650 651 >>> _index('e', 'hello') 652 1 653 >>> _index('E', 'hello', testfn=_equalsIgnoreCase) 654 1 655 >>> _index('x', 'hello') 656 """ 657 index = 0 658 for element in sequence: 659 value = element 660 if keyfn: 661 value = keyfn(value) 662 if (not testfn and value == key) or (testfn and testfn(value, key)): 663 return index 664 index = index + 1 665 return None
666
667 -def _partition(sequence, size, count):
668 """ 669 Partition sequence into C{count} subsequences of 670 length C{size}, and a remainder. 671 672 Return C{(partitions, remainder)}, where C{partitions} is a sequence of 673 C{count} subsequences of cardinality C{size}, and 674 C{apply(append, partitions) + remainder == sequence}. 675 """ 676 677 partitions = [] 678 for index in range(0, size * count, size): 679 partitions.append(sequence[index:index + size]) 680 return (partitions, sequence[size * count:])
681
682 -def _compareInstances(a, b, fields):
683 """ 684 Return -1, 0, or 1 according to a comparison first by type, 685 then by class, and finally by each of fields. Used when comparing two 686 Wordnet objects (Synsets, Words, or Senses) to each other. 687 """ 688 if not hasattr(b, '__class__'): 689 return cmp(type(a), type(b)) 690 elif a.__class__ != b.__class__: 691 return cmp(a.__class__, b.__class__) 692 693 for field in fields: 694 diff = cmp(getattr(a, field), getattr(b, field)) 695 if diff: return diff 696 697 return 0
698
699 -def _equalsIgnoreCase(a, b):
700 """ 701 Return true iff a and b have the same lowercase representation. 702 703 >>> _equalsIgnoreCase('dog', 'Dog') 704 True 705 >>> _equalsIgnoreCase('dOg', 'DOG') 706 True 707 """ 708 return a == b or a.lower() == b.lower()
709
710 711 712 -def demo():
713 from nltk import wordnet 714 from pprint import pprint 715 716 dog = wordnet.N['dog'] 717 cat = wordnet.N['cat'] 718 719 print "wordnet.N['dog']" 720 print 'dog' in wordnet.N 721 print dog 722 print dog.pos, dog.form 723 print dog.taggedSenseCount 724 print dog.synsets() 725 print dog.isTagged() 726 # ADJ['clear'].getAdjectivePositions() 727 # N['cat'] < N['dog'] 728 # N['dog'] < V['dog'] 729 730 print "Verb Frames:", 731 print wordnet.V['think'][0].verbFrameStrings 732 733 print "Relations:" 734 print dog[0].relations() 735 print dog[0][wordnet.HYPERNYM] 736 737 print "Glosses:" 738 print dog[0].gloss 739 print dog[0].relation(wordnet.HYPERNYM)[0].gloss 740 741 print 742 print "Paths and Distances:" 743 print 744 745 print dog[0].hypernym_paths() 746 print dog[0].hypernym_distances(0) 747 print dog[0].shortest_path_distance(cat[0]) 748 749 print 750 print "Closures and Trees:" 751 print 752 753 pprint(wordnet.ADJ['red'][0].closure(wordnet.SIMILAR, depth=1)) 754 pprint(wordnet.ADJ['red'][0].closure(wordnet.SIMILAR, depth=2)) 755 pprint(dog[0].tree(wordnet.HYPERNYM)) 756 pprint(dog[0].tree(wordnet.HYPERNYM, depth=2, cut_mark = '...')) 757 758 entity = wordnet.N["entity"] 759 print entity, entity[0] 760 print entity[0][wordnet.HYPONYM] 761 pprint(entity[0].tree(wordnet.HYPONYM, depth=1), indent=4) 762 abstract_entity = wordnet.N["abstract entity"] 763 print abstract_entity, abstract_entity[0] 764 print abstract_entity[0][wordnet.HYPONYM] 765 pprint(abstract_entity[0].tree(wordnet.HYPONYM, depth=1), indent=4) 766 767 # Adjectives that are transitively SIMILAR to any of the senses of 'red' 768 #flatten1(map(lambda sense:closure(sense, SIMILAR), ADJ['red'])) # too verbose 769 770 print "All the words in the hyponym synsets of dog[0]" 771 print [word for synset in dog[0][wordnet.HYPONYM] for word in synset] 772 773 print "Hyponyms of the first (and only) sense of 'animal' that are homophonous with verbs:" 774 print [word for synset in 
wordnet.N['animal'][0].closure(wordnet.HYPONYM) for word in synset if word in wordnet.V] 775 776 # BROKEN 777 print "Senses of 'raise'(v.) and 'lower'(v.) that are antonyms:" 778 print filter(lambda p:p[0] in p[1][wordnet.ANTONYM], [(r,l) for r in wordnet.V['raise'] for l in wordnet.V['lower']]) 779 780 print 781 print "Similarity: dog~cat" 782 print 783 784 print "Path Distance Similarity:", 785 print dog[0].path_similarity(cat[0]) 786 print "Leacock Chodorow Similarity:", 787 print dog[0].lch_similarity(cat[0]) 788 print "Wu Palmer Similarity:", 789 print dog[0].wup_similarity(cat[0])
790 791 # set up the data file 792 # print "Resnik Similarity:", 793 # print dog[0].resnik_similarity(cat[0], datafile) 794 # print "Jiang-Conrath Similarity:", 795 # print dog[0].jiang_conrath_similarity(cat[0], datafile) 796 # print "Lin Similarity:", 797 # print dog[0].lin_similarity(cat[0], datafile) 798 799 if __name__ == '__main__': 800 demo() 801