Package nltk :: Package misc :: Module wordfinder
[hide private]
[frames] | no frames]

Source Code for Module nltk.misc.wordfinder

  1  # Natural Language Toolkit: Word Finder 
  2  # 
  3  # Copyright (C) 2001-2011 NLTK Project 
  4  # Author: Steven Bird <sb@csse.unimelb.edu.au> 
  5  # URL: <http://www.nltk.org/> 
  6  # For license information, see LICENSE.TXT 
  7   
  8  # Simplified from PHP version by Robert Klein <brathna@gmail.com> 
  9  # http://fswordfinder.sourceforge.net/ 
 10   
 11  import random 
 12  from string import strip 
 13   
 14  # reverse a word with probability 0.5 
15 -def revword(word):
16 if random.randint(1,2) == 1: 17 return word[::-1] 18 return word
19 20 # try to insert word at position x,y; direction encoded in xf,yf
21 -def step(word, x, xf, y, yf, grid):
22 for i in range(len(word)): 23 if grid[xf(i)][yf(i)] != "" and grid[xf(i)][yf(i)] != word[i]: 24 return False 25 for i in range(len(word)): 26 grid[xf(i)][yf(i)] = word[i] 27 return True
28 29 # try to insert word at position x,y, in direction dir
30 -def check(word, dir, x, y, grid, rows, cols):
31 if dir==1: 32 if x-len(word)<0 or y-len(word)<0: 33 return False 34 return step(word, x, lambda i:x-i, y, lambda i:y-i, grid) 35 elif dir==2: 36 if x-len(word)<0: 37 return False 38 return step(word, x, lambda i:x-i, y, lambda i:y, grid) 39 elif dir==3: 40 if x-len(word)<0 or y+(len(word)-1)>=cols: 41 return False 42 return step(word, x, lambda i:x-i, y, lambda i:y+i, grid) 43 elif dir==4: 44 if y-len(word)<0: 45 return False 46 return step(word, x, lambda i:x, y, lambda i:y-i, grid)
47
48 -def wordfinder(words, rows=20, cols=20, attempts=50, 49 alph='ABCDEFGHIJKLMNOPQRSTUVWXYZ'):
50 """ 51 Attempt to arrange words into a letter-grid with the specified 52 number of rows and columns. Try each word in several positions 53 and directions, until it can be fitted into the grid, or the 54 maximum number of allowable attempts is exceeded. Returns a tuple 55 consisting of the grid and the words that were successfully 56 placed. 57 58 @param words: the list of words to be put into the grid 59 @type words: C(list) 60 @param rows: the number of rows in the grid 61 @type rows: C(int) 62 @param cols: the number of columns in the grid 63 @type cols: C(int) 64 @param attempts: the number of times to attempt placing a word 65 @type attempts: C(int) 66 @param alph: the alpabet, to be used for filling blank cells 67 @type alph: C(list) 68 @rtype: C(tuple) 69 """ 70 71 # place longer words first 72 words.sort(cmp=lambda x,y:cmp(len(x),len(y)), reverse=True) 73 74 grid = [] # the letter grid 75 used = [] # the words we used 76 77 # initialize the grid 78 for i in range(rows): 79 grid.append([""] * cols) 80 81 # try to place each word 82 for word in words: 83 word = strip(word).upper() # normalize 84 save = word # keep a record of the word 85 word = revword(word) 86 for attempt in range(attempts): 87 r = random.randint(0, len(word)) 88 dir = random.choice([1,2,3,4]) 89 x = random.randint(0,rows) 90 y = random.randint(0,cols) 91 if dir==1: x+=r; y+=r 92 elif dir==2: x+=r 93 elif dir==3: x+=r; y-=r 94 elif dir==4: y+=r 95 if 0<=x<rows and 0<=y<cols: 96 if check(word, dir, x, y, grid, rows, cols): 97 # used.append((save, dir, x, y, word)) 98 used.append(save) 99 break 100 101 # Fill up the remaining spaces 102 for i in range(rows): 103 for j in range(cols): 104 if grid[i][j] == '': 105 grid[i][j] = random.choice(alph) 106 107 return grid, used
108
109 -def word_finder():
110 from nltk.corpus import words 111 wordlist = words.words() 112 random.shuffle(wordlist) 113 wordlist = wordlist[:200] 114 wordlist = [w for w in wordlist if 3 <= len(w) <= 12] 115 grid, used = wordfinder(wordlist) 116 117 print "Word Finder\n" 118 for i in range(len(grid)): 119 for j in range(len(grid[i])): 120 print grid[i][j], 121 print 122 print 123 124 for i in range(len(used)): 125 print "%d:" % (i+1), used[i]
126 127 if __name__ == '__main__': 128 word_finder() 129