1
2
3
4
5
6
7
8 """
9 A utility for displaying lexical dispersion.
10 """
11
def dispersion_plot(text, words):
    """
    Generate a lexical dispersion plot.

    Every position in C{text} whose token equals one of C{words} is
    drawn as a tick mark, with the token offset on the x axis and one
    row per target word on the y axis.  Rows are numbered over the
    reversed word list, so the first word in C{words} receives the
    highest y value.  The finished figure is displayed with
    C{pylab.show()}.

    @param text: The source text
    @type text: C{list} or other iterable of C{str}
    @param words: The target words (the caller's list is not modified)
    @type words: C{list} of C{str}
    @raise ValueError: If the C{pylab} module cannot be imported.
    """

    try:
        import pylab
    except ImportError:
        raise ValueError('The plot function requires the matplotlib package (aka pylab). '
                         'See http://matplotlib.sourceforge.net/')

    text = list(text)

    # Reverse a private copy: reversing in place would silently reorder
    # the list object owned by the caller.
    words = list(words)
    words.reverse()

    # Map each target word to the row(s) it occupies so that every token
    # needs a single lookup instead of a comparison against every word.
    # A list of rows keeps duplicate target words behaving as before.
    rows = {}
    for y, word in enumerate(words):
        rows.setdefault(word, []).append(y)

    points = [(x, y) for x, token in enumerate(text)
              for y in rows.get(token, ())]
    if points:
        x, y = zip(*points)
    else:
        # No matches: plot empty sequences so the labelled axes still show.
        x = y = ()

    # NOTE(review): matplotlib documents scalex as a boolean autoscale flag;
    # .1 is merely truthy here -- confirm whether something else was intended.
    pylab.plot(x, y, "b|", scalex=.1)
    pylab.yticks(range(len(words)), words, color="b")
    # Leave one unit of padding below the first row and above the last.
    pylab.ylim(-1, len(words))
    pylab.title("Lexical Dispersion Plot")
    pylab.xlabel("Word Offset")
    pylab.show()
43
if __name__ == '__main__':
    # Demo: plot the dispersion of four names across the Gutenberg
    # corpus file 'austen-sense.txt'.
    from nltk.corpus import gutenberg
    dispersion_plot(
        gutenberg.words('austen-sense.txt'),
        ['Elinor', 'Marianne', 'Edward', 'Willoughby'],
    )
48