Package nltk :: Package corpus :: Package reader :: Module wordlist
[hide private]
[frames | no frames]

Source Code for Module nltk.corpus.reader.wordlist

 1  # Natural Language Toolkit: Word List Corpus Reader 
 2  # 
 3  # Copyright (C) 2001-2011 NLTK Project 
 4  # Author: Steven Bird <sb@ldc.upenn.edu> 
 5  #         Edward Loper <edloper@gradient.cis.upenn.edu> 
 6  # URL: <http://www.nltk.org/> 
 7  # For license information, see LICENSE.TXT 
 8   
 9  from nltk.tokenize import line_tokenize 
10   
11  from util import * 
12  from api import * 
13   
class WordListCorpusReader(CorpusReader):
    """
    List of words, one per line.  Blank lines are ignored.
    """

    def words(self, fileids=None):
        """
        @param fileids: passed through unchanged to L{raw()}.
        @return: the result of applying C{line_tokenize} to the raw
            text of the selected file(s).
        """
        return line_tokenize(self.raw(fileids))

    def raw(self, fileids=None):
        """
        @param fileids: a single file identifier (a string), an iterable
            of file identifiers, or C{None} to use C{self._fileids}.
        @return: the contents of the selected file(s), combined with
            C{concat}.
        """
        if fileids is None:
            fileids = self._fileids
        elif isinstance(fileids, basestring):
            # A lone identifier is wrapped so the loop below always sees
            # a sequence of identifiers rather than a string's characters.
            fileids = [fileids]

        contents = []
        for fileid in fileids:
            stream = self.open(fileid)
            # Close each stream as soon as it has been read instead of
            # leaving the handle open until garbage collection.
            # NOTE(review): assumes whatever self.open() returns has a
            # close() method -- confirm against CorpusReader.open().
            try:
                contents.append(stream.read())
            finally:
                stream.close()
        return concat(contents)
25 26
class SwadeshCorpusReader(WordListCorpusReader):
    """
    Word list reader that can line up the word lists of several files
    entry by entry (see L{entries()}).
    """

    def entries(self, fileids=None):
        """
        @param fileids: a single file identifier (a string), an iterable
            of file identifiers, or a false value (C{None}, empty) to
            use everything returned by C{self.fileids()}.
        @return: the word lists of the specified fileids zipped
            together: entry C{i} is a tuple holding word C{i} of each
            file, in the order the fileids were given.  As with any
            C{zip}, the result stops at the shortest word list.
        """
        if not fileids:
            fileids = self.fileids()
        elif isinstance(fileids, basestring):
            # Without this, a bare string would be iterated character by
            # character and each character looked up as a file id.
            fileids = [fileids]

        wordlists = [self.words(f) for f in fileids]
        return zip(*wordlists)
37