Package nltk :: Package corpus :: Package reader :: Module toolbox
[hide private]
[frames | no frames]

Source Code for Module nltk.corpus.reader.toolbox

 1  # Natural Language Toolkit: Toolbox Reader 
 2  # 
 3  # Copyright (C) 2001-2011 NLTK Project 
 4  # Author: Greg Aumann <greg_aumann@sil.org> 
 5  #         Stuart Robinson <Stuart.Robinson@mpi.nl> 
 6  #         Steven Bird <sb@csse.unimelb.edu.au> 
 7  # URL: <http://www.nltk.org/> 
 8  # For license information, see LICENSE.TXT 
 9   
10  """ 
11  Module for reading, writing and manipulating  
12  Toolbox databases and settings files. 
13  """ 
14   
15  import os 
16  import re 
17  import codecs 
18   
19  from nltk.toolbox import ToolboxData 
20   
21  from util import * 
22  from api import * 
23   
class ToolboxCorpusReader(CorpusReader):
    """
    Corpus reader for Toolbox files.

    ``xml`` and ``fields`` wrap each file in a ``ToolboxData`` object and
    combine the per-file results with ``concat``; ``entries`` and ``words``
    are built on top of ``fields``; ``raw`` returns the files' contents as
    read from ``self.open``.
    """

    def xml(self, fileids, key=None):
        """
        Parse the given files with ``ToolboxData.parse``.

        :param fileids: file identifier(s), resolved with ``self.abspaths``.
        :param key: forwarded to ``ToolboxData.parse`` as its ``key``
            keyword argument.
        :return: the ``concat`` of the per-file parse results.
        """
        # NOTE(review): ``key`` is passed by keyword so it cannot be bound to
        # some other leading positional parameter of ``ToolboxData.parse``
        # (presumably a chunk grammar -- confirm against nltk.toolbox).
        return concat([ToolboxData(path, enc).parse(key=key)
                       for (path, enc) in self.abspaths(fileids, True)])

    def fields(self, fileids, strip=True, unwrap=True, encoding=None,
               errors='strict', unicode_fields=None):
        """
        Return the fields of the given files.

        All arguments other than ``fileids`` are handed unchanged, and in
        this order, to ``ToolboxData.fields``.

        :param fileids: file identifier(s), resolved with ``self.abspaths``.
        :return: the ``concat`` of one list of fields per file.  Callers in
            this class iterate the result as ``(marker, contents)`` pairs.
        """
        return concat([list(ToolboxData(fileid, enc).fields(
                           strip, unwrap, encoding, errors, unicode_fields))
                       for (fileid, enc)
                       in self.abspaths(fileids, include_encoding=True)])

    # should probably be done lazily:
    def entries(self, fileids, **kwargs):
        """
        Group the fields of the given files into entries.

        A new entry starts at every field whose marker equals ``key``; the
        fields that follow are attached to that entry until the next key
        field.  Fields that occur before the first key field are dropped.

        :param fileids: file identifier(s), passed on to ``self.fields``.
        :param kwargs: an optional ``key`` (the marker that starts an entry,
            ``'lx'`` if omitted); every other keyword argument is passed on
            to ``self.fields``.
        :return: a list of ``(key_contents, [(marker, contents), ...])``
            tuples.
        """
        key = kwargs.pop('key', 'lx')  # 'lx' is the default key in MDF
        entries = []
        for marker, contents in self.fields(fileids, **kwargs):
            if marker == key:
                entries.append((contents, []))
            elif entries:
                entries[-1][-1].append((marker, contents))
            # Otherwise no entry has been started yet, so the field is
            # skipped on purpose.
        return entries

    def words(self, fileids, key='lx'):
        """
        Return the contents of every field whose marker equals ``key``.

        :param fileids: file identifier(s), passed on to ``self.fields``.
        :param key: the marker to select.
        :return: a list of field contents.
        """
        return [contents for marker, contents in self.fields(fileids)
                if marker == key]

    def raw(self, fileids):
        """
        Return the unprocessed contents of the given files.

        :param fileids: a single file identifier, a list of them, or
            ``None`` to use ``self._fileids``.
        :return: the ``concat`` of what ``read()`` returns for each file.
        """
        if fileids is None:
            fileids = self._fileids
        elif isinstance(fileids, basestring):
            fileids = [fileids]
        contents = []
        for f in fileids:
            stream = self.open(f)
            # Close each stream as soon as it has been read instead of
            # leaving it open until garbage collection.
            try:
                contents.append(stream.read())
            finally:
                stream.close()
        return concat(contents)
61 62
def demo():
    """Placeholder demonstration entry point; it currently does nothing."""
# Run the demonstration when this module is executed as a script.
if __name__ == '__main__':
    demo()