Package nltk :: Package corpus :: Module europarl_raw
[hide private]
[frames | no frames]

Source Code for Module nltk.corpus.europarl_raw

 1  # Natural Language Toolkit: Europarl Corpus Readers 
 2  # 
 3  # Copyright (C) 2001-2011 NLTK Project 
 4  # Author:  Nitin Madnani <nmadnani@umiacs.umd.edu> 
 5  # URL: <http://www.nltk.org/> 
 6  # For license information, see LICENSE.TXT 
 7   
 8  import re 
 9  from util import LazyCorpusLoader 
10  from reader import * 
11   
12  # Bind one module-level LazyCorpusLoader per Europarl language (eleven in all). 
    # Every call passes the same four things: the string
    # 'europarl_raw/<language>', the reader class EuroparlCorpusReader
    # (presumably brought in by `from reader import *` -- verify), a pattern
    # of the form r'ep-.*\.<code>' where <code> is that language's two-letter
    # suffix (e.g. 'da', 'el', 'sv'), and encoding='utf-8'.
    # NOTE(review): judging by its name, LazyCorpusLoader defers constructing
    # the actual reader until first use -- confirm against util.LazyCorpusLoader.
13  danish = LazyCorpusLoader( 
14      'europarl_raw/danish', EuroparlCorpusReader, r'ep-.*\.da', encoding='utf-8') 
15   
16  dutch = LazyCorpusLoader( 
17      'europarl_raw/dutch', EuroparlCorpusReader, r'ep-.*\.nl', encoding='utf-8') 
18   
19  english = LazyCorpusLoader( 
20      'europarl_raw/english', EuroparlCorpusReader, r'ep-.*\.en', encoding='utf-8') 
21   
22  finnish = LazyCorpusLoader( 
23      'europarl_raw/finnish', EuroparlCorpusReader, r'ep-.*\.fi', encoding='utf-8') 
24   
25  french = LazyCorpusLoader( 
26      'europarl_raw/french', EuroparlCorpusReader, r'ep-.*\.fr', encoding='utf-8') 
27   
28  german = LazyCorpusLoader( 
29      'europarl_raw/german', EuroparlCorpusReader, r'ep-.*\.de', encoding='utf-8') 
30   
31  greek = LazyCorpusLoader( 
32      'europarl_raw/greek', EuroparlCorpusReader, r'ep-.*\.el', encoding='utf-8') 
33   
34  italian = LazyCorpusLoader( 
35      'europarl_raw/italian', EuroparlCorpusReader, r'ep-.*\.it', encoding='utf-8') 
36   
37  portuguese = LazyCorpusLoader( 
38      'europarl_raw/portuguese', EuroparlCorpusReader, r'ep-.*\.pt', encoding='utf-8') 
39   
40  spanish = LazyCorpusLoader( 
41      'europarl_raw/spanish', EuroparlCorpusReader, r'ep-.*\.es', encoding='utf-8') 
42   
43  swedish = LazyCorpusLoader( 
44      'europarl_raw/swedish', EuroparlCorpusReader, r'ep-.*\.sv', encoding='utf-8') 
45