1
2
3
4
5
6
7
8
9 """
10 A classifier model based on maximum entropy modeling framework. This
11 framework considers all of the probability distributions that are
12 empirically consistant with the training data; and chooses the
13 distribution with the highest entropy. A probability distribution is
14 X{empirically consistant} with a set of training data if its estimated
15 frequency with which a class and a feature vector value co-occur is
16 equal to the actual frequency in the data.
17
18 Terminology: 'feature'
19 ======================
20 The term I{feature} is usually used to refer to some property of an
21 unlabeled token. For example, when performing word sense
22 disambiguation, we might define a C{'prevword'} feature whose value is
23 the word preceeding the target word. However, in the context of
24 maxent modeling, the term I{feature} is typically used to refer to a
25 property of a X{labeled} token. In order to prevent confusion, we
26 will introduce two distinct terms to disambiguate these two different
27 concepts:
28
29 - An X{input-feature} is a property of an unlabeled token.
30 - A X{joint-feature} is a property of a labeled token.
31
32 In the rest of the C{nltk.classify} module, the term X{features} is
33 used to refer to what we will call X{input-features} in this module.
34
35 In literature that describes and discusses maximum entropy models,
36 input-features are typically called X{contexts}, and joint-features
37 are simply referred to as X{features}.
38
39 Converting Input-Features to Joint-Features
40 -------------------------------------------
41 In maximum entropy models, joint-features are required to have numeric
42 values. Typically, each input-feature C{input_feat} is mapped to a
43 set of joint-features of the form::
44
45 joint_feat(token, label) = { 1 if input_feat(token) == feat_val
46 { and label == some_label
47 {
48 { 0 otherwise
49
50 For all values of C{feat_val} and C{some_label}. This mapping is
51 performed by classes that implement the L{MaxentFeatureEncodingI}
52 interface.
53 """
54 __docformat__ = 'epytext en'
55
56 import numpy
57 import time
58 import tempfile
59 import os
60 import gzip
61
62 from nltk.compat import defaultdict
63 from nltk.util import OrderedDict
64 from nltk.probability import *
65
66 import nltk.classify.util
67 from api import *
68 from util import attested_labels, CutoffChecker
69 from megam import call_megam, write_megam_file, parse_megam_weights
70 from tadm import call_tadm, write_tadm_file, parse_tadm_weights
77 """
78 A maximum entropy classifier (also known as a X{conditional
79 exponential classifier}). This classifier is parameterized by a
80 set of X{weights}, which are used to combine the joint-features
81 that are generated from a featureset by an X{encoding}. In
82 particular, the encoding maps each C{(featureset, label)} pair to
83 a vector. The probability of each label is then computed using
84 the following equation::
85
86 dotprod(weights, encode(fs,label))
87 prob(fs|label) = ---------------------------------------------------
88 sum(dotprod(weights, encode(fs,l)) for l in labels)
89
90 Where C{dotprod} is the dot product::
91
92 dotprod(a,b) = sum(x*y for (x,y) in zip(a,b))
93 """
94 - def __init__(self, encoding, weights, logarithmic=True):
95 """
96 Construct a new maxent classifier model. Typically, new
97 classifier models are created using the L{train()} method.
98
99 @type encoding: L{MaxentFeatureEncodingI}
100 @param encoding: An encoding that is used to convert the
101 featuresets that are given to the C{classify} method into
102 joint-feature vectors, which are used by the maxent
103 classifier model.
104
105 @type weights: C{list} of C{float}
106 @param weights: The feature weight vector for this classifier.
107
108 @type logarithmic: C{bool}
109 @param logarithmic: If false, then use non-logarithmic weights.
110 """
111 self._encoding = encoding
112 self._weights = weights
113 self._logarithmic = logarithmic
114
115 assert encoding.length() == len(weights)
116
118 return self._encoding.labels()
119
121 """
122 Set the feature weight vector for this classifier.
123 @param new_weights: The new feature weight vector.
124 @type new_weights: C{list} of C{float}
125 """
126 self._weights = new_weights
127 assert (self._encoding.length() == len(new_weights))
128
130 """
131 @return: The feature weight vector for this classifier.
132 @rtype: C{list} of C{float}
133 """
134 return self._weights
135
138
140 prob_dict = {}
141 for label in self._encoding.labels():
142 feature_vector = self._encoding.encode(featureset, label)
143
144 if self._logarithmic:
145 total = 0.0
146 for (f_id, f_val) in feature_vector:
147 total += self._weights[f_id] * f_val
148 prob_dict[label] = total
149
150 else:
151 prod = 1.0
152 for (f_id, f_val) in feature_vector:
153 prod *= self._weights[f_id] ** f_val
154 prob_dict[label] = prod
155
156
157 return DictionaryProbDist(prob_dict, log=self._logarithmic,
158 normalize=True)
159
160 - def explain(self, featureset, columns=4):
161 """
162 Print a table showing the effect of each of the features in
163 the given feature set, and how they combine to determine the
164 probabilities of each label for that featureset.
165 """
166 descr_width = 50
167 TEMPLATE = ' %-'+str(descr_width-2)+'s%s%8.3f'
168
169 pdist = self.prob_classify(featureset)
170 labels = sorted(pdist.samples(), key=pdist.prob, reverse=True)
171 labels = labels[:columns]
172 print ' Feature'.ljust(descr_width)+''.join(
173 '%8s' % str(l)[:7] for l in labels)
174 print ' '+'-'*(descr_width-2+8*len(labels))
175 sums = defaultdict(int)
176 for i, label in enumerate(labels):
177 feature_vector = self._encoding.encode(featureset, label)
178 feature_vector.sort(key=lambda (fid,_): abs(self._weights[fid]),
179 reverse=True)
180 for (f_id, f_val) in feature_vector:
181 if self._logarithmic: score = self._weights[f_id] * f_val
182 else: score = self._weights[fid] ** f_val
183 descr = self._encoding.describe(f_id)
184 descr = descr.split(' and label is ')[0]
185 descr += ' (%s)' % f_val
186 if len(descr) > 47: descr = descr[:44]+'...'
187 print TEMPLATE % (descr, i*8*' ', score)
188 sums[label] += score
189 print ' '+'-'*(descr_width-1+8*len(labels))
190 print ' TOTAL:'.ljust(descr_width)+''.join(
191 '%8.3f' % sums[l] for l in labels)
192 print ' PROBS:'.ljust(descr_width)+''.join(
193 '%8.3f' % pdist.prob(l) for l in labels)
194
209
211 return ('<ConditionalExponentialClassifier: %d labels, %d features>' %
212 (len(self._encoding.labels()), self._encoding.length()))
213
214
215
216 ALGORITHMS = ['GIS', 'IIS', 'CG', 'BFGS', 'Powell', 'LBFGSB',
217 'Nelder-Mead', 'MEGAM', 'TADM']
218
219 @classmethod
220 - def train(cls, train_toks, algorithm=None, trace=3, encoding=None,
221 labels=None, sparse=True, gaussian_prior_sigma=0, **cutoffs):
222 """
223 Train a new maxent classifier based on the given corpus of
224 training samples. This classifier will have its weights
225 chosen to maximize entropy while remaining empirically
226 consistent with the training corpus.
227
228 @rtype: L{MaxentClassifier}
229 @return: The new maxent classifier
230
231 @type train_toks: C{list}
232 @param train_toks: Training data, represented as a list of
233 pairs, the first member of which is a featureset,
234 and the second of which is a classification label.
235
236 @type algorithm: C{str}
237 @param algorithm: A case-insensitive string, specifying which
238 algorithm should be used to train the classifier. The
239 following algorithms are currently available.
240
241 - Iterative Scaling Methods
242 - C{'GIS'}: Generalized Iterative Scaling
243 - C{'IIS'}: Improved Iterative Scaling
244
245 - Optimization Methods (require C{scipy})
246 - C{'CG'}: Conjugate gradient
247 - C{'BFGS'}: Broyden-Fletcher-Goldfarb-Shanno algorithm
248 - C{'Powell'}: Powell agorithm
249 - C{'LBFGSB'}: A limited-memory variant of the BFGS algorithm
250 - C{'Nelder-Mead'}: The Nelder-Mead algorithm
251
252 - External Libraries
253 - C{'megam'}: LM-BFGS algorithm, with training performed
254 by an U{megam <http://www.cs.utah.edu/~hal/megam/>}.
255 (requires that C{megam} be installed.)
256
257 The default algorithm is C{'CG'} if C{'scipy'} is
258 installed; and C{'iis'} otherwise.
259
260 @type trace: C{int}
261 @param trace: The level of diagnostic tracing output to produce.
262 Higher values produce more verbose output.
263
264 @type encoding: L{MaxentFeatureEncodingI}
265 @param encoding: A feature encoding, used to convert featuresets
266 into feature vectors. If none is specified, then a
267 L{BinaryMaxentFeatureEncoding} will be built based on the
268 features that are attested in the training corpus.
269
270 @type labels: C{list} of C{str}
271 @param labels: The set of possible labels. If none is given, then
272 the set of all labels attested in the training data will be
273 used instead.
274
275 @param sparse: If true, then use sparse matrices instead of
276 dense matrices. Currently, this is only supported by
277 the scipy (optimization method) algorithms. For other
278 algorithms, its value is ignored.
279
280 @param gaussian_prior_sigma: The sigma value for a gaussian
281 prior on model weights. Currently, this is supported by
282 the scipy (optimization method) algorithms and C{megam}.
283 For other algorithms, its value is ignored.
284
285 @param cutoffs: Arguments specifying various conditions under
286 which the training should be halted. (Some of the cutoff
287 conditions are not supported by some algorithms.)
288
289 - C{max_iter=v}: Terminate after C{v} iterations.
290 - C{min_ll=v}: Terminate after the negative average
291 log-likelihood drops under C{v}.
292 - C{min_lldelta=v}: Terminate if a single iteration improves
293 log likelihood by less than C{v}.
294 - C{tolerance=v}: Terminate a scipy optimization method when
295 improvement drops below a tolerance level C{v}. The
296 exact meaning of this tolerance depends on the scipy
297 algorithm used. See C{scipy} documentation for more
298 info. Default values: 1e-3 for CG, 1e-5 for LBFGSB,
299 and 1e-4 for other algorithms. I{(C{scipy} only)}
300 """
301 if algorithm is None:
302 try:
303 import scipy
304 algorithm = 'cg'
305 except ImportError:
306 algorithm = 'iis'
307 for key in cutoffs:
308 if key not in ('max_iter', 'min_ll', 'min_lldelta', 'tolerance',
309 'max_acc', 'min_accdelta', 'count_cutoff',
310 'norm', 'explicit', 'bernoulli'):
311 raise TypeError('Unexpected keyword arg %r' % key)
312 algorithm = algorithm.lower()
313 if algorithm == 'iis':
314 return train_maxent_classifier_with_iis(
315 train_toks, trace, encoding, labels, **cutoffs)
316 elif algorithm == 'gis':
317 return train_maxent_classifier_with_gis(
318 train_toks, trace, encoding, labels, **cutoffs)
319 elif algorithm in cls._SCIPY_ALGS:
320 return train_maxent_classifier_with_scipy(
321 train_toks, trace, encoding, labels,
322 cls._SCIPY_ALGS[algorithm], sparse,
323 gaussian_prior_sigma, **cutoffs)
324 elif algorithm == 'megam':
325 return train_maxent_classifier_with_megam(
326 train_toks, trace, encoding, labels,
327 gaussian_prior_sigma, **cutoffs)
328 elif algorithm == 'tadm':
329 kwargs = cutoffs
330 kwargs['trace'] = trace
331 kwargs['encoding'] = encoding
332 kwargs['labels'] = labels
333 kwargs['gaussian_prior_sigma'] = gaussian_prior_sigma
334 return TadmMaxentClassifier.train(train_toks, **kwargs)
335 else:
336 raise ValueError('Unknown algorithm %s' % algorithm)
337
338 _SCIPY_ALGS = {'cg':'CG', 'bfgs':'BFGS', 'powell':'Powell',
339 'lbfgsb':'LBFGSB', 'nelder-mead':'Nelder-Mead'}
340
351 """
352 A mapping that converts a set of input-feature values to a vector
353 of joint-feature values, given a label. This conversion is
354 necessary to translate featuresets into a format that can be used
355 by maximum entropy models.
356
357 The set of joint-features used by a given encoding is fixed, and
358 each index in the generated joint-feature vectors corresponds to a
359 single joint-feature. The length of the generated joint-feature
360 vectors is therefore constant (for a given encoding).
361
362 Because the joint-feature vectors generated by
363 C{MaxentFeatureEncodingI} are typically very sparse, they are
364 represented as a list of C{(index, value)} tuples, specifying the
365 value of each non-zero joint-feature.
366
367 Feature encodings are generally created using the L{train()}
368 method, which generates an appropriate encoding based on the
369 input-feature values and labels that are present in a given
370 corpus.
371 """
372 - def encode(self, featureset, label):
373 """
374 Given a (featureset, label) pair, return the corresponding
375 vector of joint-feature values. This vector is represented as
376 a list of C{(index, value)} tuples, specifying the value of
377 each non-zero joint-feature.
378
379 @type featureset: C{dict}
380 @rtype: C{list} of C{(int, number)}
381 """
382 raise AssertionError('Not implemented')
383
385 """
386 @return: The size of the fixed-length joint-feature vectors
387 that are generated by this encoding.
388 @rtype: C{int}
389 """
390 raise AssertionError('Not implemented')
391
393 """
394 @return: A list of the \"known labels\" -- i.e., all labels
395 C{l} such that C{self.encode(fs,l)} can be a nonzero
396 joint-feature vector for some value of C{fs}.
397 @rtype: C{list}
398 """
399 raise AssertionError('Not implemented')
400
402 """
403 @return: A string describing the value of the joint-feature
404 whose index in the generated feature vectors is C{fid}.
405 @rtype: C{str}
406 """
407 raise AssertionError('Not implemented')
408
409 - def train(cls, train_toks):
410 """
411 Construct and return new feature encoding, based on a given
412 training corpus C{train_toks}.
413
414 @type train_toks: C{list} of C{tuples} of (C{dict}, C{str})
415 @param train_toks: Training data, represented as a list of
416 pairs, the first member of which is a feature dictionary,
417 and the second of which is a classification label.
418 """
419 raise AssertionError('Not implemented')
420
422 """
423 A feature encoding that calls a user-supplied function to map a
424 given featureset/label pair to a sparse joint-feature vector.
425 """
426 - def __init__(self, func, length, labels):
427 """
428 Construct a new feature encoding based on the given function.
429
430 @type func: (callable)
431 @param func: A function that takes two arguments, a featureset
432 and a label, and returns the sparse joint feature vector
433 that encodes them:
434
435 >>> func(featureset, label) -> feature_vector
436
437 This sparse joint feature vector (C{feature_vector}) is a
438 list of C{(index,value)} tuples.
439
440 @type length: C{int}
441 @param length: The size of the fixed-length joint-feature
442 vectors that are generated by this encoding.
443
444 @type labels: C{list}
445 @param labels: A list of the \"known labels\" for this
446 encoding -- i.e., all labels C{l} such that
447 C{self.encode(fs,l)} can be a nonzero joint-feature vector
448 for some value of C{fs}.
449 """
450 self._length = length
451 self._func = func
452 self._labels = labels
453
454 - def encode(self, featureset, label):
455 return self._func(featureset, label)
456
459
462
464 return 'no description available'
465
467 """
468 A feature encoding that generates vectors containing a binary
469 joint-features of the form::
470
471 joint_feat(fs, l) = { 1 if (fs[fname] == fval) and (l == label)
472 {
473 { 0 otherwise
474
475 Where C{fname} is the name of an input-feature, C{fval} is a value
476 for that input-feature, and C{label} is a label.
477
478 Typically, these features are constructed based on a training
479 corpus, using the L{train()} method. This method will create one
480 feature for each combination of C{fname}, C{fval}, and C{label}
481 that occurs at least once in the training corpus.
482
483 The C{unseen_features} parameter can be used to add X{unseen-value
484 features}, which are used whenever an input feature has a value
485 that was not encountered in the training corpus. These features
486 have the form::
487
488 joint_feat(fs, l) = { 1 if is_unseen(fname, fs[fname])
489 { and l == label
490 {
491 { 0 otherwise
492
493 Where C{is_unseen(fname, fval)} is true if the encoding does not
494 contain any joint features that are true when C{fs[fname]==fval}.
495
496 The C{alwayson_features} parameter can be used to add X{always-on
497 features}, which have the form::
498
499 joint_feat(fs, l) = { 1 if (l == label)
500 {
501 { 0 otherwise
502
503 These always-on features allow the maxent model to directly model
504 the prior probabilities of each label.
505 """
506 - def __init__(self, labels, mapping, unseen_features=False,
507 alwayson_features=False):
508 """
509 @param labels: A list of the \"known labels\" for this encoding.
510
511 @param mapping: A dictionary mapping from C{(fname,fval,label)}
512 tuples to corresponding joint-feature indexes. These
513 indexes must be the set of integers from 0...len(mapping).
514 If C{mapping[fname,fval,label]=id}, then
515 C{self.encode({..., fname:fval, ...}, label)[id]} is 1;
516 otherwise, it is 0.
517
518 @param unseen_features: If true, then include unseen value
519 features in the generated joint-feature vectors.
520
521 @param alwayson_features: If true, then include always-on
522 features in the generated joint-feature vectors.
523 """
524 if set(mapping.values()) != set(range(len(mapping))):
525 raise ValueError('Mapping values must be exactly the '
526 'set of integers from 0...len(mapping)')
527
528 self._labels = list(labels)
529 """A list of attested labels."""
530
531 self._mapping = mapping
532 """dict mapping from (fname,fval,label) -> fid"""
533
534 self._length = len(mapping)
535 """The length of generated joint feature vectors."""
536
537 self._alwayson = None
538 """dict mapping from label -> fid"""
539
540 self._unseen = None
541 """dict mapping from fname -> fid"""
542
543 if alwayson_features:
544 self._alwayson = dict([(label,i+self._length)
545 for (i,label) in enumerate(labels)])
546 self._length += len(self._alwayson)
547
548 if unseen_features:
549 fnames = set(fname for (fname, fval, label) in mapping)
550 self._unseen = dict([(fname, i+self._length)
551 for (i, fname) in enumerate(fnames)])
552 self._length += len(fnames)
553
554 - def encode(self, featureset, label):
555
556 encoding = []
557
558
559 for fname, fval in featureset.items():
560
561 if (fname, fval, label) in self._mapping:
562 encoding.append((self._mapping[fname, fval, label], 1))
563
564
565 elif self._unseen:
566
567 for label2 in self._labels:
568 if (fname, fval, label2) in self._mapping:
569 break
570
571 else:
572 if fname in self._unseen:
573 encoding.append((self._unseen[fname], 1))
574
575
576 if self._alwayson and label in self._alwayson:
577 encoding.append((self._alwayson[label], 1))
578
579 return encoding
580
582
583 if not isinstance(f_id, (int, long)):
584 raise TypeError('describe() expected an int')
585 try:
586 self._inv_mapping
587 except AttributeError:
588 self._inv_mapping = [-1]*len(self._mapping)
589 for (info, i) in self._mapping.items():
590 self._inv_mapping[i] = info
591
592 if f_id < len(self._mapping):
593 (fname, fval, label) = self._inv_mapping[f_id]
594 return '%s==%r and label is %r' % (fname, fval, label)
595 elif self._alwayson and f_id in self._alwayson.values():
596 for (label, f_id2) in self._alwayson.items():
597 if f_id==f_id2: return 'label is %r' % label
598 elif self._unseen and f_id in self._unseen.values():
599 for (fname, f_id2) in self._unseen.items():
600 if f_id==f_id2: return '%s is unseen' % fname
601 else:
602 raise ValueError('Bad feature id')
603
605
606 return self._labels
607
609
610 return self._length
611
612 @classmethod
613 - def train(cls, train_toks, count_cutoff=0, labels=None, **options):
614 """
615 Construct and return new feature encoding, based on a given
616 training corpus C{train_toks}. See the L{class description
617 <BinaryMaxentFeatureEncoding>} for a description of the
618 joint-features that will be included in this encoding.
619
620 @type train_toks: C{list} of C{tuples} of (C{dict}, C{str})
621 @param train_toks: Training data, represented as a list of
622 pairs, the first member of which is a feature dictionary,
623 and the second of which is a classification label.
624
625 @type count_cutoff: C{int}
626 @param count_cutoff: A cutoff value that is used to discard
627 rare joint-features. If a joint-feature's value is 1
628 fewer than C{count_cutoff} times in the training corpus,
629 then that joint-feature is not included in the generated
630 encoding.
631
632 @type labels: C{list}
633 @param labels: A list of labels that should be used by the
634 classifier. If not specified, then the set of labels
635 attested in C{train_toks} will be used.
636
637 @param options: Extra parameters for the constructor, such as
638 C{unseen_features} and C{alwayson_features}.
639 """
640 mapping = {}
641 seen_labels = set()
642 count = defaultdict(int)
643
644 for (tok, label) in train_toks:
645 if labels and label not in labels:
646 raise ValueError('Unexpected label %s' % label)
647 seen_labels.add(label)
648
649
650 for (fname, fval) in tok.items():
651
652
653
654
655 count[fname,fval] += 1
656 if count[fname,fval] >= count_cutoff:
657 if (fname, fval, label) not in mapping:
658 mapping[fname, fval, label] = len(mapping)
659
660 if labels is None: labels = seen_labels
661 return cls(labels, mapping, **options)
662
664 """
665 A binary feature encoding which adds one new joint-feature to the
666 joint-features defined by L{BinaryMaxentFeatureEncoding}: a
667 correction feature, whose value is chosen to ensure that the
668 sparse vector always sums to a constant non-negative number. This
669 new feature is used to ensure two preconditions for the GIS
670 training algorithm:
671 - At least one feature vector index must be nonzero for every
672 token.
673 - The feature vector must sum to a constant non-negative number
674 for every token.
675 """
676 - def __init__(self, labels, mapping, unseen_features=False,
677 alwayson_features=False, C=None):
678 """
679 @param C: The correction constant. The value of the correction
680 feature is based on this value. In particular, its value is
681 C{C - sum([v for (f,v) in encoding])}.
682 @seealso: L{BinaryMaxentFeatureEncoding.__init__}
683 """
684 BinaryMaxentFeatureEncoding.__init__(
685 self, labels, mapping, unseen_features, alwayson_features)
686 if C is None:
687 C = len(set([fname for (fname,fval,label) in mapping]))+1
688
689 self._C = C
690 """The non-negative constant that all encoded feature vectors
691 will sum to."""
692
693 C = property(lambda self: self._C, doc="""
694 The non-negative constant that all encoded feature vectors
695 will sum to.""")
696
697 - def encode(self, featureset, label):
710
713
719
722 - def __init__(self, labels, mapping, unseen_features=False,
723 alwayson_features=False):
729
730 - def encode(self, featureset, label):
731 encoding = []
732 for feature, value in featureset.items():
733 if (feature, label) not in self._mapping:
734 self._mapping[(feature, label)] = len(self._mapping)
735 if value not in self._label_mapping:
736 if not isinstance(value, int):
737 self._label_mapping[value] = len(self._label_mapping)
738 else:
739 self._label_mapping[value] = value
740 encoding.append((self._mapping[(feature, label)],
741 self._label_mapping[value]))
742 return encoding
743
746
748 for (feature, label) in self._mapping:
749 if self._mapping[(feature, label)] == fid:
750 return (feature, label)
751
753 return len(self._mapping)
754
755 @classmethod
756 - def train(cls, train_toks, count_cutoff=0, labels=None, **options):
757 mapping = OrderedDict()
758 if not labels:
759 labels = []
760
761
762 train_toks = list(train_toks)
763
764 for (featureset, label) in train_toks:
765 if label not in labels:
766 labels.append(label)
767
768 for (featureset, label) in train_toks:
769 for label in labels:
770 for feature in featureset:
771 if (feature, label) not in mapping:
772 mapping[(feature, label)] = len(mapping)
773
774 return cls(labels, mapping, **options)
775
778 """
779 A feature encoding that generates vectors containing integer,
780 float and binary joint-features of the form::
781
782 Binary (for string and boolean features):
783 joint_feat(fs, l) = { 1 if (fs[fname] == fval) and (l == label)
784 {
785 { 0 otherwise
786 Value (for integer and float features):
787 joint_feat(fs, l) = { fval if (fs[fname] == type(fval))
788 { and (l == label)
789 {
790 { not encoded otherwise
791
792 Where C{fname} is the name of an input-feature, C{fval} is a value
793 for that input-feature, and C{label} is a label.
794
795 Typically, these features are constructed based on a training
796 corpus, using the L{train()} method.
797
798 For string and boolean features [type(fval) not in (int, float)]
799 this method will create one feature for each combination of
800 C{fname}, C{fval}, and C{label} that occurs at least once in the
801 training corpus.
802
803 For integer and float features [type(fval) in (int, float)] this
804 method will create one feature for each combination of C{fname}
805 and C{label} that occurs at least once in the training corpus.
806
807 For binary features the C{unseen_features} parameter can be used
808 to add X{unseen-value features}, which are used whenever an input
809 feature has a value that was not encountered in the training
810 corpus. These features have the form::
811
812 joint_feat(fs, l) = { 1 if is_unseen(fname, fs[fname])
813 { and l == label
814 {
815 { 0 otherwise
816
817 Where C{is_unseen(fname, fval)} is true if the encoding does not
818 contain any joint features that are true when C{fs[fname]==fval}.
819
820 The C{alwayson_features} parameter can be used to add X{always-on
821 features}, which have the form::
822
823 joint_feat(fs, l) = { 1 if (l == label)
824 {
825 { 0 otherwise
826
827 These always-on features allow the maxent model to directly model
828 the prior probabilities of each label.
829 """
830 - def __init__(self, labels, mapping, unseen_features=False,
831 alwayson_features=False):
832 """
833 @param labels: A list of the \"known labels\" for this encoding.
834
835 @param mapping: A dictionary mapping from C{(fname,fval,label)}
836 tuples to corresponding joint-feature indexes. These
837 indexes must be the set of integers from 0...len(mapping).
838 If C{mapping[fname,fval,label]=id}, then
839 C{self.encode({..., fname:fval, ...}, label)[id]} is 1;
840 otherwise, it is 0.
841
842 @param unseen_features: If true, then include unseen value
843 features in the generated joint-feature vectors.
844
845 @param alwayson_features: If true, then include always-on
846 features in the generated joint-feature vectors.
847 """
848 if set(mapping.values()) != set(range(len(mapping))):
849 raise ValueError('Mapping values must be exactly the '
850 'set of integers from 0...len(mapping)')
851
852 self._labels = list(labels)
853 """A list of attested labels."""
854
855 self._mapping = mapping
856 """dict mapping from (fname,fval,label) -> fid"""
857
858 self._length = len(mapping)
859 """The length of generated joint feature vectors."""
860
861 self._alwayson = None
862 """dict mapping from label -> fid"""
863
864 self._unseen = None
865 """dict mapping from fname -> fid"""
866
867 if alwayson_features:
868 self._alwayson = dict([(label,i+self._length)
869 for (i,label) in enumerate(labels)])
870 self._length += len(self._alwayson)
871
872 if unseen_features:
873 fnames = set(fname for (fname, fval, label) in mapping)
874 self._unseen = dict([(fname, i+self._length)
875 for (i, fname) in enumerate(fnames)])
876 self._length += len(fnames)
877
878 - def encode(self, featureset, label):
879
880 encoding = []
881
882
883 for fname, fval in featureset.items():
884 if(type(fval) in (int, float)):
885
886 if (fname, type(fval), label) in self._mapping:
887 encoding.append((self._mapping[fname, type(fval), label], fval))
888 else:
889
890 if (fname, fval, label) in self._mapping:
891 encoding.append((self._mapping[fname, fval, label], 1))
892
893
894 elif self._unseen:
895
896 for label2 in self._labels:
897 if (fname, fval, label2) in self._mapping:
898 break
899
900 else:
901 if fname in self._unseen:
902 encoding.append((self._unseen[fname], 1))
903
904
905
906 if self._alwayson and label in self._alwayson:
907 encoding.append((self._alwayson[label], 1))
908
909 return encoding
910
912
913 if not isinstance(f_id, (int, long)):
914 raise TypeError('describe() expected an int')
915 try:
916 self._inv_mapping
917 except AttributeError:
918 self._inv_mapping = [-1]*len(self._mapping)
919 for (info, i) in self._mapping.items():
920 self._inv_mapping[i] = info
921
922 if f_id < len(self._mapping):
923 (fname, fval, label) = self._inv_mapping[f_id]
924 return '%s==%r and label is %r' % (fname, fval, label)
925 elif self._alwayson and f_id in self._alwayson.values():
926 for (label, f_id2) in self._alwayson.items():
927 if f_id==f_id2: return 'label is %r' % label
928 elif self._unseen and f_id in self._unseen.values():
929 for (fname, f_id2) in self._unseen.items():
930 if f_id==f_id2: return '%s is unseen' % fname
931 else:
932 raise ValueError('Bad feature id')
933
935
936 return self._labels
937
939
940 return self._length
941
942 @classmethod
943 - def train(cls, train_toks, count_cutoff=0, labels=None, **options):
944 """
945 Construct and return new feature encoding, based on a given
946 training corpus C{train_toks}. See the L{class description
947 <TypedMaxentFeatureEncoding>} for a description of the
948 joint-features that will be included in this encoding.
949
950 Note: recognized feature values types are (int, float), over
951 types are interpreted as regular binary features.
952
953 @type train_toks: C{list} of C{tuples} of (C{dict}, C{str})
954 @param train_toks: Training data, represented as a list of
955 pairs, the first member of which is a feature dictionary,
956 and the second of which is a classification label.
957
958 @type count_cutoff: C{int}
959 @param count_cutoff: A cutoff value that is used to discard
960 rare joint-features. If a joint-feature's value is 1
961 fewer than C{count_cutoff} times in the training corpus,
962 then that joint-feature is not included in the generated
963 encoding.
964
965 @type labels: C{list}
966 @param labels: A list of labels that should be used by the
967 classifier. If not specified, then the set of labels
968 attested in C{train_toks} will be used.
969
970 @param options: Extra parameters for the constructor, such as
971 C{unseen_features} and C{alwayson_features}.
972 """
973 mapping = {}
974 seen_labels = set()
975 count = defaultdict(int)
976
977 for (tok, label) in train_toks:
978 if labels and label not in labels:
979 raise ValueError('Unexpected label %s' % label)
980 seen_labels.add(label)
981
982
983 for (fname, fval) in tok.items():
984 if(type(fval) in (int, float)): fval = type(fval)
985
986
987
988 count[fname,fval] += 1
989 if count[fname,fval] >= count_cutoff:
990 if (fname, fval, label) not in mapping:
991 mapping[fname, fval, label] = len(mapping)
992
993 if labels is None: labels = seen_labels
994 return cls(labels, mapping, **options)
995
1005 """
1006 Train a new C{ConditionalExponentialClassifier}, using the given
1007 training samples, using the Generalized Iterative Scaling
1008 algorithm. This C{ConditionalExponentialClassifier} will encode
1009 the model that maximizes entropy from all the models that are
1010 empirically consistent with C{train_toks}.
1011
1012 @see: L{train_maxent_classifier()} for parameter descriptions.
1013 """
1014 cutoffs.setdefault('max_iter', 100)
1015 cutoffchecker = CutoffChecker(cutoffs)
1016
1017
1018 if encoding is None:
1019 encoding = GISEncoding.train(train_toks, labels=labels)
1020
1021 if not hasattr(encoding, 'C'):
1022 raise TypeError('The GIS algorithm requires an encoding that '
1023 'defines C (e.g., GISEncoding).')
1024
1025
1026
1027
1028 Cinv = 1.0/encoding.C
1029
1030
1031 empirical_fcount = calculate_empirical_fcount(train_toks, encoding)
1032
1033
1034 unattested = set(numpy.nonzero(empirical_fcount==0)[0])
1035
1036
1037
1038 weights = numpy.zeros(len(empirical_fcount), 'd')
1039 for fid in unattested: weights[fid] = numpy.NINF
1040 classifier = ConditionalExponentialClassifier(encoding, weights)
1041
1042
1043 log_empirical_fcount = numpy.log2(empirical_fcount)
1044 del empirical_fcount
1045
1046
1047
1048 ll_old = None
1049 acc_old = None
1050
1051 if trace > 0: print ' ==> Training (%d iterations)' % cutoffs['max_iter']
1052 if trace > 2:
1053 print
1054 print ' Iteration Log Likelihood Accuracy'
1055 print ' ---------------------------------------'
1056
1057
1058 try:
1059 while True:
1060 if trace > 2:
1061 ll = cutoffchecker.ll or nltk.classify.util.log_likelihood(
1062 classifier, train_toks)
1063 acc = cutoffchecker.acc or nltk.classify.util.accuracy(
1064 classifier, train_toks)
1065 iternum = cutoffchecker.iter
1066 print ' %9d %14.5f %9.3f' % (iternum, ll, acc)
1067
1068
1069
1070 estimated_fcount = calculate_estimated_fcount(
1071 classifier, train_toks, encoding)
1072
1073
1074 for fid in unattested: estimated_fcount[fid] += 1
1075 log_estimated_fcount = numpy.log2(estimated_fcount)
1076 del estimated_fcount
1077
1078
1079 weights = classifier.weights()
1080 weights += (log_empirical_fcount - log_estimated_fcount) * Cinv
1081 classifier.set_weights(weights)
1082
1083
1084 if cutoffchecker.check(classifier, train_toks):
1085 break
1086
1087 except KeyboardInterrupt:
1088 print ' Training stopped: keyboard interrupt'
1089 except:
1090 raise
1091
1092 if trace > 2:
1093 ll = nltk.classify.util.log_likelihood(classifier, train_toks)
1094 acc = nltk.classify.util.accuracy(classifier, train_toks)
1095 print ' Final %14.5f %9.3f' % (ll, acc)
1096
1097
1098 return classifier
1099
1108
1120
1128 """
1129 Train a new C{ConditionalExponentialClassifier}, using the given
1130 training samples, using the Improved Iterative Scaling algorithm.
1131 This C{ConditionalExponentialClassifier} will encode the model
1132 that maximizes entropy from all the models that are empirically
1133 consistent with C{train_toks}.
1134
1135 @see: L{train_maxent_classifier()} for parameter descriptions.
1136 """
1137 cutoffs.setdefault('max_iter', 100)
1138 cutoffchecker = CutoffChecker(cutoffs)
1139
1140
1141 if encoding is None:
1142 encoding = BinaryMaxentFeatureEncoding.train(train_toks, labels=labels)
1143
1144
1145 empirical_ffreq = (calculate_empirical_fcount(train_toks, encoding) /
1146 len(train_toks))
1147
1148
1149
1150
1151
1152
1153 nfmap = calculate_nfmap(train_toks, encoding)
1154 nfarray = numpy.array(sorted(nfmap, key=nfmap.__getitem__), 'd')
1155 nftranspose = numpy.reshape(nfarray, (len(nfarray), 1))
1156
1157
1158 unattested = set(numpy.nonzero(empirical_ffreq==0)[0])
1159
1160
1161
1162 weights = numpy.zeros(len(empirical_ffreq), 'd')
1163 for fid in unattested: weights[fid] = numpy.NINF
1164 classifier = ConditionalExponentialClassifier(encoding, weights)
1165
1166 if trace > 0: print ' ==> Training (%d iterations)' % cutoffs['max_iter']
1167 if trace > 2:
1168 print
1169 print ' Iteration Log Likelihood Accuracy'
1170 print ' ---------------------------------------'
1171
1172
1173
1174 ll_old = None
1175 acc_old = None
1176
1177
1178 try:
1179 while True:
1180 if trace > 2:
1181 ll = cutoffchecker.ll or nltk.classify.util.log_likelihood(
1182 classifier, train_toks)
1183 acc = cutoffchecker.acc or nltk.classify.util.accuracy(
1184 classifier, train_toks)
1185 iternum = cutoffchecker.iter
1186 print ' %9d %14.5f %9.3f' % (iternum, ll, acc)
1187
1188
1189 deltas = calculate_deltas(
1190 train_toks, classifier, unattested, empirical_ffreq,
1191 nfmap, nfarray, nftranspose, encoding)
1192
1193
1194 weights = classifier.weights()
1195 weights += deltas
1196 classifier.set_weights(weights)
1197
1198
1199 if cutoffchecker.check(classifier, train_toks):
1200 break
1201
1202 except KeyboardInterrupt:
1203 print ' Training stopped: keyboard interrupt'
1204 except:
1205 raise
1206
1207
1208 if trace > 2:
1209 ll = nltk.classify.util.log_likelihood(classifier, train_toks)
1210 acc = nltk.classify.util.accuracy(classifier, train_toks)
1211 print ' Final %14.5f %9.3f' % (ll, acc)
1212
1213
1214 return classifier
1215
1217 """
1218 Construct a map that can be used to compress C{nf} (which is
1219 typically sparse).
1220
1221 M{nf(feature_vector)} is the sum of the feature values for
1222 M{feature_vector}.
1223
1224 This represents the number of features that are active for a
1225 given labeled text. This method finds all values of M{nf(t)}
1226 that are attested for at least one token in the given list of
1227 training tokens; and constructs a dictionary mapping these
1228 attested values to a continuous range M{0...N}. For example,
1229 if the only values of M{nf()} that were attested were 3, 5,
1230 and 7, then C{_nfmap} might return the dictionary {3:0, 5:1,
1231 7:2}.
1232
1233 @return: A map that can be used to compress C{nf} to a dense
1234 vector.
1235 @rtype: C{dictionary} from C{int} to C{int}
1236 """
1237
1238 nfset = set()
1239 for tok, _ in train_toks:
1240 for label in encoding.labels():
1241 nfset.add(sum([val for (id,val) in encoding.encode(tok,label)]))
1242 return dict([(nf, i) for (i, nf) in enumerate(nfset)])
1243
1244 -def calculate_deltas(train_toks, classifier, unattested, ffreq_empirical,
1245 nfmap, nfarray, nftranspose, encoding):
1246 """
1247 Calculate the update values for the classifier weights for
1248 this iteration of IIS. These update weights are the value of
1249 C{delta} that solves the equation::
1250
1251 ffreq_empirical[i]
1252 =
1253 SUM[fs,l] (classifier.prob_classify(fs).prob(l) *
1254 feature_vector(fs,l)[i] *
1255 exp(delta[i] * nf(feature_vector(fs,l))))
1256
1257 Where:
1258 - M{(fs,l)} is a (featureset, label) tuple from C{train_toks}
1259 - M{feature_vector(fs,l)} = C{encoding.encode(fs,l)}
1260 - M{nf(vector)} = C{sum([val for (id,val) in vector])}
1261
1262 This method uses Newton's method to solve this equation for
1263 M{delta[i]}. In particular, it starts with a guess of
1264 C{delta[i]}=1; and iteratively updates C{delta} with::
1265
1266 delta[i] -= (ffreq_empirical[i] - sum1[i])/(-sum2[i])
1267
1268 until convergence, where M{sum1} and M{sum2} are defined as::
1269
1270 sum1[i](delta) = SUM[fs,l] f[i](fs,l,delta)
1271
1272 sum2[i](delta) = SUM[fs,l] (f[i](fs,l,delta) *
1273 nf(feature_vector(fs,l)))
1274
1275 f[i](fs,l,delta) = (classifier.prob_classify(fs).prob(l) *
1276 feature_vector(fs,l)[i] *
1277 exp(delta[i] * nf(feature_vector(fs,l))))
1278
1279 Note that M{sum1} and M{sum2} depend on C{delta}; so they need
1280 to be re-computed each iteration.
1281
1282 The variables C{nfmap}, C{nfarray}, and C{nftranspose} are
1283 used to generate a dense encoding for M{nf(ltext)}. This
1284 allows C{_deltas} to calculate M{sum1} and M{sum2} using
1285 matrices, which yields a signifigant performance improvement.
1286
1287 @param train_toks: The set of training tokens.
1288 @type train_toks: C{list} of C{tuples} of (C{dict}, C{str})
1289 @param classifier: The current classifier.
1290 @type classifier: C{ClassifierI}
1291 @param ffreq_empirical: An array containing the empirical
1292 frequency for each feature. The M{i}th element of this
1293 array is the empirical frequency for feature M{i}.
1294 @type ffreq_empirical: C{sequence} of C{float}
1295 @param unattested: An array that is 1 for features that are
1296 not attested in the training data; and 0 for features that
1297 are attested. In other words, C{unattested[i]==0} iff
1298 C{ffreq_empirical[i]==0}.
1299 @type unattested: C{sequence} of C{int}
1300 @param nfmap: A map that can be used to compress C{nf} to a dense
1301 vector.
1302 @type nfmap: C{dictionary} from C{int} to C{int}
1303 @param nfarray: An array that can be used to uncompress C{nf}
1304 from a dense vector.
1305 @type nfarray: C{array} of C{float}
1306 @param nftranspose: C{array} of C{float}
1307 @type nftranspose: The transpose of C{nfarray}
1308 """
1309
1310
1311
1312 NEWTON_CONVERGE = 1e-12
1313 MAX_NEWTON = 300
1314
1315 deltas = numpy.ones(encoding.length(), 'd')
1316
1317
1318
1319
1320 A = numpy.zeros((len(nfmap), encoding.length()), 'd')
1321
1322 for tok, label in train_toks:
1323 dist = classifier.prob_classify(tok)
1324
1325 for label in encoding.labels():
1326
1327 feature_vector = encoding.encode(tok,label)
1328
1329 nf = sum([val for (id, val) in feature_vector])
1330
1331 for (id, val) in feature_vector:
1332 A[nfmap[nf], id] += dist.prob(label) * val
1333 A /= len(train_toks)
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343 for rangenum in range(MAX_NEWTON):
1344 nf_delta = numpy.outer(nfarray, deltas)
1345 exp_nf_delta = 2 ** nf_delta
1346 nf_exp_nf_delta = nftranspose * exp_nf_delta
1347 sum1 = numpy.sum(exp_nf_delta * A, axis=0)
1348 sum2 = numpy.sum(nf_exp_nf_delta * A, axis=0)
1349
1350
1351 for fid in unattested: sum2[fid] += 1
1352
1353
1354 deltas -= (ffreq_empirical - sum1) / -sum2
1355
1356
1357 n_error = (numpy.sum(abs((ffreq_empirical-sum1)))/
1358 numpy.sum(abs(deltas)))
1359 if n_error < NEWTON_CONVERGE:
1360 return deltas
1361
1362 return deltas
1363
1364
1365
1366
1367
1368
1369
1370 -def train_maxent_classifier_with_scipy(train_toks, trace=3, encoding=None,
1371 labels=None, algorithm='CG',
1372 sparse=True, gaussian_prior_sigma=0,
1373 **cutoffs):
1374 """
1375 Train a new C{ConditionalExponentialClassifier}, using the given
1376 training samples, using the specified C{scipy} optimization
1377 algorithm. This C{ConditionalExponentialClassifier} will encode
1378 the model that maximizes entropy from all the models that are
1379 empirically consistent with C{train_toks}.
1380
1381 @see: L{train_maxent_classifier()} for parameter descriptions.
1382 @require: The C{scipy} package must be installed.
1383 """
1384 try:
1385 import scipy
1386 except ImportError, e:
1387 raise ValueError('The maxent training algorithm %r requires '
1388 'that the scipy package be installed. See '
1389 'http://www.scipy.org/' % algorithm)
1390 try:
1391
1392 import scipy.sparse, scipy.maxentropy
1393 except ImportError, e:
1394 raise ValueError('Import of scipy package failed: %s' % e)
1395
1396
1397 if encoding is None:
1398 encoding = BinaryMaxentFeatureEncoding.train(train_toks, labels=labels)
1399 elif labels is not None:
1400 raise ValueError('Specify encoding or labels, not both')
1401
1402 labels = encoding.labels()
1403 labelnum = dict([(label, i) for (i, label) in enumerate(labels)])
1404 num_features = encoding.length()
1405 num_toks = len(train_toks)
1406 num_labels = len(labels)
1407
1408
1409
1410
1411
1412
1413
1414 if sparse: zeros = scipy.sparse.lil_matrix
1415 else: zeros = numpy.zeros
1416
1417
1418
1419
1420
1421 F = zeros((num_features, num_toks*num_labels))
1422
1423
1424
1425
1426 N = zeros((1, num_toks*num_labels))
1427
1428
1429
1430 for toknum, (featureset, label) in enumerate(train_toks):
1431 N[0, toknum*len(labels) + labelnum[label]] += 1
1432 for label2 in labels:
1433 for (fid, fval) in encoding.encode(featureset, label2):
1434 F[fid, toknum*len(labels) + labelnum[label2]] = fval
1435
1436
1437 model = scipy.maxentropy.conditionalmodel(F, N, num_toks)
1438
1439 if gaussian_prior_sigma:
1440 model.sigma2 = gaussian_prior_sigma**2
1441 if algorithm == 'LBFGSB':
1442 model.log = None
1443 if trace >= 3:
1444 model.verbose = True
1445 if 'max_iter' in cutoffs:
1446 model.maxiter = cutoffs['max_iter']
1447 if 'tolerance' in cutoffs:
1448 if algorithm == 'CG': model.avegtol = cutoffs['tolerance']
1449 elif algorithm == 'LBFGSB': model.maxgtol = cutoffs['tolerance']
1450 else: model.tol = cutoffs['tolerance']
1451
1452
1453 model.fit(algorithm=algorithm)
1454
1455
1456 weights = model.params * numpy.log2(numpy.e)
1457
1458
1459 return MaxentClassifier(encoding, weights)
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469 -def train_maxent_classifier_with_megam(train_toks, trace=3, encoding=None,
1470 labels=None, gaussian_prior_sigma=0,
1471 **kwargs):
1472 """
1473 Train a new C{ConditionalExponentialClassifier}, using the given
1474 training samples, using the external C{megam} library. This
1475 C{ConditionalExponentialClassifier} will encode the model that
1476 maximizes entropy from all the models that are empirically
1477 consistent with C{train_toks}.
1478
1479 @see: L{train_maxent_classifier()} for parameter descriptions.
1480 @see: L{nltk.classify.megam}
1481 """
1482
1483 explicit = True
1484 bernoulli = True
1485 if 'explicit' in kwargs: explicit = kwargs['explicit']
1486 if 'bernoulli' in kwargs: bernoulli = kwargs['bernoulli']
1487
1488
1489 if encoding is None:
1490
1491
1492 count_cutoff = kwargs.get('count_cutoff', 0)
1493 encoding = BinaryMaxentFeatureEncoding.train(train_toks, count_cutoff,
1494 labels=labels,
1495 alwayson_features=True)
1496 elif labels is not None:
1497 raise ValueError('Specify encoding or labels, not both')
1498
1499
1500 try:
1501 fd, trainfile_name = tempfile.mkstemp(prefix='nltk-', suffix='.gz')
1502 trainfile = gzip.open(trainfile_name, 'wb')
1503 write_megam_file(train_toks, encoding, trainfile, \
1504 explicit=explicit, bernoulli=bernoulli)
1505 trainfile.close()
1506 except (OSError, IOError, ValueError), e:
1507 raise ValueError('Error while creating megam training file: %s' % e)
1508
1509
1510 options = []
1511 options += ['-nobias', '-repeat', '10']
1512 if explicit:
1513 options += ['-explicit']
1514 if not bernoulli:
1515 options += ['-fvals']
1516 if gaussian_prior_sigma:
1517
1518
1519
1520 inv_variance = 1.0 / gaussian_prior_sigma**2
1521 else:
1522 inv_variance = 0
1523 options += ['-lambda', '%.2f' % inv_variance, '-tune']
1524 if trace < 3:
1525 options += ['-quiet']
1526 if 'max_iter' in kwargs:
1527 options += ['-maxi', '%s' % kwargs['max_iter']]
1528 if 'll_delta' in kwargs:
1529
1530
1531 options += ['-dpp', '%s' % abs(kwargs['ll_delta'])]
1532 options += ['multiclass', trainfile_name]
1533 stdout = call_megam(options)
1534
1535
1536 try: os.remove(trainfile_name)
1537 except (OSError, IOError), e:
1538 print 'Warning: unable to delete %s: %s' % (trainfile_name, e)
1539
1540
1541 weights = parse_megam_weights(stdout, encoding.length(), explicit)
1542
1543
1544 weights *= numpy.log2(numpy.e)
1545
1546
1547 return MaxentClassifier(encoding, weights)
1548
1554 @classmethod
1555 - def train(cls, train_toks, **kwargs):
1556 algorithm = kwargs.get('algorithm', 'tao_lmvm')
1557 trace = kwargs.get('trace', 3)
1558 encoding = kwargs.get('encoding', None)
1559 labels = kwargs.get('labels', None)
1560 sigma = kwargs.get('gaussian_prior_sigma', 0)
1561 count_cutoff = kwargs.get('count_cutoff', 0)
1562 max_iter = kwargs.get('max_iter')
1563 ll_delta = kwargs.get('min_lldelta')
1564
1565
1566 if not encoding:
1567 encoding = TadmEventMaxentFeatureEncoding.train(train_toks,
1568 count_cutoff,
1569 labels=labels)
1570
1571 trainfile_fd, trainfile_name = \
1572 tempfile.mkstemp(prefix='nltk-tadm-events-', suffix='.gz')
1573 weightfile_fd, weightfile_name = \
1574 tempfile.mkstemp(prefix='nltk-tadm-weights-')
1575
1576 trainfile = gzip.open(trainfile_name, 'wb')
1577 write_tadm_file(train_toks, encoding, trainfile)
1578 trainfile.close()
1579
1580 options = []
1581 options.extend(['-monitor'])
1582 options.extend(['-method', algorithm])
1583 if sigma:
1584 options.extend(['-l2', '%.6f' % sigma**2])
1585 if max_iter:
1586 options.extend(['-max_it', '%d' % max_iter])
1587 if ll_delta:
1588 options.extend(['-fatol', '%.6f' % abs(ll_delta)])
1589 options.extend(['-events_in', trainfile_name])
1590 options.extend(['-params_out', weightfile_name])
1591 if trace < 3:
1592 options.extend(['2>&1'])
1593 else:
1594 options.extend(['-summary'])
1595
1596 call_tadm(options)
1597
1598 weightfile = open(weightfile_name, 'rb')
1599 weights = parse_tadm_weights(weightfile)
1600 weightfile.close()
1601
1602 os.remove(trainfile_name)
1603 os.remove(weightfile_name)
1604
1605
1606 weights *= numpy.log2(numpy.e)
1607
1608
1609 return cls(encoding, weights)
1610
1611
1612
1613
1614 -def demo():
1617
1618 if __name__ == '__main__':
1619 demo()
1620