1
2
3
4
5
6
7
8
9
10 import os
11 import subprocess
12 from optparse import OptionParser
13 import tempfile
14 import operator
15
16 import nltk
17 from nltk.sem.logic import *
18 from nltk.sem.drt import *
19
20 """
21 An interface to Boxer.
22
23 Usage:
24 Set the environment variable CANDCHOME to the bin directory of your CandC installation.
25 The models directory should be in the CandC root directory.
26 For example:
27 /path/to/candc/
28 bin/
29 candc
30 boxer
31 models/
32 boxer/
33 """
34
36 """
37 This class is an interface to Johan Bos's program Boxer, a wide-coverage
38 semantic parser that produces Discourse Representation Structures (DRSs).
39 """
40
41 - def __init__(self, boxer_drs_interpreter=None, elimeq=False, bin_dir=None, verbose=False):
42 """
43 @param boxer_drs_interpreter: A class that converts from the
44 C{AbstractBoxerDrs} object hierarchy to a different object. The
45 default is C{NltkDrtBoxerDrsInterpreter}, which converts to the NLTK
46 DRT hierarchy.
47 @param elimeq: When set to true, Boxer removes all equalities from the
48 DRSs and discourse referents standing in the equality relation are
49 unified, but only if this can be done in a meaning-preserving manner.
50 """
51 if boxer_drs_interpreter is None:
52 boxer_drs_interpreter = NltkDrtBoxerDrsInterpreter()
53 self._boxer_drs_interpreter = boxer_drs_interpreter
54
55 self._elimeq = elimeq
56
57 self.set_bin_dir(bin_dir, verbose)
58
60 self._candc_bin = self._find_binary('candc', bin_dir, verbose)
61 self._candc_models_path = os.path.normpath(os.path.join(self._candc_bin[:-5], '../models'))
62 self._boxer_bin = self._find_binary('boxer', bin_dir, verbose)
63
def interpret(self, input, discourse_id=None, question=False, verbose=False):
    """
    Use Boxer to give a first order representation of one sentence.

    The sentence is wrapped as a one-sentence discourse and handed to
    C{batch_interpret_multisentence}; exactly one result is expected back.

    @param input: C{str} Input sentence to parse
    @param discourse_id: C{str} An identifier to be inserted to each
        occurrence-indexed predicate.  When C{None}, no identifier list is
        passed on.
    @param question: forwarded unchanged to C{batch_interpret_multisentence}
    @param verbose: forwarded unchanged to C{batch_interpret_multisentence}
    @return: C{drt.AbstractDrs}
    @raise Exception: if the single result is falsy
    """
    discourse_ids = None if discourse_id is None else [discourse_id]
    results = self.batch_interpret_multisentence([[input]], discourse_ids, question, verbose)
    # Single-element unpacking: fails loudly unless exactly one result came back.
    (drs,) = results
    if drs:
        return drs
    raise Exception('Unable to interpret: "%s"' % input)
81
83 """
84 Use Boxer to give a first order representation.
85
86 @param input: C{list} of C{str} Input sentences to parse as a single discourse
87 @param occur_index: C{boolean} Should predicates be occurrence indexed?
88 @param discourse_id: C{str} An identifier to be inserted to each occurrence-indexed predicate.
89 @return: C{drt.AbstractDrs}
90 """
91 if discourse_id is not None:
92 discourse_ids = [discourse_id]
93 else:
94 discourse_ids = None
95 d, = self.batch_interpret_multisentence([input], discourse_ids, question, verbose)
96 if not d:
97 raise Exception('Unable to interpret: "%s"' % input)
98 return d
99
def batch_interpret(self, inputs, discourse_ids=None, question=False, verbose=False):
    """
    Use Boxer to give a first order representation of several sentences,
    each treated as its own one-sentence discourse.

    @param inputs: C{list} of C{str} Input sentences to parse as individual discourses
    @param discourse_ids: C{list} of C{str} Identifiers to be inserted to each occurrence-indexed predicate.
    @param question: forwarded unchanged to C{batch_interpret_multisentence}
    @param verbose: forwarded unchanged to C{batch_interpret_multisentence}
    @return: whatever C{batch_interpret_multisentence} returns for the
        wrapped inputs (a C{list} of C{drt.AbstractDrs})
    """
    # Wrap every sentence in its own list so it forms a separate discourse.
    discourses = [[sentence] for sentence in inputs]
    return self.batch_interpret_multisentence(discourses, discourse_ids, question, verbose)
110
112 """
113 Use Boxer to give a first order representation.
114
115 @param inputs: C{list} of C{list} of C{str} Input discourses to parse
116 @param occur_index: C{boolean} Should predicates be occurrence indexed?
117 @param discourse_ids: C{list} of C{str} Identifiers to be inserted to each occurrence-indexed predicate.
118 @return: C{drt.AbstractDrs}
119 """
120 _, temp_filename = tempfile.mkstemp(prefix='boxer-', suffix='.in', text=True)
121
122 if discourse_ids is not None:
123 assert len(inputs) == len(discourse_ids)
124 assert reduce(operator.and_, (id is not None for id in discourse_ids))
125 use_disc_id = True
126 else:
127 discourse_ids = map(str, xrange(len(inputs)))
128 use_disc_id = False
129
130 candc_out = self._call_candc(inputs, discourse_ids, question, temp_filename, verbose=verbose)
131 boxer_out = self._call_boxer(temp_filename, verbose=verbose)
132
133 os.remove(temp_filename)
134
135
136
137
138 drs_dict = self._parse_to_drs_dict(boxer_out, use_disc_id)
139 return [drs_dict.get(id, None) for id in discourse_ids]
140
141 - def _call_candc(self, inputs, discourse_ids, question, filename, verbose=False):
142 """
143 Call the C{candc} binary with the given input.
144
145 @param inputs: C{list} of C{list} of C{str} Input discourses to parse
146 @param discourse_ids: C{list} of C{str} Identifiers to be inserted to each occurrence-indexed predicate.
147 @param filename: C{str} A filename for the output file
148 @return: stdout
149 """
150 args = ['--models', os.path.join(self._candc_models_path, ['boxer','questions'][question]),
151 '--output', filename,
152 '--candc-printer', 'boxer']
153 return self._call('\n'.join(sum((["<META>'%s'" % id] + d for d,id in zip(inputs,discourse_ids)), [])), self._candc_bin, args, verbose)
154
156 """
157 Call the C{boxer} binary with the given input.
158
159 @param filename: C{str} A filename for the input file
160 @return: stdout
161 """
162 args = ['--box', 'false',
163 '--semantics', 'drs',
164 '--flat', 'false',
165 '--resolve', 'true',
166 '--elimeq', ['false','true'][self._elimeq],
167 '--format', 'prolog',
168 '--instantiate', 'true',
169 '--input', filename]
170
171 return self._call(None, self._boxer_bin, args, verbose)
172
174 return nltk.internals.find_binary(name,
175 path_to_bin=bin_dir,
176 env_vars=['CANDCHOME'],
177 url='http://svn.ask.it.usyd.edu.au/trac/candc/',
178 binary_names=[name, name + '.exe'],
179 verbose=verbose)
180
181 - def _call(self, input_str, binary, args=[], verbose=False):
182 """
183 Call the binary with the given input.
184
185 @param input_str: A string whose contents are used as stdin.
186 @param binary: The location of the binary to call
187 @param args: A list of command-line arguments.
188 @return: stdout
189 """
190 if verbose:
191 print 'Calling:', binary
192 print 'Args:', args
193 print 'Input:', input_str
194 print 'Command:', binary + ' ' + ' '.join(args)
195
196
197 if input_str is None:
198 cmd = [binary] + args
199 p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
200 else:
201 cmd = 'echo "%s" | %s %s' % (input_str, binary, ' '.join(args))
202 p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
203 stdout, stderr = p.communicate()
204
205 if verbose:
206 print 'Return code:', p.returncode
207 if stdout: print 'stdout:\n', stdout, '\n'
208 if stderr: print 'stderr:\n', stderr, '\n'
209 if p.returncode != 0:
210 raise Exception('ERROR CALLING: %s %s\nReturncode: %d\n%s' % (binary, ' '.join(args), p.returncode, stderr))
211
212 return stdout
213
215 lines = boxer_out.split('\n')
216 drs_dict = {}
217 i = 0
218 while i < len(lines):
219 line = lines[i]
220 if line.startswith('id('):
221 comma_idx = line.index(',')
222 discourse_id = line[3:comma_idx]
223 if discourse_id[0] == "'" and discourse_id[-1] == "'":
224 discourse_id = discourse_id[1:-1]
225 drs_id = line[comma_idx+1:line.index(')')]
226 i += 1
227 line = lines[i]
228 assert line.startswith('sem(%s,' % drs_id)
229
230 i += 4
231 line = lines[i]
232 assert line.endswith(').')
233 drs_input = line[:-2].strip()
234 parsed = self._parse_drs(drs_input, discourse_id, use_disc_id)
235 drs_dict[discourse_id] = self._boxer_drs_interpreter.interpret(parsed)
236 i += 1
237 return drs_dict
238
239 - def _parse_drs(self, drs_string, discourse_id, use_disc_id):
241
242
245 """
246 This class is used to parse the Prolog DRS output from Boxer into a
247 hierarchy of python objects.
248 """
249 DrtParser.__init__(self)
250 self.discourse_id = discourse_id
251 self.sentence_id_offset = None
252 self.quote_chars = [("'", "'", "\\", False)]
253 self._label_counter = None
254
255 - def parse(self, data, signature=None):
258
260 return ['(', ')', ',', '[', ']',':']
261
262 - def handle(self, tok, context):
264
267
279
285
323
329
331
332 self.assertToken(self.token(), '(')
333 variable = self.parse_variable()
334 self.assertToken(self.token(), ',')
335 name = self.token()
336 self.assertToken(self.token(), ',')
337 pos = self.token()
338 self.assertToken(self.token(), ',')
339 sense = int(self.token())
340 self.assertToken(self.token(), ')')
341
342 def _handle_pred_f(sent_index, word_indices):
343 if name=='event' and sent_index is None and ((pos=='n' and sense==1) or (pos=='v' and sense==0)):
344 return BoxerEvent(variable)
345 else:
346 return BoxerPred(self.discourse_id, sent_index, word_indices, variable, name, pos, sense)
347 return _handle_pred_f
348
350
351 self.assertToken(self.token(), '(')
352 variable = self.parse_variable()
353 self.assertToken(self.token(), ',')
354 name = self.token()
355 self.assertToken(self.token(), ',')
356 type = self.token()
357 self.assertToken(self.token(), ',')
358 sense = int(self.token())
359 self.assertToken(self.token(), ')')
360 return lambda sent_index, word_indices: BoxerNamed(self.discourse_id, sent_index, word_indices, variable, name, type, sense)
361
363
364 self.assertToken(self.token(), '(')
365 var1 = self.parse_variable()
366 self.assertToken(self.token(), ',')
367 var2 = self.parse_variable()
368 self.assertToken(self.token(), ',')
369 rel = self.token()
370 self.assertToken(self.token(), ',')
371 sense = int(self.token())
372 self.assertToken(self.token(), ')')
373 return lambda sent_index, word_indices: BoxerRel(self.discourse_id, sent_index, word_indices, var1, var2, rel, sense)
374
383
385
386 tok = self.token()
387 self.assertToken(self.token(), '(')
388 if tok == 'date':
389 conds = self._handle_date(arg)
390 elif tok == 'time':
391 conds = self._handle_time(arg)
392 else:
393 return None
394 self.assertToken(self.token(), ')')
395 return [lambda sent_index, word_indices: BoxerPred(self.discourse_id, sent_index, word_indices, arg, tok, 'n', 0)] + \
396 [lambda sent_index, word_indices: cond for cond in conds]
397
399
400 conds = []
401 (sent_index, word_indices), = self._sent_and_word_indices(self._parse_index_list())
402 self.assertToken(self.token(), '(')
403 pol = self.token()
404 self.assertToken(self.token(), ')')
405 conds.append(BoxerPred(self.discourse_id, sent_index, word_indices, arg, 'date_pol_%s' % (pol), 'a', 0))
406 self.assertToken(self.token(), ',')
407
408 (sent_index, word_indices), = self._sent_and_word_indices(self._parse_index_list())
409 year = self.token()
410 if year != 'XXXX':
411 year = year.replace(':', '_')
412 conds.append(BoxerPred(self.discourse_id, sent_index, word_indices, arg, 'date_year_%s' % (year), 'a', 0))
413 self.assertToken(self.token(), ',')
414
415 (sent_index, word_indices), = self._sent_and_word_indices(self._parse_index_list())
416 month = self.token()
417 if month != 'XX':
418 conds.append(BoxerPred(self.discourse_id, sent_index, word_indices, arg, 'date_month_%s' % (month), 'a', 0))
419 self.assertToken(self.token(), ',')
420
421 (sent_index, word_indices), = self._sent_and_word_indices(self._parse_index_list())
422 day = self.token()
423 if day != 'XX':
424 conds.append(BoxerPred(self.discourse_id, sent_index, word_indices, arg, 'date_day_%s' % (day), 'a', 0))
425
426 return conds
427
429
430 conds = []
431 self._parse_index_list()
432 hour = self.token()
433 if hour != 'XX':
434 conds.append(self._make_atom('r_hour_2',arg,hour))
435 self.assertToken(self.token(), ',')
436
437 self._parse_index_list()
438 min = self.token()
439 if min != 'XX':
440 conds.append(self._make_atom('r_min_2',arg,min))
441 self.assertToken(self.token(), ',')
442
443 self._parse_index_list()
444 sec = self.token()
445 if sec != 'XX':
446 conds.append(self._make_atom('r_sec_2',arg,sec))
447
448 return conds
449
451
452 self.assertToken(self.token(), '(')
453 variable = self.parse_variable()
454 self.assertToken(self.token(), ',')
455 value = self.token()
456 self.assertToken(self.token(), ',')
457 type = self.token()
458 self.assertToken(self.token(), ')')
459 return lambda sent_index, word_indices: BoxerCard(self.discourse_id, sent_index, word_indices, variable, value, type)
460
469
481
483
484
485
486 label = self._label_counter.get()
487 self.assertToken(self.token(), '(')
488 self.assertToken(self.token(), '[')
489 refs = set()
490 while self.token(0) != ']':
491 indices = self._parse_index_list()
492 refs.add(self.parse_variable())
493 if self.token(0) == ',':
494 self.token()
495 self.token()
496 self.assertToken(self.token(), ',')
497 self.assertToken(self.token(), '[')
498 conds = []
499 while self.token(0) != ']':
500 indices = self._parse_index_list()
501 conds.extend(self.parse_condition(indices))
502 if self.token(0) == ',':
503 self.token()
504 self.token()
505 self.assertToken(self.token(), ')')
506 return BoxerDrs(label, list(refs), conds)
507
515
523
524
526 self.assertToken(self.token(), '(')
527 self.assertToken(self.token(), '[')
528 ans_types = []
529 while self.token(0) != ']':
530 cat = self.token()
531 self.assertToken(self.token(), ':')
532 if cat == 'des':
533 ans_types.append(self.token())
534 elif cat == 'num':
535 ans_types.append('number')
536 typ = self.token()
537 if typ == 'cou':
538 ans_types.append('count')
539 else:
540 ans_types.append(typ)
541 else:
542 ans_types.append(self.token())
543 self.token()
544
545 self.assertToken(self.token(), ',')
546 d1 = self.parse_Expression(None)
547 self.assertToken(self.token(), ',')
548 ref = self.parse_variable()
549 self.assertToken(self.token(), ',')
550 d2 = self.parse_Expression(None)
551 self.assertToken(self.token(), ')')
552 return lambda sent_index, word_indices: BoxerWhq(self.discourse_id, sent_index, word_indices, ans_types, d1, ref, d2)
553
555 return BoxerDrs(drs1.label, drs1.refs + drs2.refs, drs1.conds + drs2.conds)
556
558 return BoxerOr(self.discourse_id, sent_index, word_indices, drs1, drs2)
559
561 return BoxerDrs(drs1.label, drs1.refs, drs1.conds, drs2)
562
564 var = self.token()
565 assert re.match('^x\d+$', var)
566 return int(var[1:])
567
570
572 """
573 @return: C{list} of (sent_index, word_indices) tuples
574 """
575 sent_indices = set((i / 1000)-1 for i in indices if i>=0)
576 if sent_indices:
577 pairs = []
578 for sent_index in sent_indices:
579 word_indices = [(i % 1000)-1 for i in indices if sent_index == (i / 1000)-1]
580 pairs.append((sent_index, word_indices))
581 return pairs
582 else:
583 word_indices = [(i % 1000)-1 for i in indices]
584 return [(None, word_indices)]
585
586
588 """
589 Reparse the str form of subclasses of C{AbstractBoxerDrs}
590 """
594
597
600
def handle(self, tok, context):
    """
    Rebuild the Boxer object whose printed form begins with C{tok}.

    Recognised heads are C{drs}, C{pred}, C{named}, C{rel}, C{event},
    C{prop}, C{not}, C{imp}, C{or}, C{eq}, C{card} and C{whq}.  Any
    exception raised while reading a recognised form is re-raised as a
    C{ParseException} carrying the current index; an unrecognised head
    trips the final assertion.

    @param tok: the head token that was just consumed
    @param context: not used by this method
    @return: the reconstructed C{Boxer*} object
    """
    def expect(kind):
        self.assertNextToken(kind)

    def int_token():
        return int(self.token())

    def indexed_prefix(nullable_sent):
        # Shared opening of every indexed condition:
        #   '(' disc_id ',' sent_id ',' [word ids] ','
        expect(DrtTokens.OPEN)
        # The printed discourse id is always consumed, even when this
        # parser's own discourse_id takes precedence over it.
        printed_id = self.token()
        if self.discourse_id is not None:
            disc_id = self.discourse_id
        else:
            disc_id = printed_id
        expect(DrtTokens.COMMA)
        if nullable_sent:
            sent_id = self.nullableIntToken()
        else:
            sent_id = int_token()
        expect(DrtTokens.COMMA)
        word_ids = [int(w) for w in self.handle_refs()]
        expect(DrtTokens.COMMA)
        return disc_id, sent_id, word_ids

    try:
        if tok == 'drs':
            expect(DrtTokens.OPEN)
            label = int_token()
            expect(DrtTokens.COMMA)
            refs = [int(r) for r in self.handle_refs()]
            expect(DrtTokens.COMMA)
            conds = self.handle_conds(None)
            expect(DrtTokens.CLOSE)
            return BoxerDrs(label, refs, conds)
        elif tok == 'pred':
            disc_id, sent_id, word_ids = indexed_prefix(True)
            variable = int_token()
            expect(DrtTokens.COMMA)
            name = self.token()
            expect(DrtTokens.COMMA)
            pos = self.token()
            expect(DrtTokens.COMMA)
            sense = int_token()
            expect(DrtTokens.CLOSE)
            return BoxerPred(disc_id, sent_id, word_ids, variable, name, pos, sense)
        elif tok == 'named':
            disc_id, sent_id, word_ids = indexed_prefix(False)
            variable = int_token()
            expect(DrtTokens.COMMA)
            name = self.token()
            expect(DrtTokens.COMMA)
            ne_type = self.token()
            expect(DrtTokens.COMMA)
            sense = int_token()
            expect(DrtTokens.CLOSE)
            return BoxerNamed(disc_id, sent_id, word_ids, variable, name, ne_type, sense)
        elif tok == 'rel':
            disc_id, sent_id, word_ids = indexed_prefix(True)
            var1 = int_token()
            expect(DrtTokens.COMMA)
            var2 = int_token()
            expect(DrtTokens.COMMA)
            rel = self.token()
            expect(DrtTokens.COMMA)
            sense = int_token()
            expect(DrtTokens.CLOSE)
            return BoxerRel(disc_id, sent_id, word_ids, var1, var2, rel, sense)
        elif tok == 'event':
            expect(DrtTokens.OPEN)
            var = int_token()
            expect(DrtTokens.CLOSE)
            return BoxerEvent(var)
        elif tok == 'prop':
            disc_id, sent_id, word_ids = indexed_prefix(False)
            variable = int_token()
            expect(DrtTokens.COMMA)
            drs = self.parse_Expression(None)
            expect(DrtTokens.CLOSE)
            return BoxerProp(disc_id, sent_id, word_ids, variable, drs)
        elif tok == 'not':
            expect(DrtTokens.OPEN)
            drs = self.parse_Expression(None)
            expect(DrtTokens.CLOSE)
            return BoxerNot(drs)
        elif tok == 'imp':
            expect(DrtTokens.OPEN)
            antecedent = self.parse_Expression(None)
            expect(DrtTokens.COMMA)
            consequent = self.parse_Expression(None)
            expect(DrtTokens.CLOSE)
            # An implication is stored as the antecedent DRS plus a consequent.
            return BoxerDrs(antecedent.label, antecedent.refs, antecedent.conds, consequent)
        elif tok == 'or':
            disc_id, sent_id, word_ids = indexed_prefix(True)
            drs1 = self.parse_Expression(None)
            expect(DrtTokens.COMMA)
            drs2 = self.parse_Expression(None)
            expect(DrtTokens.CLOSE)
            return BoxerOr(disc_id, sent_id, word_ids, drs1, drs2)
        elif tok == 'eq':
            disc_id, sent_id, word_ids = indexed_prefix(True)
            var1 = int_token()
            expect(DrtTokens.COMMA)
            var2 = int_token()
            expect(DrtTokens.CLOSE)
            return BoxerEq(disc_id, sent_id, word_ids, var1, var2)
        elif tok == 'card':
            disc_id, sent_id, word_ids = indexed_prefix(True)
            var = int_token()
            expect(DrtTokens.COMMA)
            value = self.token()
            expect(DrtTokens.COMMA)
            card_type = self.token()
            expect(DrtTokens.CLOSE)
            return BoxerCard(disc_id, sent_id, word_ids, var, value, card_type)
        elif tok == 'whq':
            disc_id, sent_id, word_ids = indexed_prefix(True)
            ans_types = self.handle_refs()
            expect(DrtTokens.COMMA)
            drs1 = self.parse_Expression(None)
            expect(DrtTokens.COMMA)
            var = int_token()
            expect(DrtTokens.COMMA)
            drs2 = self.parse_Expression(None)
            expect(DrtTokens.CLOSE)
            return BoxerWhq(disc_id, sent_id, word_ids, ans_types, drs1, var, drs2)
    except Exception as e:
        raise ParseException(self._currentIndex, str(e))
    # Only reached when no branch above matched the head token.
    assert False, repr(tok)
754
756 t = self.token()
757 return [None,int(t)][t != 'None']
758
764
765
766
769 """
770 @return: (set<variables>, set<events>, set<propositions>)
771 """
772 variables, events, propositions = self._variables()
773 return (variables - (events | propositions), events, propositions - events)
774
776 vartypes = {}
777 for t,vars in zip(('z','e','p'), self.variables()):
778 for v in vars:
779 vartypes[v] = t
780 return vartypes
781
783 """
784 @return: (set<variables>, set<events>, set<propositions>)
785 """
786 return (set(), set(), set())
787
790
793
796
799
801 return hash(str(self))
802
def __init__(self, label, refs, conds, consequent=None):
    """
    Initialise the base class and record the four fields as attributes
    of the same names.

    @param label: stored as C{self.label}
    @param refs: stored as C{self.refs}
    @param conds: stored as C{self.conds}
    @param consequent: stored as C{self.consequent}; defaults to C{None}
    """
    AbstractBoxerDrs.__init__(self)
    self.label, self.refs = label, refs
    self.conds, self.consequent = conds, consequent
810
820
826
828 if self.consequent:
829 consequent = self.consequent.clean()
830 else:
831 consequent = None
832 return BoxerDrs(self.label, self.refs, [c.clean() for c in self.conds], consequent)
833
840
842 s = 'drs(%s, [%s], [%s])' % (self.label,
843 ', '.join(map(str, self.refs)),
844 ', '.join(map(str, self.conds)))
845 if self.consequent is not None:
846 s = 'imp(%s, %s)' % (s, self.consequent)
847 return s
848
850 return self.__class__ == other.__class__ and \
851 self.label == other.label and \
852 self.refs == other.refs and \
853 len(self.conds) == len(other.conds) and \
854 reduce(operator.and_, (c1==c2 for c1,c2 in zip(self.conds, other.conds))) and \
855 self.consequent == other.consequent
856
861
864
866 return self.drs.atoms()
867
870
873
875 return 'not(%s)' % (self.drs)
876
878 return self.__class__ == other.__class__ and self.drs == other.drs
879
884
887
889 return 'event(%s)' % (self.var)
890
892 return self.__class__ == other.__class__ and self.var == other.var
893
def __init__(self, discourse_id, sent_index, word_indices):
    """
    Initialise the base class and record where this condition occurs.

    @param discourse_id: stored as C{self.discourse_id}
    @param sent_index: stored as C{self.sent_index}
    @param word_indices: stored as C{self.word_indices}
    """
    AbstractBoxerDrs.__init__(self)
    self.discourse_id, self.sent_index, self.word_indices = (
        discourse_id, sent_index, word_indices)
900
903
905 return self.__class__ == other.__class__ and \
906 self.discourse_id == other.discourse_id and \
907 self.sent_index == other.sent_index and \
908 self.word_indices == other.word_indices and \
909 reduce(operator.and_, (s==o for s,o in zip(self, other)))
910
912 s = '%s(%s, %s, [%s]' % (self._pred(), self.discourse_id, self.sent_index, ', '.join(map(str, self.word_indices)))
913 for v in self:
914 s += ', %s' % v
915 return s + ')'
916
918 - def __init__(self, discourse_id, sent_index, word_indices, var, name, pos, sense):
924
927
929 return BoxerPred(self.discourse_id, self.sent_index, self.word_indices, var, self.name, self.pos, self.sense)
930
932 return BoxerPred(self.discourse_id, self.sent_index, self.word_indices, self.var, self._clean_name(self.name), self.pos, self.sense)
933
935 new_sent_index = f(self.sent_index)
936 return BoxerPred(self.discourse_id, new_sent_index, self.word_indices, self.var, self.name, self.pos, self.sense)
937
939 return iter((self.var, self.name, self.pos, self.sense))
940
943
945 - def __init__(self, discourse_id, sent_index, word_indices, var, name, type, sense):
951
954
956 return BoxerNamed(self.discourse_id, self.sent_index, self.word_indices, var, self.name, self.type, self.sense)
957
960
962 return BoxerNamed(self.discourse_id, f(self.sent_index), self.word_indices, self.var, self.name, self.type, self.sense)
963
965 return iter((self.var, self.name, self.type, self.sense))
966
969
def __init__(self, discourse_id, sent_index, word_indices, var1, var2, rel, sense):
    """
    Record the index information via C{BoxerIndexed} and store the two
    related variables, the relation name and its sense.

    @param discourse_id: passed to C{BoxerIndexed.__init__}
    @param sent_index: passed to C{BoxerIndexed.__init__}
    @param word_indices: passed to C{BoxerIndexed.__init__}
    @param var1: stored as C{self.var1}
    @param var2: stored as C{self.var2}
    @param rel: stored as C{self.rel}
    @param sense: stored as C{self.sense}
    """
    BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices)
    self.var1, self.var2 = var1, var2
    self.rel, self.sense = rel, sense
977
979 return (set([self.var1, self.var2]), set(), set())
980
982 return BoxerRel(self.discourse_id, self.sent_index, self.word_indices, self.var1, self.var2, self._clean_name(self.rel), self.sense)
983
985 return BoxerRel(self.discourse_id, f(self.sent_index), self.word_indices, self.var1, self.var2, self.rel, self.sense)
986
988 return iter((self.var1, self.var2, self.rel, self.sense))
989
992
def __init__(self, discourse_id, sent_index, word_indices, var, drs):
    """
    Record the index information via C{BoxerIndexed} and store the
    variable together with its DRS.

    @param discourse_id: passed to C{BoxerIndexed.__init__}
    @param sent_index: passed to C{BoxerIndexed.__init__}
    @param word_indices: passed to C{BoxerIndexed.__init__}
    @param var: stored as C{self.var}
    @param drs: stored as C{self.drs}
    """
    BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices)
    self.var, self.drs = var, drs
998
1001
1003 return set([self.drs])
1004
1006 return self.drs.atoms()
1007
1009 return BoxerProp(self.discourse_id, self.sent_index, self.word_indices, self.var, self.drs.clean())
1010
1013
1015 return iter((self.var, self.drs))
1016
1019
def __init__(self, discourse_id, sent_index, word_indices, var1, var2):
    """
    Record the index information via C{BoxerIndexed} and store the two
    variables of the equality.

    @param discourse_id: passed to C{BoxerIndexed.__init__}
    @param sent_index: passed to C{BoxerIndexed.__init__}
    @param word_indices: passed to C{BoxerIndexed.__init__}
    @param var1: stored as C{self.var1}
    @param var2: stored as C{self.var2}
    """
    BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices)
    self.var1, self.var2 = var1, var2
1025
1027 return (set([self.var1, self.var2]), set(), set())
1028
1031
1033 return BoxerEq(self.discourse_id, f(self.sent_index), self.word_indices, self.var1, self.var2)
1034
1036 return iter((self.var1, self.var2))
1037
1040
1042 - def __init__(self, discourse_id, sent_index, word_indices, var, value, type):
1047
1050
1052 return BoxerCard(self.discourse_id, f(self.sent_index), self.word_indices, self.var, self.value, self.type)
1053
1055 return iter((self.var, self.value, self.type))
1056
1059
def __init__(self, discourse_id, sent_index, word_indices, drs1, drs2):
    """
    Record the index information via C{BoxerIndexed} and store the two
    disjuncts.

    @param discourse_id: passed to C{BoxerIndexed.__init__}
    @param sent_index: passed to C{BoxerIndexed.__init__}
    @param word_indices: passed to C{BoxerIndexed.__init__}
    @param drs1: stored as C{self.drs1}
    @param drs2: stored as C{self.drs2}
    """
    BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices)
    self.drs1, self.drs2 = drs1, drs2
1065
1068
1071
1073 return BoxerOr(self.discourse_id, self.sent_index, self.word_indices, self.drs1.clean(), self.drs2.clean())
1074
1076 return BoxerOr(self.discourse_id, f(self.sent_index), self.word_indices, self.drs1, self.drs2)
1077
1079 return iter((self.drs1, self.drs2))
1080
1083
def __init__(self, discourse_id, sent_index, word_indices, ans_types, drs1, variable, drs2):
    """
    Record the index information via C{BoxerIndexed} and store the
    answer types, the two DRSs and the variable.

    @param discourse_id: passed to C{BoxerIndexed.__init__}
    @param sent_index: passed to C{BoxerIndexed.__init__}
    @param word_indices: passed to C{BoxerIndexed.__init__}
    @param ans_types: stored as C{self.ans_types}
    @param drs1: stored as C{self.drs1}
    @param variable: stored as C{self.variable}
    @param drs2: stored as C{self.drs2}
    """
    BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices)
    self.ans_types, self.variable = ans_types, variable
    self.drs1, self.drs2 = drs1, drs2
1091
1094
1097
1099 return BoxerWhq(self.discourse_id, self.sent_index, self.word_indices, self.ans_types, self.drs1.clean(), self.variable, self.drs2.clean())
1100
1102 return BoxerWhq(self.discourse_id, f(self.sent_index), self.word_indices, self.ans_types, self.drs1, self.variable, self.drs2)
1103
1105 return iter(('['+','.join(self.ans_types)+']', self.drs1, self.variable, self.drs2))
1106
1109
1110
1111
1115
1116
1118 - def __init__(self, occur_index=False):
1119 self._occur_index = occur_index
1120
1122 """
1123 @param ex: C{AbstractBoxerDrs}
1124 @return: C{AbstractDrs}
1125 """
1126 if isinstance(ex, BoxerDrs):
1127 drs = DRS([Variable('x%d' % r) for r in ex.refs], map(self.interpret, ex.conds))
1128 if ex.label is not None:
1129 drs.label = Variable('x%d' % ex.label)
1130 if ex.consequent is not None:
1131 drs.consequent = self.interpret(ex.consequent)
1132 return drs
1133 elif isinstance(ex, BoxerNot):
1134 return DrtNegatedExpression(self.interpret(ex.drs))
1135 elif isinstance(ex, BoxerEvent):
1136 return self._make_atom('event', 'x%d' % ex.var)
1137 elif isinstance(ex, BoxerPred):
1138 pred = self._add_occur_indexing('%s_%s' % (ex.pos, ex.name), ex)
1139 return self._make_atom(pred, 'x%d' % ex.var)
1140 elif isinstance(ex, BoxerNamed):
1141 pred = self._add_occur_indexing('ne_%s_%s' % (ex.type, ex.name), ex)
1142 return self._make_atom(pred, 'x%d' % ex.var)
1143 elif isinstance(ex, BoxerRel):
1144 pred = self._add_occur_indexing('%s' % (ex.rel), ex)
1145 return self._make_atom(pred, 'x%d' % ex.var1, 'x%d' % ex.var2)
1146 elif isinstance(ex, BoxerProp):
1147 return DrtProposition(Variable('x%d' % ex.var), self.interpret(ex.drs))
1148 elif isinstance(ex, BoxerEq):
1149 return DrtEqualityExpression(DrtVariableExpression(Variable('x%d' % ex.var1)),
1150 DrtVariableExpression(Variable('x%d' % ex.var2)))
1151 elif isinstance(ex, BoxerCard):
1152 pred = self._add_occur_indexing('card_%s_%s' % (ex.type, ex.value), ex)
1153 return self._make_atom(pred, 'x%d' % ex.var)
1154 elif isinstance(ex, BoxerOr):
1155 return DrtOrExpression(self.interpret(ex.drs1), self.interpret(ex.drs2))
1156 elif isinstance(ex, BoxerWhq):
1157 drs1 = self.interpret(ex.drs1)
1158 drs2 = self.interpret(ex.drs2)
1159 return DRS(drs1.refs + drs2.refs, drs1.conds + drs2.conds)
1160 assert False, '%s: %s' % (ex.__class__.__name__, ex)
1161
1167
1169 if self._occur_index and ex.sent_index is not None:
1170 if ex.discourse_id:
1171 base += '_%s' % ex.discourse_id
1172 base += '_s%s' % ex.sent_index
1173 base += '_w%s' % sorted(ex.word_indices)[0]
1174 return base
1175
1176
1179
1180
if __name__ == '__main__':
    # Command-line driver: parse the flags, interpret the single TEXT
    # argument as one discourse, and print the result as a DRS or as FOL.
    opts = OptionParser("usage: %prog TEXT [options]")
    opts.add_option("--verbose", "-v", dest="verbose", action="store_true",
                    default=False, help="display verbose logs")
    opts.add_option("--fol", "-f", dest="fol", action="store_true",
                    default=False, help="output FOL")
    opts.add_option("--question", "-q", dest="question", action="store_true",
                    default=False, help="input is a question")
    opts.add_option("--occur", "-o", dest="occur_index", action="store_true",
                    default=False, help="occurrence index")
    (options, args) = opts.parse_args()

    if len(args) != 1:
        opts.error("incorrect number of arguments")

    interpreter = NltkDrtBoxerDrsInterpreter(occur_index=options.occur_index)
    boxer = Boxer(interpreter)
    # Sentences are separated on the command line by a literal
    # backslash followed by 'n' (raw string), not by a newline character.
    sentences = args[0].split(r'\n')
    drs = boxer.interpret_multisentence(sentences,
                                        question=options.question,
                                        verbose=options.verbose)
    if drs is None:
        print(None)
    else:
        drs = drs.simplify().eliminate_equality()
        if options.fol:
            print(drs.fol().normalize())
        else:
            drs.normalize().pprint()
1202