Package nltk :: Module internals
[hide private]
[frames] | no frames]

Source Code for Module nltk.internals

  1  # Natural Language Toolkit: Internal utility functions 
  2  # 
  3  # Copyright (C) 2001-2011 NLTK Project 
  4  # Author: Steven Bird <sb@csse.unimelb.edu.au> 
  5  #         Edward Loper <edloper@gradient.cis.upenn.edu> 
  6  #         Nitin Madnani <nmadnani@ets.org> 
  7  # URL: <http://www.nltk.org/> 
  8  # For license information, see LICENSE.TXT 
  9   
 10  import subprocess 
 11  import os 
 12  import os.path 
 13  import re 
 14  import warnings 
 15  import textwrap 
 16  import types 
 17  import sys 
 18  import stat 
 19   
 20  from nltk import __file__ 
 21   
 22  # Use the c version of ElementTree, which is faster, if possible: 
 23  try: from xml.etree import cElementTree as ElementTree 
 24  except ImportError: from nltk.etree import ElementTree 
 25   
 26  ###################################################################### 
 27  # Regular Expression Processing 
 28  ###################################################################### 
 29   
def convert_regexp_to_nongrouping(pattern):
    """
    Convert all grouping parentheses in the given regexp pattern to
    non-grouping parentheses, and return the result.  E.g.:

        >>> convert_regexp_to_nongrouping('ab(c(x+)(z*))?d')
        'ab(?:c(?:x+)(?:z*))?d'

    Named-group openers (C{(?P<name>}) are rewritten to C{(?:} as
    well.  Backslash-escaped characters and other extension groups
    (anything else starting with C{(?}) are left untouched.

    @type pattern: C{str}
    @rtype: C{str}
    @raise ValueError: If C{pattern} contains a backslash followed by
        a digit, or a C{(?P=name)} named back-reference.
    """
    # Sanity check: back-references are not allowed!
    for s in re.findall(r'\\.|\(\?P=', pattern):
        if s[1] in '0123456789' or s == '(?P=':
            raise ValueError('Regular expressions with back-references '
                             'are not supported: %r' % pattern)

    # This regexp substitution function replaces a token that is exactly
    # '(' or '(?P<name>' with '(?:', and returns any other token
    # unchanged.  (Raw string: '\(' and '\?' are not valid string
    # escapes and trigger warnings on recent Python versions.)
    def subfunc(m):
        return re.sub(r'^\((\?P<[^>]*>)?$', '(?:', m.group())

    # Scan through the regular expression.  If we see any backslashed
    # characters, ignore them.  If we see a named group, then
    # replace it with "(?:".  If we see any open parens that are part
    # of an extension group, ignore those too.  But if we see
    # any other open paren, replace it with "(?:")
    return re.sub(r'''(?x)
        \\.           |  # Backslashed character
        \(\?P<[^>]*>  |  # Named group
        \(\?          |  # Extension group
        \(               # Grouping parenthasis''', subfunc, pattern)


##########################################################################
# Java Via Command-Line
##########################################################################

_java_bin = None
_java_options = []
# [xx] add classpath option to config_java?
def config_java(bin=None, options=None, verbose=True):
    """
    Tell nltk which C{java} binary to run and which extra command-line
    options (if any) to pass to it on every invocation.

    @param bin: The full path to the C{java} binary.  If not specified,
        nltk searches for one via L{find_binary()} (checking the
        C{JAVAHOME} and C{JAVA_HOME} environment variables); if none
        is found, a C{LookupError} is raised.
    @type bin: C{string}
    @param options: Options to hand to the C{java} binary whenever it
        is called, e.g. C{['-Xmx512m']} to raise the maximum heap size
        to 512 megabytes.  A single string is split on whitespace.
        If C{None}, the current option list is left unchanged.
    @type options: C{list} of C{string}
    @param verbose: Passed through to L{find_binary()}.
    """
    global _java_bin, _java_options
    _java_bin = find_binary('java', bin,
                            env_vars=['JAVAHOME', 'JAVA_HOME'],
                            verbose=verbose)

    # No options given: keep whatever was configured before.
    if options is None:
        return

    if isinstance(options, basestring):
        options = options.split()
    _java_options = list(options)
95
def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None,
         blocking=True):
    """
    Execute the given java command, by opening a subprocess that calls
    C{java}.  If java has not yet been configured, it will be configured
    by calling L{config_java()} with no arguments.

    @param cmd: The java command that should be called, formatted as
        a list of strings.  Typically, the first string will be the name
        of the java class; and the remaining strings will be arguments
        for that java class.
    @type cmd: C{list} of C{string}

    @param classpath: A list of directories, JAR archives, and ZIP
        archives to search for class files, separated by the
        platform's path separator (C{os.pathsep}: C{':'} on POSIX,
        C{';'} on Windows).  The NLTK jar is always appended.
    @type classpath: C{string}

    @param stdin, stdout, stderr: Specify the executed programs'
        standard input, standard output and standard error file
        handles, respectively.  Valid values are C{subprocess.PIPE},
        an existing file descriptor (a positive integer), an existing
        file object, and C{None}.  C{subprocess.PIPE} indicates that a
        new pipe to the child should be created.  With C{None}, no
        redirection will occur; the child's file handles will be
        inherited from the parent.  Additionally, stderr can be
        C{subprocess.STDOUT}, which indicates that the stderr data
        from the applications should be captured into the same file
        handle as for stdout.  The string C{'pipe'} is accepted as a
        shorthand for C{subprocess.PIPE}.

    @param blocking: If C{false}, then return immediately after
        spawning the subprocess.  In this case, the return value is
        the C{Popen} object, and not a C{(stdout, stderr)} tuple.

    @return: If C{blocking=True}, then return a tuple C{(stdout,
        stderr)}, containing the stdout and stderr outputs generated
        by the java command if the C{stdout} and C{stderr} parameters
        were set to C{subprocess.PIPE}; or C{None} otherwise.  If
        C{blocking=False}, then return a C{subprocess.Popen} object.

    @raise TypeError: If C{cmd} is a string rather than a list.
    @raise OSError: If the java command returns a nonzero return code.
    """
    if stdin == 'pipe': stdin = subprocess.PIPE
    if stdout == 'pipe': stdout = subprocess.PIPE
    if stderr == 'pipe': stderr = subprocess.PIPE
    if isinstance(cmd, basestring):
        raise TypeError('cmd should be a list of strings')

    # Make sure we know where a java binary is.
    if _java_bin is None:
        config_java()

    # Set up the classpath.  Join with os.pathsep rather than a literal
    # ':' so that the result is also valid on Windows (where the
    # separator is ';' and ':' appears in drive letters).
    if classpath is None:
        classpath = NLTK_JAR
    else:
        classpath = os.pathsep.join([classpath, NLTK_JAR])

    # Construct the full command string.
    cmd = [_java_bin] + _java_options + ['-cp', classpath] + list(cmd)

    # Call java via a subprocess
    p = subprocess.Popen(cmd, stdin=stdin, stdout=stdout, stderr=stderr)
    if not blocking:
        return p
    (stdout, stderr) = p.communicate()

    # Check the return code.
    if p.returncode != 0:
        print(stderr)
        raise OSError('Java command failed!')

    return (stdout, stderr)

#: Absolute path of the NLTK jar file that sits next to the package's
#: C{__file__}.  It is used to communicate with external Java packages
#: (such as Mallet) that do not have a sufficiently powerful native
#: command-line interface.
NLTK_JAR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), 'nltk.jar'))

# Disabled scratch code (never executed).
if 0:
    #config_java(options='-Xmx512m')
    # Write:
    #java('weka.classifiers.bayes.NaiveBayes',
    #     ['-d', '/tmp/names.model', '-t', '/tmp/train.arff'],
    #     classpath='/Users/edloper/Desktop/weka/weka.jar')
    # Read:
    (a,b) = java(['weka.classifiers.bayes.NaiveBayes',
                  '-l', '/tmp/names.model', '-T', '/tmp/test.arff',
                  '-p', '0'],#, '-distribution'],
                 classpath='/Users/edloper/Desktop/weka/weka.jar')


######################################################################
# Parsing
######################################################################

class ParseError(ValueError):
    """
    Exception raised by parse_* functions when they fail.

    @param expected: What was expected when the error occurred.
    @param position: The index in the input string where the error
        occurred.
    """
    def __init__(self, expected, position):
        # Both values are also forwarded to ValueError, so they show
        # up in C{args}.
        super(ParseError, self).__init__(expected, position)
        self.position = position
        self.expected = expected

    def __str__(self):
        details = (self.expected, self.position)
        return 'Expected %s at %s' % details

# Optional u/U and r/R prefixes followed by an opening quote mark
# (triple quotes are tried before single ones).
_STRING_START_RE = re.compile(r"[uU]?[rR]?(\"\"\"|\'\'\'|\"|\')")

def parse_str(s, start_position):
    """
    If a Python string literal begins at the specified position in the
    given string, then return a tuple C{(val, end_position)}
    containing the value of the string literal and the position where
    it ends.  Otherwise, raise a L{ParseError}.

    @param s: The string to read the literal from.
    @param start_position: Index in C{s} where the literal must start.
    @raise ParseError: If there is no opening quote at
        C{start_position}, no matching closing quote, or the literal
        cannot be evaluated.
    """
    # Read the open quote, and any modifiers.
    m = _STRING_START_RE.match(s, start_position)
    if not m:
        raise ParseError('open quote', start_position)
    quotemark = m.group(1)

    # Find the close quote.  Each backslash found on the way also
    # skips the character that follows it.
    _STRING_END_RE = re.compile(r'\\|%s' % quotemark)
    position = m.end()
    while True:
        match = _STRING_END_RE.search(s, position)
        if not match:
            raise ParseError('close quote', position)
        if match.group(0) == '\\':
            position = match.end() + 1
        else:
            break

    # Parse it, using eval.  Strings with invalid escape sequences
    # might raise ValueError (or SyntaxError, depending on the Python
    # version and on the kind of problem).
    # NOTE(review): eval() is only applied to the span delimited by the
    # quote marks found above, but it is still eval -- confirm that
    # callers never pass untrusted input.
    try:
        return eval(s[start_position:match.end()]), match.end()
    except (ValueError, SyntaxError):
        # sys.exc_info() keeps this working on every Python version.
        e = sys.exc_info()[1]
        raise ParseError('valid string (%s)' % e, start_position)

# An optional minus sign followed by one or more digits.
_PARSE_INT_RE = re.compile(r'-?\d+')

def parse_int(s, start_position):
    """
    Read an integer that begins at C{start_position} in C{s}.

    @return: A tuple C{(val, end_position)} containing the value of
        the integer and the position where it ends.
    @raise ParseError: If no integer begins at C{start_position}.
    """
    found = _PARSE_INT_RE.match(s, start_position)
    if found is None:
        raise ParseError('integer', start_position)
    value = int(found.group())
    return value, found.end()

# Optional minus sign, optional integer digits (group 1), optional
# '.' plus fraction digits (group 2).  Every part is optional, so the
# pattern itself always matches; parse_number() validates the groups.
_PARSE_NUMBER_VALUE = re.compile(r'-?(\d*)([.]?\d*)?')

def parse_number(s, start_position):
    """
    If an integer or float begins at the specified position in the
    given string, then return a tuple C{(val, end_position)}
    containing the value of the number and the position where it ends.
    Otherwise, raise a L{ParseError}.

    The value is a C{float} if the matched text contains a decimal
    point, and an C{int} otherwise.

    @param s: The string to read the number from.
    @param start_position: Index in C{s} where the number must start.
    @raise ParseError: If no number begins at C{start_position}.
    """
    m = _PARSE_NUMBER_VALUE.match(s, start_position)
    if not m or not (m.group(1) or m.group(2)):
        raise ParseError('number', start_position)
    if not m.group(2):
        return int(m.group()), m.end()
    try:
        return float(m.group()), m.end()
    except ValueError:
        # A lone '.' (or '-.') satisfies the pattern but contains no
        # digits; report it like any other non-number.
        raise ParseError('number', start_position)
260 261 262 263 ###################################################################### 264 # Check if a method has been overridden 265 ###################################################################### 266
def overridden(method):
    """
    Check whether a bound or unbound method is defined by more than
    one class in the method resolution order of its class.

    @return: True if C{method} overrides some method with the same
        name in a base class.  This is typically used when defining
        abstract base classes or interfaces, to allow subclasses to define
        either of two related methods:

        >>> class EaterI:
        ...     '''Subclass must define eat() or batch_eat().'''
        ...     def eat(self, food):
        ...         if overridden(self.batch_eat):
        ...             return self.batch_eat([food])[0]
        ...         else:
        ...             raise NotImplementedError()
        ...     def batch_eat(self, foods):
        ...         return [self.eat(food) for food in foods]

    @type method: instance method
    @raise TypeError: If C{method} is not an instance method.
    """
    # [xx] breaks on classic classes!
    if not (isinstance(method, types.MethodType)
            and method.im_class is not None):
        raise TypeError('Expected an instance method.')

    # Count how many classes along the MRO define an attribute with
    # the method's name in their own namespace.
    name = method.__name__
    definitions = 0
    for klass in _mro(method.im_class):
        if name in klass.__dict__:
            definitions += 1
    return definitions > 1
295
def _mro(cls):
    """
    Return the I{method resolution order} for C{cls} -- i.e., a
    sequence containing C{cls} and all its base classes, in the order
    in which they would be checked by C{getattr}.

    For new-style classes this is C{cls.__mro__} itself.  For classic
    classes it is a list built by a depth-first, left-to-right
    traversal of C{__bases__}.
    """
    if not isinstance(cls, type):
        # Classic class: walk the bases recursively.
        order = [cls]
        for parent in cls.__bases__:
            order += _mro(parent)
        return order
    return cls.__mro__
310 311 ###################################################################### 312 # Deprecation decorator & base class 313 ###################################################################### 314 # [xx] dedent msg first if it comes from a docstring. 315
def _add_epytext_field(obj, field, message):
    """Append an epytext C{@field: message} entry to C{obj.__doc__}."""
    doc = obj.__doc__
    indent = ''
    if doc:
        # Existing docstring: separate the new field from it with a
        # blank line, and reuse the smallest indentation found on its
        # continuation lines.
        doc = doc.rstrip() + '\n\n'
        found = re.findall(r'(?<=\n)[ ]+(?!\s)', doc.expandtabs())
        if found:
            indent = min(found)
    else:
        # No docstring yet: start from an empty one.
        doc = ''

    doc += textwrap.fill('@%s: %s' % (field, message),
                         initial_indent=indent,
                         subsequent_indent=indent+' ')
    obj.__doc__ = doc
332
def deprecated(message):
    """
    A decorator used to mark functions as deprecated.  Calling the
    decorated function issues a C{DeprecationWarning} and then runs
    the original function.  Usage:

        >>> @deprecated('Use foo() instead')
        ... def bar(x):
        ...     print x/10

    @param message: Text appended to the warning and recorded in a
        C{@deprecated} field of the wrapper's docstring.
    """
    def decorator(func):
        # Build the warning text once, at decoration time.
        notice = "Function %s() has been deprecated. %s" % (func.__name__,
                                                            message)
        notice = '\n' + textwrap.fill(notice, initial_indent=' ',
                                      subsequent_indent=' ')

        def newFunc(*args, **kwargs):
            warnings.warn(notice, category=DeprecationWarning, stacklevel=2)
            return func(*args, **kwargs)

        # Copy the old function's dict, name & docstring, then flag
        # the wrapper as deprecated.
        newFunc.__dict__.update(func.__dict__)
        newFunc.__name__ = func.__name__
        newFunc.__doc__ = func.__doc__
        newFunc.__deprecated__ = True
        # Add a @deprecated field to the docstring.
        _add_epytext_field(newFunc, 'deprecated', message)
        return newFunc

    return decorator

class Deprecated(object):
    """
    A base class used to mark deprecated classes.  A typical usage is to
    alert users that the name of a class has changed:

        >>> class OldClassName(Deprecated, NewClassName):
        ...     "Use NewClassName instead."

    The docstring of the deprecated class will be used in the
    deprecation warning message.
    """
    def __new__(cls, *args, **kwargs):
        """
        Issue a C{DeprecationWarning} naming the deprecated class, then
        allocate the new instance.  Constructor arguments are accepted
        here but are only consumed by C{__init__}.
        """
        # Figure out which class is the deprecated one: the first class
        # in the MRO that lists Deprecated as a direct base.  (cls is
        # always a new-style class here, so __mro__ is available.)
        dep_cls = None
        for base in cls.__mro__:
            if Deprecated in base.__bases__:
                dep_cls = base
                break
        assert dep_cls, 'Unable to determine which base is deprecated.'

        # Construct an appropriate warning.  The parentheses matter:
        # strip() must apply to the docstring, not just to the '' default.
        doc = (dep_cls.__doc__ or '').strip()
        # If there's a @deprecated field, strip off the field marker.
        doc = re.sub(r'\A\s*@deprecated:', r'', doc)
        # Strip off any indentation.
        doc = re.sub(r'(?m)^\s*', '', doc)
        # Construct a 'name' string.
        name = 'Class %s' % dep_cls.__name__
        if cls != dep_cls:
            name += ' (base class for %s)' % cls.__name__
        # Put it all together.
        msg = '%s has been deprecated. %s' % (name, doc)
        # Wrap it.
        msg = '\n' + textwrap.fill(msg, initial_indent=' ',
                                   subsequent_indent=' ')
        warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
        # Do the actual work of __new__.  object.__new__ must not be
        # given the constructor arguments: newer Python versions reject
        # them, and older ones simply ignored them.
        return object.__new__(cls)
398 399 ########################################################################## 400 # COUNTER, FOR UNIQUE NAMING 401 ########################################################################## 402
class Counter:
    """
    A counter that auto-increments each time its value is read.
    """

    def __init__(self, initial_value=0):
        """Start counting from C{initial_value}."""
        self._value = initial_value

    def get(self):
        """Advance the counter by one and return the new value."""
        self._value += 1
        return self._value
412 413 ########################################################################## 414 # Search for binaries 415 ########################################################################## 416
def find_binary(name, path_to_bin=None, env_vars=(),
                searchpath=(), binary_names=None, url=None,
                verbose=True):
    """
    Search for the binary for a program that is used by nltk.

    The locations are tried in this order: the explicit C{path_to_bin},
    the environment variables in C{env_vars}, the directories in
    C{searchpath}, and finally (on POSIX systems) the C{which} command.

    @param name: The name of the program
    @param path_to_bin: The user-supplied binary location, or None.
        May be the binary itself, or a directory that contains the
        binary directly or in a C{bin} subdirectory.
    @param env_vars: A list of environment variable names to check.
        A single string is split on whitespace.  Each value is
        interpreted the same way as C{path_to_bin}.
    @param binary_names: A list of alternative binary names to check.
        Defaults to C{[name]}.
    @param searchpath: List of directories to search.
    @param url: Optional URL mentioned in the error message.
    @param verbose: If true, print a C{[Found ...]} line when the
        binary is located via an environment variable or C{which}.
    @return: The path of the binary.
    @raise ValueError: If C{path_to_bin} was given but no binary was
        found there.
    @raise LookupError: If the binary could not be found anywhere.
    """
    if binary_names is None: binary_names = [name]
    assert isinstance(name, basestring)
    assert not isinstance(binary_names, basestring)
    assert not isinstance(searchpath, basestring)
    if isinstance(env_vars, basestring):
        env_vars = env_vars.split()

    # If an explicit bin was given, then check it, and return it if
    # it's present; otherwise, complain.
    if path_to_bin is not None:
        if os.path.isfile(path_to_bin):
            return path_to_bin
        for bin_name in binary_names:
            if os.path.isfile(os.path.join(path_to_bin, bin_name)):
                return os.path.join(path_to_bin, bin_name)
            if os.path.isfile(os.path.join(path_to_bin, 'bin', bin_name)):
                return os.path.join(path_to_bin, 'bin', bin_name)
        raise ValueError('Could not find %s binary at %s' %
                         (name, path_to_bin))

    # Check environment variables
    for env_var in env_vars:
        if env_var in os.environ:
            path_to_bin = os.environ[env_var]
            if os.path.isfile(path_to_bin):
                if verbose: print('[Found %s: %s]' % (name, path_to_bin))
                return os.environ[env_var]
            else:
                for bin_name in binary_names:
                    path_to_bin = os.path.join(os.environ[env_var], bin_name)
                    if os.path.isfile(path_to_bin):
                        if verbose: print('[Found %s: %s]'%(name, path_to_bin))
                        return path_to_bin
                    path_to_bin = os.path.join(os.environ[env_var], 'bin',
                                               bin_name)
                    if os.path.isfile(path_to_bin):
                        if verbose: print('[Found %s: %s]'%(name, path_to_bin))
                        return path_to_bin

    # Check the path list.
    for directory in searchpath:
        for bin_name in binary_names:
            path_to_bin = os.path.join(directory, bin_name)
            if os.path.isfile(path_to_bin):
                return path_to_bin

    # If we're on a POSIX system, then try using the 'which' command
    # to find the binary.
    if os.name == 'posix':
        for bin_name in binary_names:
            try:
                p = subprocess.Popen(['which', bin_name],
                                     stdout=subprocess.PIPE)
                stdout, stderr = p.communicate()
                path = stdout.strip()
                if path.endswith(bin_name) and os.path.exists(path):
                    if verbose: print('[Found %s: %s]' % (name, path))
                    return path
            except (KeyboardInterrupt, SystemExit):
                # The tuple is required: "except A, B:" would catch only
                # A and bind the exception to the name B.
                raise
            except Exception:
                # Best effort only: if 'which' is unavailable or fails,
                # fall through to the LookupError below.
                pass

    msg = ("NLTK was unable to find the %s executable! Use "
           "config_%s()" % (name, name))
    if env_vars: msg += ' or set the %s environment variable' % env_vars[0]
    msg = textwrap.fill(msg+'.', initial_indent=' ',
                        subsequent_indent=' ')
    msg += "\n\n >>> config_%s('/path/to/%s')" % (name, name)
    if searchpath:
        msg += '\n\n Searched in:'
        msg += ''.join('\n - %s' % d for d in searchpath)
    if url: msg += ('\n\n For more information, on %s, see:\n <%s>' %
                    (name, url))
    div = '='*75
    raise LookupError('\n\n%s\n%s\n%s' % (div, msg, div))
505 506 ########################################################################## 507 # Find Java JAR files 508 # TODO: Add support for jar names specified as regular expressions 509 ########################################################################## 510
def find_jar(name, path_to_jar=None, env_vars=(),
             searchpath=(), url=None, verbose=True):
    """
    Search for a jar that is used by nltk.

    The locations are tried in this order: the explicit C{path_to_jar},
    the entries of the C{CLASSPATH} environment variable, the other
    environment variables in C{env_vars}, and the directories in
    C{searchpath}.

    @param name: The name of the jar file
    @param path_to_jar: The user-supplied jar location, or None.
    @param env_vars: A list of environment variable names to check
                     in addition to the CLASSPATH variable which is
                     checked by default.  A single string is split on
                     whitespace.
    @param searchpath: List of directories to search.
    @param url: Optional URL mentioned in the error message.
    @param verbose: If true, print a C{[Found ...]} line when the jar
        is located via an environment variable or C{searchpath}.
    @return: The path of the jar file.
    @raise ValueError: If C{path_to_jar} was given but is not a file.
    @raise LookupError: If the jar could not be found anywhere.
    """

    assert isinstance(name, basestring)
    assert not isinstance(searchpath, basestring)
    if isinstance(env_vars, basestring):
        env_vars = env_vars.split()

    # Make sure we check the CLASSPATH first
    env_vars = ['CLASSPATH'] + list(env_vars)

    # If an explicit location was given, then check it, and return it if
    # it's present; otherwise, complain.
    if path_to_jar is not None:
        if os.path.isfile(path_to_jar):
            return path_to_jar
        raise ValueError('Could not find %s jar file at %s' %
                         (name, path_to_jar))

    # Check environment variables
    for env_var in env_vars:
        if env_var in os.environ:
            if env_var == 'CLASSPATH':
                classpath = os.environ['CLASSPATH']
                # Split on the platform's separator (':' on POSIX, ';'
                # on Windows) instead of a hard-coded ':'.
                for cp in classpath.split(os.pathsep):
                    if os.path.isfile(cp) and os.path.basename(cp) == name:
                        if verbose: print('[Found %s: %s]' % (name, cp))
                        return cp
            else:
                path_to_jar = os.environ[env_var]
                if (os.path.isfile(path_to_jar)
                        and os.path.basename(path_to_jar) == name):
                    if verbose: print('[Found %s: %s]' % (name, path_to_jar))
                    return path_to_jar

    # Check the path list.
    for directory in searchpath:
        path_to_jar = os.path.join(directory, name)
        if os.path.isfile(path_to_jar):
            if verbose: print('[Found %s: %s]' % (name, path_to_jar))
            return path_to_jar

    # If nothing was found, raise an error
    msg = ("NLTK was unable to find %s!" % name)
    if env_vars: msg += ' Set the %s environment variable' % env_vars[0]
    msg = textwrap.fill(msg+'.', initial_indent=' ',
                        subsequent_indent=' ')
    if searchpath:
        msg += '\n\n Searched in:'
        msg += ''.join('\n - %s' % d for d in searchpath)
    if url: msg += ('\n\n For more information, on %s, see:\n <%s>' %
                    (name, url))
    div = '='*75
    raise LookupError('\n\n%s\n%s\n%s' % (div, msg, div))
574 575 ########################################################################## 576 # Import Stdlib Module 577 ########################################################################## 578
def import_from_stdlib(module):
    """
    Import C{module} with the current directory temporarily removed
    from the module search path, and return the imported module.

    When python is run from within the nltk/ directory tree, the
    current directory is included at the beginning of the search path.
    Unfortunately, that means that modules within nltk can sometimes
    shadow standard library modules.  As an example, the stdlib
    'inspect' module will attempt to import the stdlib 'tokenize'
    module, but will instead end up importing NLTK's 'tokenize' module
    instead (causing the import to fail).

    @param module: The name of the module to import.
    @return: Whatever C{__import__(module)} returns.
    """
    old_path = sys.path
    # Drop the entries that stand for the current directory.
    sys.path = [d for d in sys.path if d not in ('', '.')]
    try:
        m = __import__(module)
    finally:
        # Always restore the caller's search path (the very same list
        # object), even if the import failed.
        sys.path = old_path
    return m
594 595 ########################################################################## 596 # Abstract declaration 597 ########################################################################## 598
def abstract(func):
    """
    A decorator used to mark methods as abstract.  I.e., methods that
    are marked by this decorator must be overridden by subclasses.  If
    an abstract method is called (either in the base class or in a
    subclass that does not override the base class method), it will
    raise C{NotImplementedError}.

    @param func: The function to replace.  Its body is discarded; only
        its signature, defaults, name and docstring are reused.
    @return: A new function with the same signature whose body raises
        C{NotImplementedError}.  It carries C{__abstract__ = True} and
        a C{@note} field appended to its docstring.
    """
    # Avoid problems caused by nltk.tokenize shadowing the stdlib tokenize:
    inspect = import_from_stdlib('inspect')

    # Read the function's signature.
    args, varargs, varkw, defaults = inspect.getargspec(func)

    # Create a new function with the same signature (minus defaults)
    # that raises NotImplementedError.  The exec statement defines the
    # name 'newfunc' in this function's local scope.
    msg = '%s is an abstract method.' % func.__name__
    signature = inspect.formatargspec(args, varargs, varkw, ())
    exec ('def newfunc%s: raise NotImplementedError(%r)' % (signature, msg))

    # Substitute in the defaults after-the-fact, since eval(repr(val))
    # may not work for some default values.
    newfunc.func_defaults = func.func_defaults

    # Copy the name and docstring
    newfunc.__name__ = func.__name__
    newfunc.__doc__ = func.__doc__
    newfunc.__abstract__ = True
    _add_epytext_field(newfunc, "note", "This method is abstract.")

    # Return the function.
    return newfunc
631 632 ########################################################################## 633 # Wrapper for ElementTree Elements 634 ########################################################################## 635
class ElementWrapper(object):
    """
    A wrapper around ElementTree Element objects whose main purpose is
    to provide nicer __repr__ and __str__ methods.  In addition, any
    of the wrapped Element's methods that return other Element objects
    are overridden to wrap those values before returning them.

    This makes Elements more convenient to work with in
    interactive sessions and doctests, at the expense of some
    efficiency.
    """

    # Prevent double-wrapping:
    def __new__(cls, etree):
        """
        Create and return a wrapper around a given Element object.
        If C{etree} is an C{ElementWrapper}, then C{etree} is
        returned as-is.
        """
        if isinstance(etree, ElementWrapper):
            return etree
        else:
            # Allocate an instance of the requested class; object.__new__
            # must not be handed the constructor argument.
            return object.__new__(cls)

    def __init__(self, etree):
        """
        Initialize a new Element wrapper for C{etree}.  If
        C{etree} is a string, then it will be converted to an
        Element object using C{ElementTree.fromstring()} first.
        """
        if isinstance(etree, ElementWrapper):
            # __new__ returned the existing wrapper, and Python then
            # calls __init__ on it again.  It is already initialized;
            # storing C{etree} here would make the wrapper wrap itself.
            return
        if isinstance(etree, basestring):
            etree = ElementTree.fromstring(etree)
        # Write through __dict__ because __setattr__ delegates to the
        # wrapped element.
        self.__dict__['_etree'] = etree

    def unwrap(self):
        """
        Return the Element object wrapped by this wrapper.
        """
        return self._etree

    ##////////////////////////////////////////////////////////////
    #{ String Representation
    ##////////////////////////////////////////////////////////////

    def __repr__(self):
        """
        @return: C{<Element ...>} containing the serialized element;
            serializations longer than 60 characters are abbreviated
            to their beginning and end.
        """
        s = ElementTree.tostring(self._etree)
        if len(s) > 60:
            e = s.rfind('<')
            if (len(s)-e) > 30: e = -20
            s = '%s...%s' % (s[:30], s[e:])
        return '<Element %r>' % s

    def __str__(self):
        """
        @return: the result of applying C{ElementTree.tostring()} to
        the wrapped Element object.
        """
        return ElementTree.tostring(self._etree).rstrip()

    ##////////////////////////////////////////////////////////////
    #{ Element interface Delegation (pass-through)
    ##////////////////////////////////////////////////////////////

    def __getattr__(self, attrib):
        return getattr(self._etree, attrib)

    def __setattr__(self, attr, value):
        return setattr(self._etree, attr, value)

    def __delattr__(self, attr):
        return delattr(self._etree, attr)

    def __setitem__(self, index, element):
        self._etree[index] = element

    def __delitem__(self, index):
        del self._etree[index]

    def __setslice__(self, start, stop, elements):
        self._etree[start:stop] = elements

    def __delslice__(self, start, stop):
        del self._etree[start:stop]

    def __len__(self):
        return len(self._etree)

    ##////////////////////////////////////////////////////////////
    #{ Element interface Delegation (wrap result)
    ##////////////////////////////////////////////////////////////

    def __getitem__(self, index):
        """
        @return: The wrapped child at C{index}; or, if C{index} is a
            C{slice} object, a list of wrapped children.
        """
        item = self._etree[index]
        if isinstance(index, slice):
            # Slices arrive here whenever __getslice__ is not used
            # (extended slices, or Python versions without it).
            return [ElementWrapper(elt) for elt in item]
        return ElementWrapper(item)

    def __getslice__(self, start, stop):
        return [ElementWrapper(elt) for elt in self._etree[start:stop]]

    def getchildren(self):
        return [ElementWrapper(elt) for elt in self._etree]

    def getiterator(self, tag=None):
        # Prefer Element.iter() when the wrapped element provides it;
        # fall back to the older getiterator() spelling otherwise.
        walk = getattr(self._etree, 'iter', None)
        if walk is None:
            walk = self._etree.getiterator
        return (ElementWrapper(elt)
                for elt in walk(tag))

    def makeelement(self, tag, attrib):
        return ElementWrapper(self._etree.makeelement(tag, attrib))

    def find(self, path):
        elt = self._etree.find(path)
        if elt is None: return elt
        else: return ElementWrapper(elt)

    def findall(self, path):
        return [ElementWrapper(elt) for elt in self._etree.findall(path)]
750 751 ###################################################################### 752 # Helper for Handling Slicing 753 ###################################################################### 754
def slice_bounds(sequence, slice_obj, allow_step=False):
    """
    Given a slice, return the corresponding (start, stop) bounds,
    taking into account None indices and negative indices.  The
    following guarantees are made for the returned start and stop values:

      - 0 <= start <= len(sequence)
      - 0 <= stop <= len(sequence)
      - start <= stop

    @param sequence: The sequence that the slice applies to.
    @param slice_obj: The C{slice} object to normalize.
    @param allow_step: If true, then the slice object may have a
        non-None step.  If it does, then return a tuple
        (start, stop, step).  A missing step is reported as 1.
    @raise ValueError: If C{slice_obj.step} is neither C{None} nor 1
        and C{allow_step} is false.
    """
    start, stop = (slice_obj.start, slice_obj.stop)

    # If allow_step is true, then include the step in our return
    # value tuple.
    if allow_step:
        # slice objects are read-only, so the defaulted step is kept
        # in a local variable instead of being written back.
        step = slice_obj.step
        if step is None:
            step = 1
        # Use a recursive call without allow_step to find the slice
        # bounds.  If step is negative, then the roles of start and
        # stop (in terms of default values, etc), are swapped.
        if step < 0:
            start, stop = slice_bounds(sequence, slice(stop, start))
        else:
            start, stop = slice_bounds(sequence, slice(start, stop))
        return start, stop, step

    # Otherwise, make sure that no non-default step value is used.
    elif slice_obj.step not in (None, 1):
        raise ValueError('slices with steps are not supported by %s' %
                         sequence.__class__.__name__)

    # Supply default offsets.
    if start is None: start = 0
    if stop is None: stop = len(sequence)

    # Handle negative indices.
    if start < 0: start = max(0, len(sequence)+start)
    if stop < 0: stop = max(0, len(sequence)+stop)

    # Make sure stop doesn't go past the end of the list.  Note that
    # we avoid calculating len(sequence) if possible, because for lazy
    # sequences, calculating the length of a sequence can be expensive.
    if stop > 0:
        try: sequence[stop-1]
        except IndexError: stop = len(sequence)

    # Make sure start isn't past stop.
    start = min(start, stop)

    # That's all folks!
    return start, stop
810 811 ###################################################################### 812 # Permission Checking 813 ###################################################################### 814
def is_writable(path):
    """
    Estimate, from its permission bits, whether C{path} can be written
    to by the current process.

    On platforms that provide C{os.getuid}, the path is considered
    writable if it is world-writable, if it is owned by the current
    user and owner-writable, or if its group is the process's primary
    group (C{os.getgid()}) and it is group-writable.  On other
    platforms any existing path is assumed to be writable.

    @param path: The file or directory to check.
    @return: C{False} if C{path} does not exist or fails the checks
        above; C{True} otherwise.
    @rtype: C{bool}
    """
    # Ensure that it exists.
    if not os.path.exists(path):
        return False

    # If we're on a posix system, check its permissions.  The stat
    # constants are the portable spelling of the octal masks 0002,
    # 0200 and 0020.
    if hasattr(os, 'getuid'):
        statdata = os.stat(path)
        perm = stat.S_IMODE(statdata.st_mode)
        # is it world-writable?
        if (perm & stat.S_IWOTH):
            return True
        # do we own it?
        elif statdata.st_uid == os.getuid() and (perm & stat.S_IWUSR):
            return True
        # are we in a group that can write to it?
        elif statdata.st_gid == os.getgid() and (perm & stat.S_IWGRP):
            return True
        # otherwise, we can't write to it.
        else:
            return False

    # Otherwise, we'll assume it's writable.
    # [xx] should we do other checks on other platforms?
    return True
840