Commit e1142d2d authored by Andreas Jung's avatar Andreas Jung

gone

parent a7b62271
# Author: David Goodger
# Contact: goodger@users.sourceforge.net
# Revision: $Revision: 1.3 $
# Date: $Date: 2003/07/10 15:49:30 $
# Copyright: This module has been placed in the public domain.
"""
This is the Docutils (Python Documentation Utilities) package.
Package Structure
=================
Modules:
- __init__.py: Contains the package docstring only (this text).
- core.py: Contains the ``Publisher`` class and ``publish()`` convenience
function.
- frontend.py: Command-line and common processing for Docutils front-ends.
- io.py: Provides a uniform API for low-level input and output.
- nodes.py: Docutils document tree (doctree) node class library.
- statemachine.py: A finite state machine specialized for
regular-expression-based text filters.
- urischemes.py: Contains a complete mapping of known URI addressing
scheme names to descriptions.
- utils.py: Contains the ``Reporter`` system warning class and miscellaneous
utilities.
Subpackages:
- languages: Language-specific mappings of terms.
- parsers: Syntax-specific input parser modules or packages.
- readers: Context-specific input handlers which understand the data
source and manage a parser.
- transforms: Modules used by readers and writers to modify DPS
doctrees.
- writers: Format-specific output translators.
"""
__docformat__ = 'reStructuredText'
__version__ = '0.3.0'
"""``major.minor.micro`` version number. The micro number is bumped any time
there's a change in the API incompatible with one of the front ends. The
minor number is bumped whenever there is a project release. The major number
will be bumped when the project is feature-complete, and perhaps if there is a
major change in the design."""
class ApplicationError(StandardError): pass
class DataError(ApplicationError): pass
class SettingsSpec:
"""
Runtime setting specification base class.
SettingsSpec subclass objects used by `docutils.frontend.OptionParser`.
"""
settings_spec = ()
"""Runtime settings specification. Override in subclasses.
Specifies runtime settings and associated command-line options, as used by
`docutils.frontend.OptionParser`. This tuple contains one or more sets of
option group title, description, and a list/tuple of tuples: ``('help
text', [list of option strings], {keyword arguments})``. Group title
and/or description may be `None`; no group title implies no group, just a
list of single options. Runtime settings names are derived implicitly
from long option names ("--a-setting" becomes ``settings.a_setting``) or
explicitly from the "dest" keyword argument."""
settings_defaults = None
"""A dictionary of defaults for internal or inaccessible (by command-line
or config file) settings. Override in subclasses."""
settings_default_overrides = None
"""A dictionary of auxiliary defaults, to override defaults for settings
defined in other components. Override in subclasses."""
relative_path_settings = ()
"""Settings containing filesystem paths. Override in subclasses.
Settings listed here are to be interpreted relative to the current working
directory."""
class TransformSpec:
"""
Runtime transform specification base class.
TransformSpec subclass objects used by `docutils.transforms.Transformer`.
"""
default_transforms = ()
"""Transforms required by this class. Override in subclasses."""
class Component(SettingsSpec, TransformSpec):
"""Base class for Docutils components."""
component_type = None
"""Override in subclasses."""
supported = ()
"""Names for this component. Override in subclasses."""
def supports(self, format):
"""
Is `format` supported by this component?
To be used by transforms to ask the dependent component if it supports
a certain input context or output format.
"""
return format in self.supported
# Authors: David Goodger
# Contact: goodger@users.sourceforge.net
# Revision: $Revision: 1.3 $
# Date: $Date: 2003/07/10 15:49:30 $
# Copyright: This module has been placed in the public domain.
"""
Calling the ``publish_*`` convenience functions (or instantiating a
`Publisher` object) with component names will result in default
behavior. For custom behavior (setting component options), create
custom component objects first, and pass *them* to
``publish_*``/`Publisher`.
"""
__docformat__ = 'reStructuredText'
import sys
from docutils import Component, __version__
from docutils import frontend, io, utils, readers, parsers, writers
from docutils.frontend import OptionParser
class Publisher:
"""
A facade encapsulating the high-level logic of a Docutils system.
"""
def __init__(self, reader=None, parser=None, writer=None,
source=None, source_class=io.FileInput,
destination=None, destination_class=io.FileOutput,
settings=None):
"""
Initial setup. If any of `reader`, `parser`, or `writer` are not
specified, the corresponding ``set_...`` method should be called with
a component name (`set_reader` sets the parser as well).
"""
self.reader = reader
"""A `readers.Reader` instance."""
self.parser = parser
"""A `parsers.Parser` instance."""
self.writer = writer
"""A `writers.Writer` instance."""
self.source = source
"""The source of input data, an `io.Input` instance."""
self.source_class = source_class
"""The class for dynamically created source objects."""
self.destination = destination
"""The destination for docutils output, an `io.Output` instance."""
self.destination_class = destination_class
"""The class for dynamically created destination objects."""
self.settings = settings
"""An object containing Docutils settings as instance attributes.
Set by `self.process_command_line()` or `self.get_settings()`."""
def set_reader(self, reader_name, parser, parser_name):
"""Set `self.reader` by name."""
reader_class = readers.get_reader_class(reader_name)
self.reader = reader_class(parser, parser_name)
self.parser = self.reader.parser
def set_writer(self, writer_name):
"""Set `self.writer` by name."""
writer_class = writers.get_writer_class(writer_name)
self.writer = writer_class()
def set_components(self, reader_name, parser_name, writer_name):
if self.reader is None:
self.set_reader(reader_name, self.parser, parser_name)
if self.parser is None:
if self.reader.parser is None:
self.reader.set_parser(parser_name)
self.parser = self.reader.parser
if self.writer is None:
self.set_writer(writer_name)
def setup_option_parser(self, usage=None, description=None,
settings_spec=None, **defaults):
#@@@ Add self.source & self.destination to components in future?
option_parser = OptionParser(
components=(settings_spec, self.parser, self.reader, self.writer),
defaults=defaults, read_config_files=1,
usage=usage, description=description)
return option_parser
def get_settings(self, usage=None, description=None,
settings_spec=None, **defaults):
"""
Set and return default settings (overrides in `defaults` keyword
argument).
Set components first (`self.set_reader` & `self.set_writer`).
Explicitly setting `self.settings` disables command line option
processing from `self.publish()`.
"""
option_parser = self.setup_option_parser(usage, description,
settings_spec, **defaults)
self.settings = option_parser.get_default_values()
return self.settings
def process_command_line(self, argv=None, usage=None, description=None,
settings_spec=None, **defaults):
"""
Pass an empty list to `argv` to avoid reading `sys.argv` (the
default).
Set components first (`self.set_reader` & `self.set_writer`).
"""
option_parser = self.setup_option_parser(usage, description,
settings_spec, **defaults)
if argv is None:
argv = sys.argv[1:]
self.settings = option_parser.parse_args(argv)
def set_io(self, source_path=None, destination_path=None):
if self.source is None:
self.set_source(source_path=source_path)
if self.destination is None:
self.set_destination(destination_path=destination_path)
def set_source(self, source=None, source_path=None):
if source_path is None:
source_path = self.settings._source
else:
self.settings._source = source_path
self.source = self.source_class(
source=source, source_path=source_path,
encoding=self.settings.input_encoding)
def set_destination(self, destination=None, destination_path=None):
if destination_path is None:
destination_path = self.settings._destination
else:
self.settings._destination = destination_path
self.destination = self.destination_class(
destination=destination, destination_path=destination_path,
encoding=self.settings.output_encoding,
error_handler=self.settings.output_encoding_error_handler)
def apply_transforms(self, document):
document.transformer.populate_from_components(
(self.source, self.reader, self.reader.parser, self.writer,
self.destination))
document.transformer.apply_transforms()
def publish(self, argv=None, usage=None, description=None,
settings_spec=None, settings_overrides=None,
enable_exit=None):
"""
Process command line options and arguments (if `self.settings` not
already set), run `self.reader` and then `self.writer`. Return
`self.writer`'s output.
"""
if self.settings is None:
self.process_command_line(argv, usage, description, settings_spec,
**(settings_overrides or {}))
elif settings_overrides:
self.settings._update(settings_overrides, 'loose')
self.set_io()
exit = None
document = None
try:
document = self.reader.read(self.source, self.parser,
self.settings)
self.apply_transforms(document)
output = self.writer.write(document, self.destination)
except utils.SystemMessage, error:
if self.settings.traceback:
raise
print >>sys.stderr, ('Exiting due to level-%s (%s) system message.'
% (error.level,
utils.Reporter.levels[error.level]))
exit = 1
except Exception, error:
if self.settings.traceback:
raise
print >>sys.stderr, error
print >>sys.stderr, ("""\
Exiting due to error. Use "--traceback" to diagnose.
Please report errors to <docutils-users@lists.sf.net>.
Include "--traceback" output, Docutils version (%s),
Python version (%s), your OS type & version, and the
command line used.""" % (__version__, sys.version.split()[0]))
exit = 1
if self.settings.dump_settings:
from pprint import pformat
print >>sys.stderr, '\n::: Runtime settings:'
print >>sys.stderr, pformat(self.settings.__dict__)
if self.settings.dump_internals and document:
from pprint import pformat
print >>sys.stderr, '\n::: Document internals:'
print >>sys.stderr, pformat(document.__dict__)
if self.settings.dump_transforms and document:
from pprint import pformat
print >>sys.stderr, '\n::: Transforms applied:'
print >>sys.stderr, pformat(document.transformer.applied)
if self.settings.dump_pseudo_xml and document:
print >>sys.stderr, '\n::: Pseudo-XML:'
print >>sys.stderr, document.pformat().encode(
'raw_unicode_escape')
if enable_exit and document and (document.reporter.max_level
>= self.settings.exit_level):
sys.exit(document.reporter.max_level + 10)
elif exit:
sys.exit(1)
return output
default_usage = '%prog [options] [<source> [<destination>]]'
default_description = ('Reads from <source> (default is stdin) and writes to '
'<destination> (default is stdout).')
def publish_cmdline(reader=None, reader_name='standalone',
parser=None, parser_name='restructuredtext',
writer=None, writer_name='pseudoxml',
settings=None, settings_spec=None,
settings_overrides=None, enable_exit=1, argv=None,
usage=default_usage, description=default_description):
"""
Set up & run a `Publisher`. For command-line front ends.
Parameters:
- `reader`: A `docutils.readers.Reader` object.
- `reader_name`: Name or alias of the Reader class to be instantiated if
no `reader` supplied.
- `parser`: A `docutils.parsers.Parser` object.
- `parser_name`: Name or alias of the Parser class to be instantiated if
no `parser` supplied.
- `writer`: A `docutils.writers.Writer` object.
- `writer_name`: Name or alias of the Writer class to be instantiated if
no `writer` supplied.
- `settings`: Runtime settings object.
- `settings_spec`: Extra settings specification; a `docutils.SettingsSpec`
subclass. Used only if no `settings` specified.
- `settings_overrides`: A dictionary containing program-specific overrides
of component settings.
- `enable_exit`: Boolean; enable exit status at end of processing?
- `argv`: Command-line argument list to use instead of ``sys.argv[1:]``.
- `usage`: Usage string, output if there's a problem parsing the command
line.
- `description`: Program description, output for the "--help" option
(along with command-line option descriptions).
"""
pub = Publisher(reader, parser, writer, settings=settings)
pub.set_components(reader_name, parser_name, writer_name)
pub.publish(argv, usage, description, settings_spec, settings_overrides,
enable_exit=enable_exit)
def publish_file(source=None, source_path=None,
destination=None, destination_path=None,
reader=None, reader_name='standalone',
parser=None, parser_name='restructuredtext',
writer=None, writer_name='pseudoxml',
settings=None, settings_spec=None, settings_overrides=None,
enable_exit=None):
"""
Set up & run a `Publisher`. For programmatic use with file-like I/O.
Parameters:
- `source`: A file-like object (must have "read" and "close" methods).
- `source_path`: Path to the input file. Opened if no `source` supplied.
If neither `source` nor `source_path` are supplied, `sys.stdin` is used.
- `destination`: A file-like object (must have "write" and "close"
methods).
- `destination_path`: Path to the input file. Opened if no `destination`
supplied. If neither `destination` nor `destination_path` are supplied,
`sys.stdout` is used.
- `reader`: A `docutils.readers.Reader` object.
- `reader_name`: Name or alias of the Reader class to be instantiated if
no `reader` supplied.
- `parser`: A `docutils.parsers.Parser` object.
- `parser_name`: Name or alias of the Parser class to be instantiated if
no `parser` supplied.
- `writer`: A `docutils.writers.Writer` object.
- `writer_name`: Name or alias of the Writer class to be instantiated if
no `writer` supplied.
- `settings`: Runtime settings object.
- `settings_spec`: Extra settings specification; a `docutils.SettingsSpec`
subclass. Used only if no `settings` specified.
- `settings_overrides`: A dictionary containing program-specific overrides
of component settings.
- `enable_exit`: Boolean; enable exit status at end of processing?
"""
pub = Publisher(reader, parser, writer, settings=settings)
pub.set_components(reader_name, parser_name, writer_name)
if settings is None:
settings = pub.get_settings(settings_spec=settings_spec)
if settings_overrides:
settings._update(settings_overrides, 'loose')
pub.set_source(source, source_path)
pub.set_destination(destination, destination_path)
pub.publish(enable_exit=enable_exit)
def publish_string(source, source_path=None, destination_path=None,
reader=None, reader_name='standalone',
parser=None, parser_name='restructuredtext',
writer=None, writer_name='pseudoxml',
settings=None, settings_spec=None,
settings_overrides=None, enable_exit=None):
"""
Set up & run a `Publisher`, and return the string output.
For programmatic use with string I/O.
For encoded string output, be sure to set the "output_encoding" setting to
the desired encoding. Set it to "unicode" for unencoded Unicode string
output. Here's how::
publish_string(..., settings_overrides={'output_encoding': 'unicode'})
Similarly for Unicode string input (`source`)::
publish_string(..., settings_overrides={'input_encoding': 'unicode'})
Parameters:
- `source`: An input string; required. This can be an encoded 8-bit
string (set the "input_encoding" setting to the correct encoding) or a
Unicode string (set the "input_encoding" setting to "unicode").
- `source_path`: Path to the file or object that produced `source`;
optional. Only used for diagnostic output.
- `destination_path`: Path to the file or object which will receive the
output; optional. Used for determining relative paths (stylesheets,
source links, etc.).
- `reader`: A `docutils.readers.Reader` object.
- `reader_name`: Name or alias of the Reader class to be instantiated if
no `reader` supplied.
- `parser`: A `docutils.parsers.Parser` object.
- `parser_name`: Name or alias of the Parser class to be instantiated if
no `parser` supplied.
- `writer`: A `docutils.writers.Writer` object.
- `writer_name`: Name or alias of the Writer class to be instantiated if
no `writer` supplied.
- `settings`: Runtime settings object.
- `settings_spec`: Extra settings specification; a `docutils.SettingsSpec`
subclass. Used only if no `settings` specified.
- `settings_overrides`: A dictionary containing program-specific overrides
of component settings.
- `enable_exit`: Boolean; enable exit status at end of processing?
"""
pub = Publisher(reader, parser, writer, settings=settings,
source_class=io.StringInput,
destination_class=io.StringOutput)
pub.set_components(reader_name, parser_name, writer_name)
if settings is None:
settings = pub.get_settings(settings_spec=settings_spec)
if settings_overrides:
settings._update(settings_overrides, 'loose')
pub.set_source(source, source_path)
pub.set_destination(destination_path=destination_path)
return pub.publish(enable_exit=enable_exit)
# Author: David Goodger
# Contact: goodger@users.sourceforge.net
# Revision: $Revision: 1.5 $
# Date: $Date: 2003/08/13 16:19:29 $
# Copyright: This module has been placed in the public domain.
"""
Command-line and common processing for Docutils front-end tools.
Exports the following classes:
- `OptionParser`: Standard Docutils command-line processing.
- `Values`: Runtime settings; objects are simple structs
(``object.attribute``).
- `ConfigParser`: Standard Docutils config file processing.
"""
__docformat__ = 'reStructuredText'
import os
import os.path
import sys
import types
import ConfigParser as CP
import codecs
import docutils
import optparse
from optparse import Values, SUPPRESS_HELP
def store_multiple(option, opt, value, parser, *args, **kwargs):
"""
Store multiple values in `parser.values`. (Option callback.)
Store `None` for each attribute named in `args`, and store the value for
each key (attribute name) in `kwargs`.
"""
for attribute in args:
setattr(parser.values, attribute, None)
for key, value in kwargs.items():
setattr(parser.values, key, value)
def read_config_file(option, opt, value, parser):
"""
Read a configuration file during option processing. (Option callback.)
"""
config_parser = ConfigParser()
config_parser.read(value, parser)
settings = config_parser.get_section('options')
make_paths_absolute(settings, parser.relative_path_settings,
os.path.dirname(value))
parser.values.__dict__.update(settings)
def set_encoding(option, opt, value, parser):
"""
Validate & set the encoding specified. (Option callback.)
"""
try:
value = validate_encoding(option.dest, value)
except LookupError, error:
raise (optparse.OptionValueError('option "%s": %s' % (opt, error)),
None, sys.exc_info()[2])
setattr(parser.values, option.dest, value)
def validate_encoding(name, value):
try:
codecs.lookup(value)
except LookupError:
raise (LookupError('unknown encoding: "%s"' % value),
None, sys.exc_info()[2])
return value
def set_encoding_error_handler(option, opt, value, parser):
"""
Validate & set the encoding error handler specified. (Option callback.)
"""
try:
value = validate_encoding_error_handler(option.dest, value)
except LookupError, error:
raise (optparse.OptionValueError('option "%s": %s' % (opt, error)),
None, sys.exc_info()[2])
setattr(parser.values, option.dest, value)
def validate_encoding_error_handler(name, value):
try:
codecs.lookup_error(value)
except AttributeError: # prior to Python 2.3
if value not in ('strict', 'ignore', 'replace'):
raise (LookupError(
'unknown encoding error handler: "%s" (choices: '
'"strict", "ignore", or "replace")' % value),
None, sys.exc_info()[2])
except LookupError:
raise (LookupError(
'unknown encoding error handler: "%s" (choices: '
'"strict", "ignore", "replace", "backslashreplace", '
'"xmlcharrefreplace", and possibly others; see documentation for '
'the Python ``codecs`` module)' % value),
None, sys.exc_info()[2])
return value
def set_encoding_and_error_handler(option, opt, value, parser):
"""
Validate & set the encoding and error handler specified. (Option callback.)
"""
try:
value = validate_encoding_and_error_handler(option.dest, value)
except LookupError, error:
raise (optparse.OptionValueError('option "%s": %s' % (opt, error)),
None, sys.exc_info()[2])
if ':' in value:
encoding, handler = value.split(':')
setattr(parser.values, option.dest + '_error_handler', handler)
else:
encoding = value
setattr(parser.values, option.dest, encoding)
def validate_encoding_and_error_handler(name, value):
if ':' in value:
encoding, handler = value.split(':')
validate_encoding_error_handler(name + '_error_handler', handler)
else:
encoding = value
validate_encoding(name, encoding)
return value
def make_paths_absolute(pathdict, keys, base_path=None):
"""
Interpret filesystem path settings relative to the `base_path` given.
Paths are values in `pathdict` whose keys are in `keys`. Get `keys` from
`OptionParser.relative_path_settings`.
"""
if base_path is None:
base_path = os.getcwd()
for key in keys:
if pathdict.has_key(key) and pathdict[key]:
pathdict[key] = os.path.normpath(
os.path.abspath(os.path.join(base_path, pathdict[key])))
class OptionParser(optparse.OptionParser, docutils.SettingsSpec):
"""
Parser for command-line and library use. The `settings_spec`
specification here and in other Docutils components are merged to build
the set of command-line options and runtime settings for this process.
Common settings (defined below) and component-specific settings must not
conflict. Short options are reserved for common settings, and components
are restrict to using long options.
"""
threshold_choices = 'info 1 warning 2 error 3 severe 4 none 5'.split()
"""Possible inputs for for --report and --halt threshold values."""
thresholds = {'info': 1, 'warning': 2, 'error': 3, 'severe': 4, 'none': 5}
"""Lookup table for --report and --halt threshold values."""
if hasattr(codecs, 'backslashreplace_errors'):
default_error_encoding_error_handler = 'backslashreplace'
else:
default_error_encoding_error_handler = 'replace'
settings_spec = (
'General Docutils Options',
None,
(('Include a "Generated by Docutils" credit and link at the end '
'of the document.',
['--generator', '-g'], {'action': 'store_true'}),
('Do not include a generator credit.',
['--no-generator'], {'action': 'store_false', 'dest': 'generator'}),
('Include the date at the end of the document (UTC).',
['--date', '-d'], {'action': 'store_const', 'const': '%Y-%m-%d',
'dest': 'datestamp'}),
('Include the time & date at the end of the document (UTC).',
['--time', '-t'], {'action': 'store_const',
'const': '%Y-%m-%d %H:%M UTC',
'dest': 'datestamp'}),
('Do not include a datestamp of any kind.',
['--no-datestamp'], {'action': 'store_const', 'const': None,
'dest': 'datestamp'}),
('Include a "View document source" link (relative to destination).',
['--source-link', '-s'], {'action': 'store_true'}),
('Use the supplied <URL> verbatim for a "View document source" '
'link; implies --source-link.',
['--source-url'], {'metavar': '<URL>'}),
('Do not include a "View document source" link.',
['--no-source-link'],
{'action': 'callback', 'callback': store_multiple,
'callback_args': ('source_link', 'source_url')}),
('Enable backlinks from section headers to table of contents '
'entries. This is the default.',
['--toc-entry-backlinks'],
{'dest': 'toc_backlinks', 'action': 'store_const', 'const': 'entry',
'default': 'entry'}),
('Enable backlinks from section headers to the top of the table of '
'contents.',
['--toc-top-backlinks'],
{'dest': 'toc_backlinks', 'action': 'store_const', 'const': 'top'}),
('Disable backlinks to the table of contents.',
['--no-toc-backlinks'],
{'dest': 'toc_backlinks', 'action': 'store_false'}),
('Enable backlinks from footnotes and citations to their '
'references. This is the default.',
['--footnote-backlinks'],
{'action': 'store_true', 'default': 1}),
('Disable backlinks from footnotes and citations.',
['--no-footnote-backlinks'],
{'dest': 'footnote_backlinks', 'action': 'store_false'}),
('Set verbosity threshold; report system messages at or higher than '
'<level> (by name or number: "info" or "1", warning/2, error/3, '
'severe/4; also, "none" or "5"). Default is 2 (warning).',
['--report', '-r'], {'choices': threshold_choices, 'default': 2,
'dest': 'report_level', 'metavar': '<level>'}),
('Report all system messages, info-level and higher. (Same as '
'"--report=info".)',
['--verbose', '-v'], {'action': 'store_const', 'const': 'info',
'dest': 'report_level'}),
('Do not report any system messages. (Same as "--report=none".)',
['--quiet', '-q'], {'action': 'store_const', 'const': 'none',
'dest': 'report_level'}),
('Set the threshold (<level>) at or above which system messages are '
'converted to exceptions, halting execution immediately. Levels '
'as in --report. Default is 4 (severe).',
['--halt'], {'choices': threshold_choices, 'dest': 'halt_level',
'default': 4, 'metavar': '<level>'}),
('Same as "--halt=info": halt processing at the slightest problem.',
['--strict'], {'action': 'store_const', 'const': 'info',
'dest': 'halt_level'}),
('Enable a non-zero exit status for normal exit if non-halting '
'system messages (at or above <level>) were generated. Levels as '
'in --report. Default is 5 (disabled). Exit status is the maximum '
'system message level plus 10 (11 for INFO, etc.).',
['--exit'], {'choices': threshold_choices, 'dest': 'exit_level',
'default': 5, 'metavar': '<level>'}),
('Report debug-level system messages and generate diagnostic output.',
['--debug'], {'action': 'store_true'}),
('Do not report debug-level system messages or generate diagnostic '
'output.',
['--no-debug'], {'action': 'store_false', 'dest': 'debug'}),
('Send the output of system messages (warnings) to <file>.',
['--warnings'], {'dest': 'warning_stream', 'metavar': '<file>'}),
('Enable Python tracebacks when an error occurs.',
['--traceback'], {'action': 'store_true', 'default': None}),
('Disable Python tracebacks when errors occur; report just the error '
'instead. This is the default.',
['--no-traceback'], {'dest': 'traceback', 'action': 'store_false'}),
('Specify the encoding of input text. Default is locale-dependent.',
['--input-encoding', '-i'],
{'action': 'callback', 'callback': set_encoding,
'metavar': '<name>', 'type': 'string', 'dest': 'input_encoding'}),
('Specify the text encoding for output. Default is UTF-8. '
'Optionally also specify the encoding error handler for unencodable '
'characters (see "--error-encoding"); default is "strict".',
['--output-encoding', '-o'],
{'action': 'callback', 'callback': set_encoding_and_error_handler,
'metavar': '<name[:handler]>', 'type': 'string',
'dest': 'output_encoding', 'default': 'utf-8'}),
(SUPPRESS_HELP, # usually handled by --output-encoding
['--output_encoding_error_handler'],
{'action': 'callback', 'callback': set_encoding_error_handler,
'type': 'string', 'dest': 'output_encoding_error_handler',
'default': 'strict'}),
('Specify the text encoding for error output. Default is ASCII. '
'Optionally also specify the encoding error handler for unencodable '
'characters, after a colon (":"). Acceptable values are the same '
'as for the "error" parameter of Python\'s ``encode`` string '
'method. Default is "%s".' % default_error_encoding_error_handler,
['--error-encoding', '-e'],
{'action': 'callback', 'callback': set_encoding_and_error_handler,
'metavar': '<name[:handler]>', 'type': 'string',
'dest': 'error_encoding', 'default': 'ascii'}),
(SUPPRESS_HELP, # usually handled by --error-encoding
['--error_encoding_error_handler'],
{'action': 'callback', 'callback': set_encoding_error_handler,
'type': 'string', 'dest': 'error_encoding_error_handler',
'default': default_error_encoding_error_handler}),
('Specify the language of input text (ISO 639 2-letter identifier).'
' Default is "en" (English).',
['--language', '-l'], {'dest': 'language_code', 'default': 'en',
'metavar': '<name>'}),
('Read configuration settings from <file>, if it exists.',
['--config'], {'metavar': '<file>', 'type': 'string',
'action': 'callback', 'callback': read_config_file}),
("Show this program's version number and exit.",
['--version', '-V'], {'action': 'version'}),
('Show this help message and exit.',
['--help', '-h'], {'action': 'help'}),
# Hidden options, for development use only:
(SUPPRESS_HELP, ['--dump-settings'], {'action': 'store_true'}),
(SUPPRESS_HELP, ['--dump-internals'], {'action': 'store_true'}),
(SUPPRESS_HELP, ['--dump-transforms'], {'action': 'store_true'}),
(SUPPRESS_HELP, ['--dump-pseudo-xml'], {'action': 'store_true'}),
(SUPPRESS_HELP, ['--expose-internal-attribute'],
{'action': 'append', 'dest': 'expose_internals'}),))
"""Runtime settings and command-line options common to all Docutils front
ends. Setting specs specific to individual Docutils components are also
used (see `populate_from_components()`)."""
settings_defaults = {'_disable_config': None}
"""Defaults for settings that don't have command-line option equivalents."""
relative_path_settings = ('warning_stream',)
version_template = '%%prog (Docutils %s)' % docutils.__version__
"""Default version message."""
def __init__(self, components=(), defaults=None, read_config_files=None,
*args, **kwargs):
"""
`components` is a list of Docutils components each containing a
``.settings_spec`` attribute. `defaults` is a mapping of setting
default overrides.
"""
optparse.OptionParser.__init__(
self, add_help_option=None,
formatter=optparse.TitledHelpFormatter(width=78),
*args, **kwargs)
if not self.version:
self.version = self.version_template
# Make an instance copy (it will be modified):
self.relative_path_settings = list(self.relative_path_settings)
self.populate_from_components((self,) + tuple(components))
defaults = defaults or {}
if read_config_files and not self.defaults['_disable_config']:
# @@@ Extract this code into a method, which can be called from
# the read_config_file callback also.
config = ConfigParser()
config.read_standard_files(self)
config_settings = config.get_section('options')
make_paths_absolute(config_settings, self.relative_path_settings)
defaults.update(config_settings)
# Internal settings with no defaults from settings specifications;
# initialize manually:
self.set_defaults(_source=None, _destination=None, **defaults)
def populate_from_components(self, components):
"""
For each component, first populate from the `SettingsSpec.settings_spec`
structure, then from the `SettingsSpec.settings_defaults` dictionary.
After all components have been processed, check for and populate from
each component's `SettingsSpec.settings_default_overrides` dictionary.
"""
for component in components:
if component is None:
continue
i = 0
settings_spec = component.settings_spec
self.relative_path_settings.extend(
component.relative_path_settings)
while i < len(settings_spec):
title, description, option_spec = settings_spec[i:i+3]
if title:
group = optparse.OptionGroup(self, title, description)
self.add_option_group(group)
else:
group = self # single options
for (help_text, option_strings, kwargs) in option_spec:
group.add_option(help=help_text, *option_strings,
**kwargs)
if component.settings_defaults:
self.defaults.update(component.settings_defaults)
i += 3
for component in components:
if component and component.settings_default_overrides:
self.defaults.update(component.settings_default_overrides)
def check_values(self, values, args):
if hasattr(values, 'report_level'):
values.report_level = self.check_threshold(values.report_level)
if hasattr(values, 'halt_level'):
values.halt_level = self.check_threshold(values.halt_level)
if hasattr(values, 'exit_level'):
values.exit_level = self.check_threshold(values.exit_level)
values._source, values._destination = self.check_args(args)
make_paths_absolute(values.__dict__, self.relative_path_settings,
os.getcwd())
return values
def check_threshold(self, level):
try:
return int(level)
except ValueError:
try:
return self.thresholds[level.lower()]
except (KeyError, AttributeError):
self.error('Unknown threshold: %r.' % level)
def check_args(self, args):
source = destination = None
if args:
source = args.pop(0)
if source == '-': # means stdin
source = None
if args:
destination = args.pop(0)
if destination == '-': # means stdout
destination = None
if args:
self.error('Maximum 2 arguments allowed.')
if source and source == destination:
self.error('Do not specify the same file for both source and '
'destination. It will clobber the source file.')
return source, destination
class ConfigParser(CP.ConfigParser):
standard_config_files = (
'/etc/docutils.conf', # system-wide
'./docutils.conf', # project-specific
os.path.expanduser('~/.docutils')) # user-specific
"""Docutils configuration files, using ConfigParser syntax (section
'options'). Later files override earlier ones."""
validation = {
'options':
{'input_encoding': validate_encoding,
'output_encoding': validate_encoding,
'output_encoding_error_handler': validate_encoding_error_handler,
'error_encoding': validate_encoding,
'error_encoding_error_handler': validate_encoding_error_handler}}
"""{section: {option: validation function}} mapping, used by
`validate_options`. Validation functions take two parameters: name and
value. They return a (possibly modified) value, or raise an exception."""
def read_standard_files(self, option_parser):
self.read(self.standard_config_files, option_parser)
def read(self, filenames, option_parser):
if type(filenames) in types.StringTypes:
filenames = [filenames]
for filename in filenames:
CP.ConfigParser.read(self, filename)
self.validate_options(filename, option_parser)
def validate_options(self, filename, option_parser):
for section in self.validation.keys():
if not self.has_section(section):
continue
for option in self.validation[section].keys():
if self.has_option(section, option):
value = self.get(section, option)
validator = self.validation[section][option]
try:
new_value = validator(option, value)
except Exception, error:
raise (ValueError(
'Error in config file "%s", section "[%s]":\n'
' %s: %s\n %s = %s'
% (filename, section, error.__class__.__name__,
error, option, value)), None, sys.exc_info()[2])
self.set(section, option, new_value)
def optionxform(self, optionstr):
"""
Transform '-' to '_' so the cmdline form of option names can be used.
"""
return optionstr.lower().replace('-', '_')
def get_section(self, section, raw=0, vars=None):
"""
Return a given section as a dictionary (empty if the section
doesn't exist).
All % interpolations are expanded in the return values, based on the
defaults passed into the constructor, unless the optional argument
`raw` is true. Additional substitutions may be provided using the
`vars` argument, which must be a dictionary whose contents overrides
any pre-existing defaults.
The section DEFAULT is special.
"""
section_dict = {}
if self.has_section(section):
for option in self.options(section):
section_dict[option] = self.get(section, option, raw, vars)
return section_dict
# Author: David Goodger
# Contact: goodger@users.sourceforge.net
# Revision: $Revision: 1.3 $
# Date: $Date: 2003/07/10 15:49:30 $
# Copyright: This module has been placed in the public domain.
"""
I/O classes provide a uniform API for low-level input and output. Subclasses
will exist for a variety of input/output mechanisms.
"""
__docformat__ = 'reStructuredText'
import sys
import locale
from types import UnicodeType
from docutils import TransformSpec
class Input(TransformSpec):
"""
Abstract base class for input wrappers.
"""
component_type = 'input'
default_source_path = None
def __init__(self, source=None, source_path=None, encoding=None):
self.encoding = encoding
"""Text encoding for the input source."""
self.source = source
"""The source of input data."""
self.source_path = source_path
"""A text reference to the source."""
if not source_path:
self.source_path = self.default_source_path
def __repr__(self):
return '%s: source=%r, source_path=%r' % (self.__class__, self.source,
self.source_path)
def read(self):
raise NotImplementedError
def decode(self, data):
"""
Decode a string, `data`, heuristically.
Raise UnicodeError if unsuccessful.
The client application should call ``locale.setlocale`` at the
beginning of processing::
locale.setlocale(locale.LC_ALL, '')
"""
if (self.encoding and self.encoding.lower() == 'unicode'
or isinstance(data, UnicodeType)):
return unicode(data)
encodings = [self.encoding, 'utf-8']
try:
encodings.append(locale.nl_langinfo(locale.CODESET))
except:
pass
try:
encodings.append(locale.getlocale()[1])
except:
pass
try:
encodings.append(locale.getdefaultlocale()[1])
except:
pass
encodings.append('latin-1')
for enc in encodings:
if not enc:
continue
try:
return unicode(data, enc)
except (UnicodeError, LookupError):
pass
raise UnicodeError(
'Unable to decode input data. Tried the following encodings: %s.'
% ', '.join([repr(enc) for enc in encodings if enc]))
class Output(TransformSpec):
"""
Abstract base class for output wrappers.
"""
component_type = 'output'
default_destination_path = None
def __init__(self, destination=None, destination_path=None,
encoding=None, error_handler='strict'):
self.encoding = encoding
"""Text encoding for the output destination."""
self.error_handler = error_handler or 'strict'
"""Text encoding error handler."""
self.destination = destination
"""The destination for output data."""
self.destination_path = destination_path
"""A text reference to the destination."""
if not destination_path:
self.destination_path = self.default_destination_path
def __repr__(self):
return ('%s: destination=%r, destination_path=%r'
% (self.__class__, self.destination, self.destination_path))
def write(self, data):
raise NotImplementedError
def encode(self, data):
if self.encoding and self.encoding.lower() == 'unicode':
return data
else:
return data.encode(self.encoding, self.error_handler)
class FileInput(Input):
"""
Input for single, simple file-like objects.
"""
def __init__(self, source=None, source_path=None,
encoding=None, autoclose=1, handle_io_errors=1):
"""
:Parameters:
- `source`: either a file-like object (which is read directly), or
`None` (which implies `sys.stdin` if no `source_path` given).
- `source_path`: a path to a file, which is opened and then read.
- `autoclose`: close automatically after read (boolean); always
false if `sys.stdin` is the source.
"""
Input.__init__(self, source, source_path, encoding)
self.autoclose = autoclose
self.handle_io_errors = handle_io_errors
if source is None:
if source_path:
try:
self.source = open(source_path)
except IOError, error:
if not handle_io_errors:
raise
print >>sys.stderr, '%s: %s' % (error.__class__.__name__,
error)
print >>sys.stderr, (
'Unable to open source file for reading (%s). Exiting.'
% source_path)
sys.exit(1)
else:
self.source = sys.stdin
self.autoclose = None
if not source_path:
try:
self.source_path = self.source.name
except AttributeError:
pass
def read(self):
"""Read and decode a single file and return the data."""
data = self.source.read()
if self.autoclose:
self.close()
return self.decode(data)
def close(self):
self.source.close()
class FileOutput(Output):
"""
Output for single, simple file-like objects.
"""
def __init__(self, destination=None, destination_path=None,
encoding=None, error_handler='strict', autoclose=1,
handle_io_errors=1):
"""
:Parameters:
- `destination`: either a file-like object (which is written
directly) or `None` (which implies `sys.stdout` if no
`destination_path` given).
- `destination_path`: a path to a file, which is opened and then
written.
- `autoclose`: close automatically after write (boolean); always
false if `sys.stdout` is the destination.
"""
Output.__init__(self, destination, destination_path,
encoding, error_handler)
self.opened = 1
self.autoclose = autoclose
self.handle_io_errors = handle_io_errors
if destination is None:
if destination_path:
self.opened = None
else:
self.destination = sys.stdout
self.autoclose = None
if not destination_path:
try:
self.destination_path = self.destination.name
except AttributeError:
pass
def open(self):
try:
self.destination = open(self.destination_path, 'w')
except IOError, error:
if not self.handle_io_errors:
raise
print >>sys.stderr, '%s: %s' % (error.__class__.__name__,
error)
print >>sys.stderr, ('Unable to open destination file for writing '
'(%s). Exiting.' % source_path)
sys.exit(1)
self.opened = 1
def write(self, data):
"""Encode `data`, write it to a single file, and return it."""
output = self.encode(data)
if not self.opened:
self.open()
self.destination.write(output)
if self.autoclose:
self.close()
return output
def close(self):
self.destination.close()
self.opened = None
class StringInput(Input):
"""
Direct string input.
"""
default_source_path = '<string>'
def read(self):
"""Decode and return the source string."""
return self.decode(self.source)
class StringOutput(Output):
"""
Direct string output.
"""
default_destination_path = '<string>'
def write(self, data):
"""Encode `data`, store it in `self.destination`, and return it."""
self.destination = self.encode(data)
return self.destination
class NullInput(Input):
"""
Degenerate input: read nothing.
"""
default_source_path = 'null input'
def read(self):
"""Return a null string."""
return u''
class NullOutput(Output):
"""
Degenerate output: write nothing.
"""
default_destination_path = 'null output'
def write(self, data):
"""Do nothing ([don't even] send data to the bit bucket)."""
pass
# Author: David Goodger
# Contact: goodger@users.sourceforge.net
# Revision: $Revision: 1.3 $
# Date: $Date: 2003/07/10 15:49:30 $
# Copyright: This module has been placed in the public domain.
"""
Docutils document tree element class library.
Classes in CamelCase are abstract base classes or auxiliary classes. The one
exception is `Text`, for a text (PCDATA) node; uppercase is used to
differentiate from element classes. Classes in lower_case_with_underscores
are element classes, matching the XML element generic identifiers in the DTD_.
The position of each node (the level at which it can occur) is significant and
is represented by abstract base classes (`Root`, `Structural`, `Body`,
`Inline`, etc.). Certain transformations will be easier because we can use
``isinstance(node, base_class)`` to determine the position of the node in the
hierarchy.
.. _DTD: http://docutils.sourceforge.net/spec/docutils.dtd
"""
__docformat__ = 'reStructuredText'
import sys
import os
import re
import xml.dom.minidom
from types import IntType, SliceType, StringType, UnicodeType, \
TupleType, ListType
from UserString import UserString
# ==============================
# Functional Node Base Classes
# ==============================
class Node:
"""Abstract base class of nodes in a document tree."""
parent = None
"""Back-reference to the Node immediately containing this Node."""
document = None
"""The `document` node at the root of the tree containing this Node."""
source = None
"""Path or description of the input source which generated this Node."""
line = None
"""The line number (1-based) of the beginning of this Node in `source`."""
def __nonzero__(self):
"""
Node instances are always true, even if they're empty. A node is more
than a simple container. Its boolean "truth" does not depend on
having one or more subnodes in the doctree.
Use `len()` to check node length. Use `None` to represent a boolean
false value.
"""
return 1
def asdom(self, dom=xml.dom.minidom):
"""Return a DOM **fragment** representation of this Node."""
domroot = dom.Document()
return self._dom_node(domroot)
def pformat(self, indent=' ', level=0):
"""Return an indented pseudo-XML representation, for test purposes."""
raise NotImplementedError
def copy(self):
"""Return a copy of self."""
raise NotImplementedError
def setup_child(self, child):
child.parent = self
if self.document:
child.document = self.document
if child.source is None:
child.source = self.document.current_source
if child.line is None:
child.line = self.document.current_line
def walk(self, visitor):
"""
Traverse a tree of `Node` objects, calling ``visit_...`` methods of
`visitor` when entering each node. If there is no
``visit_particular_node`` method for a node of type
``particular_node``, the ``unknown_visit`` method is called. (The
`walkabout()` method is similar, except it also calls ``depart_...``
methods before exiting each node.)
This tree traversal supports limited in-place tree
modifications. Replacing one node with one or more nodes is
OK, as is removing an element. However, if the node removed
or replaced occurs after the current node, the old node will
still be traversed, and any new nodes will not.
Within ``visit_...`` methods (and ``depart_...`` methods for
`walkabout()`), `TreePruningException` subclasses may be raised
(`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).
Parameter `visitor`: A `NodeVisitor` object, containing a
``visit_...`` method for each `Node` subclass encountered.
"""
name = 'visit_' + self.__class__.__name__
method = getattr(visitor, name, visitor.unknown_visit)
visitor.document.reporter.debug(name, category='nodes.Node.walk')
try:
method(self)
except (SkipChildren, SkipNode):
return
except SkipDeparture: # not applicable; ignore
pass
children = self.get_children()
try:
for child in children[:]:
child.walk(visitor)
except SkipSiblings:
pass
def walkabout(self, visitor):
"""
Perform a tree traversal similarly to `Node.walk()` (which see),
except also call ``depart_...`` methods before exiting each node. If
there is no ``depart_particular_node`` method for a node of type
``particular_node``, the ``unknown_departure`` method is called.
Parameter `visitor`: A `NodeVisitor` object, containing ``visit_...``
and ``depart_...`` methods for each `Node` subclass encountered.
"""
call_depart = 1
name = 'visit_' + self.__class__.__name__
method = getattr(visitor, name, visitor.unknown_visit)
visitor.document.reporter.debug(name, category='nodes.Node.walkabout')
try:
try:
method(self)
except SkipNode:
return
except SkipDeparture:
call_depart = 0
children = self.get_children()
try:
for child in children[:]:
child.walkabout(visitor)
except SkipSiblings:
pass
except SkipChildren:
pass
if call_depart:
name = 'depart_' + self.__class__.__name__
method = getattr(visitor, name, visitor.unknown_departure)
visitor.document.reporter.debug(
name, category='nodes.Node.walkabout')
method(self)
class Text(Node, UserString):
"""
Instances are terminal nodes (leaves) containing text only; no child
nodes or attributes. Initialize by passing a string to the constructor.
Access the text itself with the `astext` method.
"""
tagname = '#text'
def __init__(self, data, rawsource=''):
UserString.__init__(self, data)
self.rawsource = rawsource
"""The raw text from which this element was constructed."""
def __repr__(self):
data = repr(self.data)
if len(data) > 70:
data = repr(self.data[:64] + ' ...')
return '<%s: %s>' % (self.tagname, data)
def __len__(self):
return len(self.data)
def shortrepr(self):
data = repr(self.data)
if len(data) > 20:
data = repr(self.data[:16] + ' ...')
return '<%s: %s>' % (self.tagname, data)
def _dom_node(self, domroot):
return domroot.createTextNode(self.data)
def astext(self):
return self.data
def copy(self):
return self.__class__(self.data)
def pformat(self, indent=' ', level=0):
result = []
indent = indent * level
for line in self.data.splitlines():
result.append(indent + line + '\n')
return ''.join(result)
def get_children(self):
"""Text nodes have no children. Return []."""
return []
class Element(Node):
"""
`Element` is the superclass to all specific elements.
Elements contain attributes and child nodes. Elements emulate
dictionaries for attributes, indexing by attribute name (a string). To
set the attribute 'att' to 'value', do::
element['att'] = 'value'
Elements also emulate lists for child nodes (element nodes and/or text
nodes), indexing by integer. To get the first child node, use::
element[0]
Elements may be constructed using the ``+=`` operator. To add one new
child node to element, do::
element += node
This is equivalent to ``element.append(node)``.
To add a list of multiple child nodes at once, use the same ``+=``
operator::
element += [node1, node2]
This is equivalent to ``element.extend([node1, node2])``.
"""
tagname = None
"""The element generic identifier. If None, it is set as an instance
attribute to the name of the class."""
child_text_separator = '\n\n'
"""Separator for child nodes, used by `astext()` method."""
def __init__(self, rawsource='', *children, **attributes):
self.rawsource = rawsource
"""The raw text from which this element was constructed."""
self.children = []
"""List of child nodes (elements and/or `Text`)."""
self.extend(children) # maintain parent info
self.attributes = {}
"""Dictionary of attribute {name: value}."""
for att, value in attributes.items():
self.attributes[att.lower()] = value
if self.tagname is None:
self.tagname = self.__class__.__name__
def _dom_node(self, domroot):
element = domroot.createElement(self.tagname)
for attribute, value in self.attributes.items():
if isinstance(value, ListType):
value = ' '.join(['%s' % v for v in value])
element.setAttribute(attribute, '%s' % value)
for child in self.children:
element.appendChild(child._dom_node(domroot))
return element
def __repr__(self):
data = ''
for c in self.children:
data += c.shortrepr()
if len(data) > 60:
data = data[:56] + ' ...'
break
if self.hasattr('name'):
return '<%s "%s": %s>' % (self.__class__.__name__,
self.attributes['name'], data)
else:
return '<%s: %s>' % (self.__class__.__name__, data)
def shortrepr(self):
if self.hasattr('name'):
return '<%s "%s"...>' % (self.__class__.__name__,
self.attributes['name'])
else:
return '<%s...>' % self.tagname
def __str__(self):
return unicode(self).encode('raw_unicode_escape')
def __unicode__(self):
if self.children:
return u'%s%s%s' % (self.starttag(),
''.join([str(c) for c in self.children]),
self.endtag())
else:
return self.emptytag()
def starttag(self):
parts = [self.tagname]
for name, value in self.attlist():
if value is None: # boolean attribute
parts.append(name)
elif isinstance(value, ListType):
values = ['%s' % v for v in value]
parts.append('%s="%s"' % (name, ' '.join(values)))
else:
parts.append('%s="%s"' % (name, value))
return '<%s>' % ' '.join(parts)
def endtag(self):
return '</%s>' % self.tagname
def emptytag(self):
return u'<%s/>' % ' '.join([self.tagname] +
['%s="%s"' % (n, v)
for n, v in self.attlist()])
def __len__(self):
return len(self.children)
def __getitem__(self, key):
if isinstance(key, UnicodeType) or isinstance(key, StringType):
return self.attributes[key]
elif isinstance(key, IntType):
return self.children[key]
elif isinstance(key, SliceType):
assert key.step in (None, 1), 'cannot handle slice with stride'
return self.children[key.start:key.stop]
else:
raise TypeError, ('element index must be an integer, a slice, or '
'an attribute name string')
def __setitem__(self, key, item):
if isinstance(key, UnicodeType) or isinstance(key, StringType):
self.attributes[str(key)] = item
elif isinstance(key, IntType):
self.setup_child(item)
self.children[key] = item
elif isinstance(key, SliceType):
assert key.step in (None, 1), 'cannot handle slice with stride'
for node in item:
self.setup_child(node)
self.children[key.start:key.stop] = item
else:
raise TypeError, ('element index must be an integer, a slice, or '
'an attribute name string')
def __delitem__(self, key):
if isinstance(key, UnicodeType) or isinstance(key, StringType):
del self.attributes[key]
elif isinstance(key, IntType):
del self.children[key]
elif isinstance(key, SliceType):
assert key.step in (None, 1), 'cannot handle slice with stride'
del self.children[key.start:key.stop]
else:
raise TypeError, ('element index must be an integer, a simple '
'slice, or an attribute name string')
def __add__(self, other):
return self.children + other
def __radd__(self, other):
return other + self.children
def __iadd__(self, other):
"""Append a node or a list of nodes to `self.children`."""
if isinstance(other, Node):
self.setup_child(other)
self.children.append(other)
elif other is not None:
for node in other:
self.setup_child(node)
self.children.extend(other)
return self
def astext(self):
return self.child_text_separator.join(
[child.astext() for child in self.children])
def attlist(self):
attlist = self.attributes.items()
attlist.sort()
return attlist
def get(self, key, failobj=None):
return self.attributes.get(key, failobj)
def hasattr(self, attr):
return self.attributes.has_key(attr)
def delattr(self, attr):
if self.attributes.has_key(attr):
del self.attributes[attr]
def setdefault(self, key, failobj=None):
return self.attributes.setdefault(key, failobj)
has_key = hasattr
def append(self, item):
self.setup_child(item)
self.children.append(item)
def extend(self, item):
for node in item:
self.setup_child(node)
self.children.extend(item)
def insert(self, index, item):
if isinstance(item, Node):
self.setup_child(item)
self.children.insert(index, item)
elif item is not None:
self[index:index] = item
def pop(self, i=-1):
return self.children.pop(i)
def remove(self, item):
self.children.remove(item)
def index(self, item):
return self.children.index(item)
def replace(self, old, new):
"""Replace one child `Node` with another child or children."""
index = self.index(old)
if isinstance(new, Node):
self.setup_child(new)
self[index] = new
elif new is not None:
self[index:index+1] = new
def first_child_matching_class(self, childclass, start=0, end=sys.maxint):
"""
Return the index of the first child whose class exactly matches.
Parameters:
- `childclass`: A `Node` subclass to search for, or a tuple of `Node`
classes. If a tuple, any of the classes may match.
- `start`: Initial index to check.
- `end`: Initial index to *not* check.
"""
if not isinstance(childclass, TupleType):
childclass = (childclass,)
for index in range(start, min(len(self), end)):
for c in childclass:
if isinstance(self[index], c):
return index
return None
def first_child_not_matching_class(self, childclass, start=0,
end=sys.maxint):
"""
Return the index of the first child whose class does *not* match.
Parameters:
- `childclass`: A `Node` subclass to skip, or a tuple of `Node`
classes. If a tuple, none of the classes may match.
- `start`: Initial index to check.
- `end`: Initial index to *not* check.
"""
if not isinstance(childclass, TupleType):
childclass = (childclass,)
for index in range(start, min(len(self), end)):
match = 0
for c in childclass:
if isinstance(self.children[index], c):
match = 1
break
if not match:
return index
return None
def pformat(self, indent=' ', level=0):
return ''.join(['%s%s\n' % (indent * level, self.starttag())] +
[child.pformat(indent, level+1)
for child in self.children])
def get_children(self):
"""Return this element's children."""
return self.children
def copy(self):
return self.__class__(**self.attributes)
def set_class(self, name):
"""Add a new name to the "class" attribute."""
self.attributes['class'] = (self.attributes.get('class', '') + ' '
+ name.lower()).strip()
class TextElement(Element):
"""
An element which directly contains text.
Its children are all Text or TextElement nodes.
"""
child_text_separator = ''
"""Separator for child nodes, used by `astext()` method."""
def __init__(self, rawsource='', text='', *children, **attributes):
if text != '':
textnode = Text(text)
Element.__init__(self, rawsource, textnode, *children,
**attributes)
else:
Element.__init__(self, rawsource, *children, **attributes)
class FixedTextElement(TextElement):
"""An element which directly contains preformatted text."""
def __init__(self, rawsource='', text='', *children, **attributes):
TextElement.__init__(self, rawsource, text, *children, **attributes)
self.attributes['xml:space'] = 'preserve'
# ========
# Mixins
# ========
class Resolvable:
resolved = 0
class BackLinkable:
def add_backref(self, refid):
self.setdefault('backrefs', []).append(refid)
# ====================
# Element Categories
# ====================
class Root: pass
class Titular: pass
class PreDecorative:
"""Category of Node which may occur before Decorative Nodes."""
class PreBibliographic(PreDecorative):
"""Category of Node which may occur before Bibliographic Nodes."""
class Bibliographic(PreDecorative): pass
class Decorative: pass
class Structural: pass
class Body: pass
class General(Body): pass
class Sequential(Body): pass
class Admonition(Body): pass
class Special(Body):
"""Special internal body elements."""
class Invisible:
"""Internal elements that don't appear in output."""
class Part: pass
class Inline: pass
class Referential(Resolvable): pass
class Targetable(Resolvable):
referenced = 0
class Labeled:
"""Contains a `label` as its first element."""
# ==============
# Root Element
# ==============
class document(Root, Structural, Element):
def __init__(self, settings, reporter, *args, **kwargs):
Element.__init__(self, *args, **kwargs)
self.current_source = None
"""Path to or description of the input source being processed."""
self.current_line = None
"""Line number (1-based) of `current_source`."""
self.settings = settings
"""Runtime settings data record."""
self.reporter = reporter
"""System message generator."""
self.external_targets = []
"""List of external target nodes."""
self.internal_targets = []
"""List of internal target nodes."""
self.indirect_targets = []
"""List of indirect target nodes."""
self.substitution_defs = {}
"""Mapping of substitution names to substitution_definition nodes."""
self.substitution_names = {}
"""Mapping of case-normalized substitution names to case-sensitive
names."""
self.refnames = {}
"""Mapping of names to lists of referencing nodes."""
self.refids = {}
"""Mapping of ids to lists of referencing nodes."""
self.nameids = {}
"""Mapping of names to unique id's."""
self.nametypes = {}
"""Mapping of names to hyperlink type (boolean: True => explicit,
False => implicit."""
self.ids = {}
"""Mapping of ids to nodes."""
self.substitution_refs = {}
"""Mapping of substitution names to lists of substitution_reference
nodes."""
self.footnote_refs = {}
"""Mapping of footnote labels to lists of footnote_reference nodes."""
self.citation_refs = {}
"""Mapping of citation labels to lists of citation_reference nodes."""
self.anonymous_targets = []
"""List of anonymous target nodes."""
self.anonymous_refs = []
"""List of anonymous reference nodes."""
self.autofootnotes = []
"""List of auto-numbered footnote nodes."""
self.autofootnote_refs = []
"""List of auto-numbered footnote_reference nodes."""
self.symbol_footnotes = []
"""List of symbol footnote nodes."""
self.symbol_footnote_refs = []
"""List of symbol footnote_reference nodes."""
self.footnotes = []
"""List of manually-numbered footnote nodes."""
self.citations = []
"""List of citation nodes."""
self.autofootnote_start = 1
"""Initial auto-numbered footnote number."""
self.symbol_footnote_start = 0
"""Initial symbol footnote symbol index."""
self.id_start = 1
"""Initial ID number."""
self.parse_messages = []
"""System messages generated while parsing."""
self.transform_messages = []
"""System messages generated while applying transforms."""
import docutils.transforms
self.transformer = docutils.transforms.Transformer(self)
"""Storage for transforms to be applied to this document."""
self.document = self
def asdom(self, dom=xml.dom.minidom):
"""Return a DOM representation of this document."""
domroot = dom.Document()
domroot.appendChild(self._dom_node(domroot))
return domroot
def set_id(self, node, msgnode=None):
if node.has_key('id'):
id = node['id']
if self.ids.has_key(id) and self.ids[id] is not node:
msg = self.reporter.severe('Duplicate ID: "%s".' % id)
if msgnode != None:
msgnode += msg
else:
if node.has_key('name'):
id = make_id(node['name'])
else:
id = ''
while not id or self.ids.has_key(id):
id = 'id%s' % self.id_start
self.id_start += 1
node['id'] = id
self.ids[id] = node
return id
def set_name_id_map(self, node, id, msgnode=None, explicit=None):
"""
`self.nameids` maps names to IDs, while `self.nametypes` maps names to
booleans representing hyperlink type (True==explicit,
False==implicit). This method updates the mappings.
The following state transition table shows how `self.nameids` ("ids")
and `self.nametypes` ("types") change with new input (a call to this
method), and what actions are performed:
==== ===== ======== ======== ======= ==== ===== =====
Old State Input Action New State Notes
----------- -------- ----------------- ----------- -----
ids types new type sys.msg. dupname ids types
==== ===== ======== ======== ======= ==== ===== =====
-- -- explicit -- -- new True
-- -- implicit -- -- new False
None False explicit -- -- new True
old False explicit implicit old new True
None True explicit explicit new None True
old True explicit explicit new,old None True [#]_
None False implicit implicit new None False
old False implicit implicit new,old None False
None True implicit implicit new None True
old True implicit implicit new old True
==== ===== ======== ======== ======= ==== ===== =====
.. [#] Do not clear the name-to-id map or invalidate the old target if
both old and new targets are external and refer to identical URIs.
The new target is invalidated regardless.
"""
if node.has_key('name'):
name = node['name']
if self.nameids.has_key(name):
self.set_duplicate_name_id(node, id, name, msgnode, explicit)
else:
self.nameids[name] = id
self.nametypes[name] = explicit
def set_duplicate_name_id(self, node, id, name, msgnode, explicit):
old_id = self.nameids[name]
old_explicit = self.nametypes[name]
self.nametypes[name] = old_explicit or explicit
if explicit:
if old_explicit:
level = 2
if old_id is not None:
old_node = self.ids[old_id]
if node.has_key('refuri'):
refuri = node['refuri']
if old_node.has_key('name') \
and old_node.has_key('refuri') \
and old_node['refuri'] == refuri:
level = 1 # just inform if refuri's identical
if level > 1:
dupname(old_node)
self.nameids[name] = None
msg = self.reporter.system_message(
level, 'Duplicate explicit target name: "%s".' % name,
backrefs=[id], base_node=node)
if msgnode != None:
msgnode += msg
dupname(node)
else:
self.nameids[name] = id
if old_id is not None:
old_node = self.ids[old_id]
dupname(old_node)
else:
if old_id is not None and not old_explicit:
self.nameids[name] = None
old_node = self.ids[old_id]
dupname(old_node)
dupname(node)
if not explicit or (not old_explicit and old_id is not None):
msg = self.reporter.info(
'Duplicate implicit target name: "%s".' % name,
backrefs=[id], base_node=node)
if msgnode != None:
msgnode += msg
def has_name(self, name):
return self.nameids.has_key(name)
def note_implicit_target(self, target, msgnode=None):
id = self.set_id(target, msgnode)
self.set_name_id_map(target, id, msgnode, explicit=None)
def note_explicit_target(self, target, msgnode=None):
id = self.set_id(target, msgnode)
self.set_name_id_map(target, id, msgnode, explicit=1)
def note_refname(self, node):
self.refnames.setdefault(node['refname'], []).append(node)
def note_refid(self, node):
self.refids.setdefault(node['refid'], []).append(node)
def note_external_target(self, target):
self.external_targets.append(target)
def note_internal_target(self, target):
self.internal_targets.append(target)
def note_indirect_target(self, target):
self.indirect_targets.append(target)
if target.has_key('name'):
self.note_refname(target)
def note_anonymous_target(self, target):
self.set_id(target)
self.anonymous_targets.append(target)
def note_anonymous_ref(self, ref):
self.anonymous_refs.append(ref)
def note_autofootnote(self, footnote):
self.set_id(footnote)
self.autofootnotes.append(footnote)
def note_autofootnote_ref(self, ref):
self.set_id(ref)
self.autofootnote_refs.append(ref)
def note_symbol_footnote(self, footnote):
self.set_id(footnote)
self.symbol_footnotes.append(footnote)
def note_symbol_footnote_ref(self, ref):
self.set_id(ref)
self.symbol_footnote_refs.append(ref)
def note_footnote(self, footnote):
self.set_id(footnote)
self.footnotes.append(footnote)
def note_footnote_ref(self, ref):
self.set_id(ref)
self.footnote_refs.setdefault(ref['refname'], []).append(ref)
self.note_refname(ref)
def note_citation(self, citation):
self.citations.append(citation)
def note_citation_ref(self, ref):
self.set_id(ref)
self.citation_refs.setdefault(ref['refname'], []).append(ref)
self.note_refname(ref)
def note_substitution_def(self, subdef, def_name, msgnode=None):
name = subdef['name'] = whitespace_normalize_name(def_name)
if self.substitution_defs.has_key(name):
msg = self.reporter.error(
'Duplicate substitution definition name: "%s".' % name,
base_node=subdef)
if msgnode != None:
msgnode += msg
oldnode = self.substitution_defs[name]
dupname(oldnode)
# keep only the last definition:
self.substitution_defs[name] = subdef
# case-insensitive mapping:
self.substitution_names[fully_normalize_name(name)] = name
def note_substitution_ref(self, subref, refname):
name = subref['refname'] = whitespace_normalize_name(refname)
self.substitution_refs.setdefault(name, []).append(subref)
def note_pending(self, pending, priority=None):
self.transformer.add_pending(pending, priority)
def note_parse_message(self, message):
self.parse_messages.append(message)
def note_transform_message(self, message):
self.transform_messages.append(message)
def note_source(self, source, offset):
self.current_source = source
if offset is None:
self.current_line = offset
else:
self.current_line = offset + 1
def copy(self):
return self.__class__(self.settings, self.reporter,
**self.attributes)
# ================
# Title Elements
# ================
class title(Titular, PreBibliographic, TextElement): pass
class subtitle(Titular, PreBibliographic, TextElement): pass
class rubric(Titular, TextElement): pass
# ========================
# Bibliographic Elements
# ========================
class docinfo(Bibliographic, Element): pass
class author(Bibliographic, TextElement): pass
class authors(Bibliographic, Element): pass
class organization(Bibliographic, TextElement): pass
class address(Bibliographic, FixedTextElement): pass
class contact(Bibliographic, TextElement): pass
class version(Bibliographic, TextElement): pass
class revision(Bibliographic, TextElement): pass
class status(Bibliographic, TextElement): pass
class date(Bibliographic, TextElement): pass
class copyright(Bibliographic, TextElement): pass
# =====================
# Decorative Elements
# =====================
class decoration(Decorative, Element): pass
class header(Decorative, Element): pass
class footer(Decorative, Element): pass
# =====================
# Structural Elements
# =====================
class section(Structural, Element): pass
class topic(Structural, Element):
"""
Topics are terminal, "leaf" mini-sections, like block quotes with titles,
or textual figures. A topic is just like a section, except that it has no
subsections, and it doesn't have to conform to section placement rules.
Topics are allowed wherever body elements (list, table, etc.) are allowed,
but only at the top level of a section or document. Topics cannot nest
inside topics, sidebars, or body elements; you can't have a topic inside a
table, list, block quote, etc.
"""
class sidebar(Structural, Element):
"""
Sidebars are like miniature, parallel documents that occur inside other
documents, providing related or reference material. A sidebar is
typically offset by a border and "floats" to the side of the page; the
document's main text may flow around it. Sidebars can also be likened to
super-footnotes; their content is outside of the flow of the document's
main text.
Sidebars are allowed wherever body elements (list, table, etc.) are
allowed, but only at the top level of a section or document. Sidebars
cannot nest inside sidebars, topics, or body elements; you can't have a
sidebar inside a table, list, block quote, etc.
"""
class transition(Structural, Element): pass
# ===============
# Body Elements
# ===============
class paragraph(General, TextElement): pass
class bullet_list(Sequential, Element): pass
class enumerated_list(Sequential, Element): pass
class list_item(Part, Element): pass
class definition_list(Sequential, Element): pass
class definition_list_item(Part, Element): pass
class term(Part, TextElement): pass
class classifier(Part, TextElement): pass
class definition(Part, Element): pass
class field_list(Sequential, Element): pass
class field(Part, Element): pass
class field_name(Part, TextElement): pass
class field_body(Part, Element): pass
class option(Part, Element):
child_text_separator = ''
class option_argument(Part, TextElement):
def astext(self):
return self.get('delimiter', ' ') + TextElement.astext(self)
class option_group(Part, Element):
child_text_separator = ', '
class option_list(Sequential, Element): pass
class option_list_item(Part, Element):
child_text_separator = ' '
class option_string(Part, TextElement): pass
class description(Part, Element): pass
class literal_block(General, FixedTextElement): pass
class doctest_block(General, FixedTextElement): pass
class line_block(General, FixedTextElement): pass
class block_quote(General, Element): pass
class attribution(Part, TextElement): pass
class attention(Admonition, Element): pass
class caution(Admonition, Element): pass
class danger(Admonition, Element): pass
class error(Admonition, Element): pass
class important(Admonition, Element): pass
class note(Admonition, Element): pass
class tip(Admonition, Element): pass
class hint(Admonition, Element): pass
class warning(Admonition, Element): pass
class admonition(Admonition, Element): pass
class comment(Special, Invisible, PreBibliographic, FixedTextElement): pass
class substitution_definition(Special, Invisible, TextElement): pass
class target(Special, Invisible, Inline, TextElement, Targetable): pass
class footnote(General, Element, Labeled, BackLinkable): pass
class citation(General, Element, Labeled, BackLinkable): pass
class label(Part, TextElement): pass
class figure(General, Element): pass
class caption(Part, TextElement): pass
class legend(Part, Element): pass
class table(General, Element): pass
class tgroup(Part, Element): pass
class colspec(Part, Element): pass
class thead(Part, Element): pass
class tbody(Part, Element): pass
class row(Part, Element): pass
class entry(Part, Element): pass
class system_message(Special, PreBibliographic, Element, BackLinkable):
def __init__(self, message=None, *children, **attributes):
if message:
p = paragraph('', message)
children = (p,) + children
try:
Element.__init__(self, '', *children, **attributes)
except:
print 'system_message: children=%r' % (children,)
raise
def astext(self):
line = self.get('line', '')
return u'%s:%s: (%s/%s) %s' % (self['source'], line, self['type'],
self['level'], Element.astext(self))
class pending(Special, Invisible, PreBibliographic, Element):
"""
The "pending" element is used to encapsulate a pending operation: the
operation (transform), the point at which to apply it, and any data it
requires. Only the pending operation's location within the document is
stored in the public document tree (by the "pending" object itself); the
operation and its data are stored in the "pending" object's internal
instance attributes.
For example, say you want a table of contents in your reStructuredText
document. The easiest way to specify where to put it is from within the
document, with a directive::
.. contents::
But the "contents" directive can't do its work until the entire document
has been parsed and possibly transformed to some extent. So the directive
code leaves a placeholder behind that will trigger the second phase of the
its processing, something like this::
<pending ...public attributes...> + internal attributes
Use `document.note_pending()` so that the
`docutils.transforms.Transformer` stage of processing can run all pending
transforms.
"""
def __init__(self, transform, details=None,
rawsource='', *children, **attributes):
Element.__init__(self, rawsource, *children, **attributes)
self.transform = transform
"""The `docutils.transforms.Transform` class implementing the pending
operation."""
self.details = details or {}
"""Detail data (dictionary) required by the pending operation."""
def pformat(self, indent=' ', level=0):
internals = [
'.. internal attributes:',
' .transform: %s.%s' % (self.transform.__module__,
self.transform.__name__),
' .details:']
details = self.details.items()
details.sort()
for key, value in details:
if isinstance(value, Node):
internals.append('%7s%s:' % ('', key))
internals.extend(['%9s%s' % ('', line)
for line in value.pformat().splitlines()])
elif value and isinstance(value, ListType) \
and isinstance(value[0], Node):
internals.append('%7s%s:' % ('', key))
for v in value:
internals.extend(['%9s%s' % ('', line)
for line in v.pformat().splitlines()])
else:
internals.append('%7s%s: %r' % ('', key, value))
return (Element.pformat(self, indent, level)
+ ''.join([(' %s%s\n' % (indent * level, line))
for line in internals]))
def copy(self):
return self.__class__(self.transform, self.details, self.rawsource,
**self.attribuates)
class raw(Special, Inline, PreBibliographic, FixedTextElement):
"""
Raw data that is to be passed untouched to the Writer.
"""
pass
# =================
# Inline Elements
# =================
class emphasis(Inline, TextElement): pass
class strong(Inline, TextElement): pass
class literal(Inline, TextElement): pass
class reference(Inline, Referential, TextElement): pass
class footnote_reference(Inline, Referential, TextElement): pass
class citation_reference(Inline, Referential, TextElement): pass
class substitution_reference(Inline, TextElement): pass
class title_reference(Inline, TextElement): pass
class abbreviation(Inline, TextElement): pass
class acronym(Inline, TextElement): pass
class superscript(Inline, TextElement): pass
class subscript(Inline, TextElement): pass
class image(General, Inline, TextElement):
def astext(self):
return self.get('alt', '')
class inline(Inline, TextElement): pass
class problematic(Inline, TextElement): pass
class generated(Inline, TextElement): pass
# ========================================
# Auxiliary Classes, Functions, and Data
# ========================================
node_class_names = """
Text
abbreviation acronym address admonition attention attribution author
authors
block_quote bullet_list
caption caution citation citation_reference classifier colspec comment
contact copyright
danger date decoration definition definition_list definition_list_item
description docinfo doctest_block document
emphasis entry enumerated_list error
field field_body field_list field_name figure footer
footnote footnote_reference
generated
header hint
image important inline
label legend line_block list_item literal literal_block
note
option option_argument option_group option_list option_list_item
option_string organization
paragraph pending problematic
raw reference revision row rubric
section sidebar status strong subscript substitution_definition
substitution_reference subtitle superscript system_message
table target tbody term tgroup thead tip title title_reference topic
transition
version
warning""".split()
"""A list of names of all concrete Node subclasses."""
class NodeVisitor:
"""
"Visitor" pattern [GoF95]_ abstract superclass implementation for document
tree traversals.
Each node class has corresponding methods, doing nothing by default;
override individual methods for specific and useful behaviour. The
"``visit_`` + node class name" method is called by `Node.walk()` upon
entering a node. `Node.walkabout()` also calls the "``depart_`` + node
class name" method before exiting a node.
This is a base class for visitors whose ``visit_...`` & ``depart_...``
methods should be implemented for *all* node types encountered (such as
for `docutils.writers.Writer` subclasses). Unimplemented methods will
raise exceptions.
For sparse traversals, where only certain node types are of interest,
subclass `SparseNodeVisitor` instead. When (mostly or entirely) uniform
processing is desired, subclass `GenericNodeVisitor`.
.. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
1995.
"""
def __init__(self, document):
self.document = document
def unknown_visit(self, node):
"""
Called when entering unknown `Node` types.
Raise an exception unless overridden.
"""
raise NotImplementedError('visiting unknown node type: %s'
% node.__class__.__name__)
def unknown_departure(self, node):
"""
Called before exiting unknown `Node` types.
Raise exception unless overridden.
"""
raise NotImplementedError('departing unknown node type: %s'
% node.__class__.__name__)
class SparseNodeVisitor(NodeVisitor):
"""
Base class for sparse traversals, where only certain node types are of
interest. When ``visit_...`` & ``depart_...`` methods should be
implemented for *all* node types (such as for `docutils.writers.Writer`
subclasses), subclass `NodeVisitor` instead.
"""
def _nop(self, node):
pass
# Save typing with dynamic assignments:
for _name in node_class_names:
setattr(SparseNodeVisitor, "visit_" + _name, _nop)
setattr(SparseNodeVisitor, "depart_" + _name, _nop)
del _name, _nop
class GenericNodeVisitor(NodeVisitor):
"""
Generic "Visitor" abstract superclass, for simple traversals.
Unless overridden, each ``visit_...`` method calls `default_visit()`, and
each ``depart_...`` method (when using `Node.walkabout()`) calls
`default_departure()`. `default_visit()` (and `default_departure()`) must
be overridden in subclasses.
Define fully generic visitors by overriding `default_visit()` (and
`default_departure()`) only. Define semi-generic visitors by overriding
individual ``visit_...()`` (and ``depart_...()``) methods also.
`NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`) should
be overridden for default behavior.
"""
def default_visit(self, node):
"""Override for generic, uniform traversals."""
raise NotImplementedError
def default_departure(self, node):
"""Override for generic, uniform traversals."""
raise NotImplementedError
def _call_default_visit(self, node):
self.default_visit(node)
def _call_default_departure(self, node):
self.default_departure(node)
# Save typing with dynamic assignments:
for _name in node_class_names:
setattr(GenericNodeVisitor, "visit_" + _name, _call_default_visit)
setattr(GenericNodeVisitor, "depart_" + _name, _call_default_departure)
del _name, _call_default_visit, _call_default_departure
class TreeCopyVisitor(GenericNodeVisitor):
"""
Make a complete copy of a tree or branch, including element attributes.
"""
def __init__(self, document):
GenericNodeVisitor.__init__(self, document)
self.parent_stack = []
self.parent = []
def get_tree_copy(self):
return self.parent[0]
def default_visit(self, node):
"""Copy the current node, and make it the new acting parent."""
newnode = node.copy()
self.parent.append(newnode)
self.parent_stack.append(self.parent)
self.parent = newnode
def default_departure(self, node):
"""Restore the previous acting parent."""
self.parent = self.parent_stack.pop()
class TreePruningException(Exception):
"""
Base class for `NodeVisitor`-related tree pruning exceptions.
Raise subclasses from within ``visit_...`` or ``depart_...`` methods
called from `Node.walk()` and `Node.walkabout()` tree traversals to prune
the tree traversed.
"""
pass
class SkipChildren(TreePruningException):
"""
Do not visit any children of the current node. The current node's
siblings and ``depart_...`` method are not affected.
"""
pass
class SkipSiblings(TreePruningException):
"""
Do not visit any more siblings (to the right) of the current node. The
current node's children and its ``depart_...`` method are not affected.
"""
pass
class SkipNode(TreePruningException):
"""
Do not visit the current node's children, and do not call the current
node's ``depart_...`` method.
"""
pass
class SkipDeparture(TreePruningException):
"""
Do not call the current node's ``depart_...`` method. The current node's
children and siblings are not affected.
"""
pass
class NodeFound(TreePruningException):
"""
Raise to indicate that the target of a search has been found. This
exception must be caught by the client; it is not caught by the traversal
code.
"""
pass
def make_id(string):
"""
Convert `string` into an identifier and return it.
Docutils identifiers will conform to the regular expression
``[a-z](-?[a-z0-9]+)*``. For CSS compatibility, identifiers (the "class"
and "id" attributes) should have no underscores, colons, or periods.
Hyphens may be used.
- The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:
ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
followed by any number of letters, digits ([0-9]), hyphens ("-"),
underscores ("_"), colons (":"), and periods (".").
- However the `CSS1 spec`_ defines identifiers based on the "name" token,
a tighter interpretation ("flex" tokenizer notation; "latin1" and
"escape" 8-bit characters have been replaced with entities)::
unicode \\[0-9a-f]{1,4}
latin1 [&iexcl;-&yuml;]
escape {unicode}|\\[ -~&iexcl;-&yuml;]
nmchar [-a-z0-9]|{latin1}|{escape}
name {nmchar}+
The CSS1 "nmchar" rule does not include underscores ("_"), colons (":"),
or periods ("."), therefore "class" and "id" attributes should not contain
these characters. They should be replaced with hyphens ("-"). Combined
with HTML's requirements (the first character must be a letter; no
"unicode", "latin1", or "escape" characters), this results in the
``[a-z](-?[a-z0-9]+)*`` pattern.
.. _HTML 4.01 spec: http://www.w3.org/TR/html401
.. _CSS1 spec: http://www.w3.org/TR/REC-CSS1
"""
id = _non_id_chars.sub('-', ' '.join(string.lower().split()))
id = _non_id_at_ends.sub('', id)
return str(id)
_non_id_chars = re.compile('[^a-z0-9]+')
_non_id_at_ends = re.compile('^[-0-9]+|-+$')
def dupname(node):
node['dupname'] = node['name']
del node['name']
def fully_normalize_name(name):
"""Return a case- and whitespace-normalized name."""
return ' '.join(name.lower().split())
def whitespace_normalize_name(name):
"""Return a whitespace-normalized name."""
return ' '.join(name.split())
# This is *not* the official distribution of Optik. See
# http://optik.sourceforge.net/ for the official distro.
#
# This combined module was converted from the "optik" package for Docutils use
# by David Goodger (2002-06-12). Optik is slated for inclusion in the Python
# standard library as OptionParser.py. Once Optik itself becomes a single
# module, Docutils will include the official module and kill off this
# temporary fork.
"""optik
A powerful, extensible, and easy-to-use command-line parser for Python.
By Greg Ward <gward@python.net>
See http://optik.sourceforge.net/
"""
# Copyright (c) 2001 Gregory P. Ward. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# * Neither the name of the author nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
__revision__ = "$Id: optik.py,v 1.2 2003/02/01 09:26:00 andreasjung Exp $"
__version__ = "1.3+"
import sys
import os
from types import TupleType, ListType, DictType, StringType
from distutils.fancy_getopt import wrap_text
SUPPRESS_HELP = "SUPPRESS"+"HELP"
SUPPRESS_USAGE = "SUPPRESS"+"USAGE"
# Not supplying a default is different from a default of None,
# so we need an explicit "not supplied" value.
NO_DEFAULT = "NO"+"DEFAULT"
class OptikError (Exception):
def __init__ (self, msg):
self.msg = msg
def __str__ (self):
return self.msg
class OptionError (OptikError):
"""
Raised if an Option instance is created with invalid or
inconsistent arguments.
"""
def __init__ (self, msg, option):
self.msg = msg
self.option_id = str(option)
def __str__ (self):
if self.option_id:
return "option %s: %s" % (self.option_id, self.msg)
else:
return self.msg
class OptionConflictError (OptionError):
"""
Raised if conflicting options are added to an OptionParser.
"""
class OptionValueError (OptikError):
"""
Raised if an invalid option value is encountered on the command
line.
"""
class BadOptionError (OptikError):
"""
Raised if an invalid or ambiguous option is seen on the command-line.
"""
_builtin_cvt = { "int" : (int, "integer"),
"long" : (long, "long integer"),
"float" : (float, "floating-point"),
"complex" : (complex, "complex") }
def check_builtin (option, opt, value):
(cvt, what) = _builtin_cvt[option.type]
try:
return cvt(value)
except ValueError:
raise OptionValueError(
#"%s: invalid %s argument %r" % (opt, what, value))
"option %s: invalid %s value: %r" % (opt, what, value))
def check_choice(option, opt, value):
if value in option.choices:
return value
else:
raise OptionValueError(
"option %s: invalid choice: %r (choose one of %r)"
% (opt, value, option.choices))
class Option:
"""
Instance attributes:
_short_opts : [string]
_long_opts : [string]
option_string : string
Set when help output is formatted.
action : string
type : string
dest : string
default : any
nargs : int
const : any
choices : [string]
callback : function
callback_args : (any*)
callback_kwargs : { string : any }
help : string
metavar : string
"""
# The list of instance attributes that may be set through
# keyword args to the constructor.
ATTRS = ['action',
'type',
'dest',
'default',
'nargs',
'const',
'choices',
'callback',
'callback_args',
'callback_kwargs',
'help',
'metavar']
# The set of actions allowed by option parsers. Explicitly listed
# here so the constructor can validate its arguments.
ACTIONS = ("store",
"store_const",
"store_true",
"store_false",
"append",
"count",
"callback",
"help",
"version")
# The set of actions that involve storing a value somewhere;
# also listed just for constructor argument validation. (If
# the action is one of these, there must be a destination.)
STORE_ACTIONS = ("store",
"store_const",
"store_true",
"store_false",
"append",
"count")
# The set of actions for which it makes sense to supply a value
# type, ie. where we expect an argument to this option.
TYPED_ACTIONS = ("store",
"append",
"callback")
# The set of known types for option parsers. Again, listed here for
# constructor argument validation.
TYPES = ("string", "int", "long", "float", "complex", "choice")
# Dictionary of argument checking functions, which convert and
# validate option arguments according to the option type.
#
# Signature of checking functions is:
# check(option : Option, opt : string, value : string) -> any
# where
# option is the Option instance calling the checker
# opt is the actual option seen on the command-line
# (eg. "-a", "--file")
# value is the option argument seen on the command-line
#
# The return value should be in the appropriate Python type
# for option.type -- eg. an integer if option.type == "int".
#
# If no checker is defined for a type, arguments will be
# unchecked and remain strings.
TYPE_CHECKER = { "int" : check_builtin,
"long" : check_builtin,
"float" : check_builtin,
"complex" : check_builtin,
"choice" : check_choice,
}
# CHECK_METHODS is a list of unbound method objects; they are called
# by the constructor, in order, after all attributes are
# initialized. The list is created and filled in later, after all
# the methods are actually defined. (I just put it here because I
# like to define and document all class attributes in the same
# place.) Subclasses that add another _check_*() method should
# define their own CHECK_METHODS list that adds their check method
# to those from this class.
CHECK_METHODS = None
# -- Constructor/initialization methods ----------------------------
def __init__ (self, *opts, **attrs):
# Set _short_opts, _long_opts attrs from 'opts' tuple
opts = self._check_opt_strings(opts)
self._set_opt_strings(opts)
# Set all other attrs (action, type, etc.) from 'attrs' dict
self._set_attrs(attrs)
# Check all the attributes we just set. There are lots of
# complicated interdependencies, but luckily they can be farmed
# out to the _check_*() methods listed in CHECK_METHODS -- which
# could be handy for subclasses! The one thing these all share
# is that they raise OptionError if they discover a problem.
for checker in self.CHECK_METHODS:
checker(self)
def _check_opt_strings (self, opts):
# Filter out None because early versions of Optik had exactly
# one short option and one long option, either of which
# could be None.
opts = filter(None, opts)
if not opts:
raise OptionError("at least one option string must be supplied",
self)
return opts
def _set_opt_strings (self, opts):
self._short_opts = []
self._long_opts = []
for opt in opts:
if len(opt) < 2:
raise OptionError(
"invalid option string %r: "
"must be at least two characters long" % opt, self)
elif len(opt) == 2:
if not (opt[0] == "-" and opt[1] != "-"):
raise OptionError(
"invalid short option string %r: "
"must be of the form -x, (x any non-dash char)" % opt,
self)
self._short_opts.append(opt)
else:
if not (opt[0:2] == "--" and opt[2] != "-"):
raise OptionError(
"invalid long option string %r: "
"must start with --, followed by non-dash" % opt,
self)
self._long_opts.append(opt)
def _set_attrs (self, attrs):
for attr in self.ATTRS:
if attrs.has_key(attr):
setattr(self, attr, attrs[attr])
del attrs[attr]
else:
if attr == 'default':
setattr(self, attr, NO_DEFAULT)
else:
setattr(self, attr, None)
if attrs:
raise OptionError(
"invalid keyword arguments: %s" % ", ".join(attrs.keys()),
self)
# -- Constructor validation methods --------------------------------
def _check_action (self):
if self.action is None:
self.action = "store"
elif self.action not in self.ACTIONS:
raise OptionError("invalid action: %r" % self.action, self)
def _check_type (self):
if self.type is None:
# XXX should factor out another class attr here: list of
# actions that *require* a type
if self.action in ("store", "append"):
if self.choices is not None:
# The "choices" attribute implies "choice" type.
self.type = "choice"
else:
# No type given? "string" is the most sensible default.
self.type = "string"
else:
if self.type not in self.TYPES:
raise OptionError("invalid option type: %r" % self.type, self)
if self.action not in self.TYPED_ACTIONS:
raise OptionError(
"must not supply a type for action %r" % self.action, self)
def _check_choice(self):
if self.type == "choice":
if self.choices is None:
raise OptionError(
"must supply a list of choices for type 'choice'", self)
elif type(self.choices) not in (TupleType, ListType):
raise OptionError(
"choices must be a list of strings ('%s' supplied)"
% str(type(self.choices)).split("'")[1], self)
elif self.choices is not None:
raise OptionError(
"must not supply choices for type %r" % self.type, self)
def _check_dest (self):
if self.action in self.STORE_ACTIONS and self.dest is None:
# No destination given, and we need one for this action.
# Glean a destination from the first long option string,
# or from the first short option string if no long options.
if self._long_opts:
# eg. "--foo-bar" -> "foo_bar"
self.dest = self._long_opts[0][2:].replace('-', '_')
else:
self.dest = self._short_opts[0][1]
def _check_const (self):
if self.action != "store_const" and self.const is not None:
raise OptionError(
"'const' must not be supplied for action %r" % self.action,
self)
def _check_nargs (self):
if self.action in self.TYPED_ACTIONS:
if self.nargs is None:
self.nargs = 1
elif self.nargs is not None:
raise OptionError(
"'nargs' must not be supplied for action %r" % self.action,
self)
def _check_callback (self):
if self.action == "callback":
if not callable(self.callback):
raise OptionError(
"callback not callable: %r" % self.callback, self)
if (self.callback_args is not None and
type(self.callback_args) is not TupleType):
raise OptionError(
"callback_args, if supplied, must be a tuple: not %r"
% self.callback_args, self)
if (self.callback_kwargs is not None and
type(self.callback_kwargs) is not DictType):
raise OptionError(
"callback_kwargs, if supplied, must be a dict: not %r"
% self.callback_kwargs, self)
else:
if self.callback is not None:
raise OptionError(
"callback supplied (%r) for non-callback option"
% self.callback, self)
if self.callback_args is not None:
raise OptionError(
"callback_args supplied for non-callback option", self)
if self.callback_kwargs is not None:
raise OptionError(
"callback_kwargs supplied for non-callback option", self)
CHECK_METHODS = [_check_action,
_check_type,
_check_choice,
_check_dest,
_check_const,
_check_nargs,
_check_callback]
# -- Miscellaneous methods -----------------------------------------
def __str__ (self):
if self._short_opts or self._long_opts:
return "/".join(self._short_opts + self._long_opts)
else:
raise RuntimeError, "short_opts and long_opts both empty!"
def takes_value (self):
return self.type is not None
# -- Processing methods --------------------------------------------
def check_value (self, opt, value):
checker = self.TYPE_CHECKER.get(self.type)
if checker is None:
return value
else:
return checker(self, opt, value)
def process (self, opt, value, values, parser):
# First, convert the value(s) to the right type. Howl if any
# value(s) are bogus.
if value is not None:
if self.nargs == 1:
value = self.check_value(opt, value)
else:
value = tuple([self.check_value(opt, v) for v in value])
# And then take whatever action is expected of us.
# This is a separate method to make life easier for
# subclasses to add new actions.
return self.take_action(
self.action, self.dest, opt, value, values, parser)
def take_action (self, action, dest, opt, value, values, parser):
if action == "store":
setattr(values, dest, value)
elif action == "store_const":
setattr(values, dest, self.const)
elif action == "store_true":
setattr(values, dest, 1)
elif action == "store_false":
setattr(values, dest, 0)
elif action == "append":
values.ensure_value(dest, []).append(value)
elif action == "count":
setattr(values, dest, values.ensure_value(dest, 0) + 1)
elif action == "callback":
args = self.callback_args or ()
kwargs = self.callback_kwargs or {}
self.callback(self, opt, value, parser, *args, **kwargs)
elif action == "help":
parser.print_help()
sys.exit(0)
elif action == "version":
parser.print_version()
sys.exit(0)
else:
raise RuntimeError, "unknown action %r" % self.action
return 1
# class Option
# Some day, there might be many Option classes. As of Optik 1.3, the
# preferred way to instantiate Options is indirectly, via make_option(),
# which will become a factory function when there are many Option
# classes.
make_option = Option
STD_HELP_OPTION = Option("-h", "--help",
action="help",
help="show this help message and exit")
STD_VERSION_OPTION = Option("--version",
action="version",
help="show program's version number and exit")
class OptionContainer:
"""
Abstract base class.
Class attributes:
standard_option_list : [Option]
List of standard options that will be accepted by all instances
of this parser class (intended to be overridden by subclasses).
Instance attributes:
option_list : [Option]
The list of Option objects contained by this OptionContainer.
_short_opt : { string : Option }
Dictionary mapping short option strings, eg. "-f" or "-X",
to the Option instances that implement them. If an Option
has multiple short option strings, it will appears in this
dictionary multiple times. [1]
_long_opt : { string : Option }
Dictionary mapping long option strings, eg. "--file" or
"--exclude", to the Option instances that implement them.
Again, a given Option can occur multiple times in this
dictionary. [1]
defaults : { string : any }
Dictionary mapping option destination names to default
values for each destination. [1]
[1] These mappings are common to (shared by) all components of the
controlling OptionParser, where they are initially created.
"""
standard_option_list = []
def __init__(self, parser, description=None, option_list=None):
# List of Options is local to this OptionContainer.
self.option_list = []
# The shared mappings are stored in the parser.
self._short_opt = parser._short_opt
self._long_opt = parser._long_opt
self.defaults = parser.defaults
#
self.format = parser.format
self.option_class = parser.option_class
self.conflict_handler = parser.conflict_handler
self.set_description(description)
self._populate_option_list(option_list)
def set_description(self, description):
self.description = description
def _populate_option_list(self, option_list, version=None, help=None):
if self.standard_option_list:
self.add_options(self.standard_option_list)
if option_list:
self.add_options(option_list)
if version:
self.add_option(STD_VERSION_OPTION)
if help:
self.add_option(STD_HELP_OPTION)
# -- Option-adding methods -----------------------------------------
def _check_conflict (self, option):
conflict_opts = []
for opt in option._short_opts:
if self._short_opt.has_key(opt):
conflict_opts.append((opt, self._short_opt[opt]))
for opt in option._long_opts:
if self._long_opt.has_key(opt):
conflict_opts.append((opt, self._long_opt[opt]))
if conflict_opts:
handler = self.conflict_handler
if handler == "ignore": # behaviour for Optik 1.0, 1.1
pass
elif handler == "error": # new in 1.2
raise OptionConflictError(
"conflicting option string(s): %s"
% ", ".join([co[0] for co in conflict_opts]),
option)
elif handler == "resolve": # new in 1.2
for (opt, c_option) in conflict_opts:
if opt.startswith("--"):
c_option._long_opts.remove(opt)
del self._long_opt[opt]
else:
c_option._short_opts.remove(opt)
del self._short_opt[opt]
if not (c_option._short_opts or c_option._long_opts):
c_option.container.option_list.remove(c_option)
def add_option (self, *args, **kwargs):
"""add_option(Option)
add_option(opt_str, ..., kwarg=val, ...)
"""
if type(args[0]) is StringType:
option = self.option_class(*args, **kwargs)
elif len(args) == 1 and not kwargs:
option = args[0]
if not isinstance(option, Option):
raise TypeError, "not an Option instance: %r" % option
else:
raise TypeError, "invalid arguments"
self._check_conflict(option)
self.option_list.append(option)
option.container = self
for opt in option._short_opts:
self._short_opt[opt] = option
for opt in option._long_opts:
self._long_opt[opt] = option
if option.dest is not None: # option has a dest, we need a default
if option.default is not NO_DEFAULT:
self.defaults[option.dest] = option.default
elif not self.defaults.has_key(option.dest):
self.defaults[option.dest] = None
def add_options (self, option_list):
for option in option_list:
self.add_option(option)
# -- Option query/removal methods ----------------------------------
def get_option (self, opt_str):
return (self._short_opt.get(opt_str) or
self._long_opt.get(opt_str))
def has_option (self, opt_str):
return (self._short_opt.has_key(opt_str) or
self._long_opt.has_key(opt_str))
def remove_option (self, opt_str):
option = self._short_opt.get(opt_str)
if option is None:
option = self._long_opt.get(opt_str)
if option is None:
raise ValueError("no such option %r" % opt_str)
for opt in option._short_opts:
del self._short_opt[opt]
for opt in option._long_opts:
del self._long_opt[opt]
option.container.option_list.remove(option)
# -- Feedback methods ----------------------------------------------
def get_option_help(self):
if not self.option_list:
return ""
result = [] # list of strings to "".join() later
for option in self.option_list:
if not option.help is SUPPRESS_HELP:
result.append(self.format.format_option(option))
return "".join(result)
def get_description_help(self):
if self.description:
return self.format.format_description(self.description)
else:
return ""
def get_help(self):
result = ""
if self.description:
result = self.get_description_help() + "\n"
return result + self.get_option_help()
class OptionGroup(OptionContainer):
def __init__(self, parser, title, description=None):
OptionContainer.__init__(self, parser, description)
self.parser = parser
self.title = title
self.description = description
def set_title(self, title):
self.title = title
def get_help(self):
result = self.format.format_heading(self.title)
self.format.increase_nesting()
result += OptionContainer.get_help(self)
self.format.decrease_nesting()
return result
class Values:
def __init__ (self, defaults=None):
if defaults:
for (attr, val) in defaults.items():
setattr(self, attr, val)
def __repr__(self):
return "%s(%r)" % (self.__class__, self.__dict__)
def _update_careful (self, dict):
"""
Update the option values from an arbitrary dictionary, but only
use keys from dict that already have a corresponding attribute
in self. Any keys in dict without a corresponding attribute
are silently ignored.
"""
for attr in dir(self):
if dict.has_key(attr):
dval = dict[attr]
if dval is not None:
setattr(self, attr, dval)
def _update_loose (self, dict):
"""
Update the option values from an arbitrary dictionary,
using all keys from the dictionary regardless of whether
they have a corresponding attribute in self or not.
"""
self.__dict__.update(dict)
def _update (self, dict, mode):
if mode == "careful":
self._update_careful(dict)
elif mode == "loose":
self._update_loose(dict)
else:
raise ValueError, "invalid update mode: %r" % mode
def read_module (self, modname, mode="careful"):
__import__(modname)
mod = sys.modules[modname]
self._update(vars(mod), mode)
def read_file (self, filename, mode="careful"):
vars = {}
execfile(filename, vars)
self._update(vars, mode)
def ensure_value (self, attr, value):
if not hasattr(self, attr) or getattr(self, attr) is None:
setattr(self, attr, value)
return getattr(self, attr)
class OptionParser(OptionContainer):
"""
Instance attributes:
usage : string
a usage string for your program. Before it is displayed
to the user, "%prog" will be expanded to the name of
your program (os.path.basename(sys.argv[0])).
allow_interspersed_args : boolean = true
If true, positional arguments may be interspersed with options.
Assuming -a and -b each take a single argument, the command-line
-ablah foo bar -bboo baz
will be interpreted the same as
-ablah -bboo -- foo bar baz
If this flag were false, that command line would be interpreted as
-ablah -- foo bar -bboo baz
-- ie. we stop processing options as soon as we see the first
non-option argument. (This is the tradition followed by
Python's getopt module, Perl's Getopt::Std, and other argument-
parsing libraries, but it is generally annoying to users.)
rargs : [string]
the argument list currently being parsed. Only set when
parse_args() is active, and continually trimmed down as
we consume arguments. Mainly there for the benefit of
callback options.
largs : [string]
the list of leftover arguments that we have skipped while
parsing options. If allow_interspersed_args is false, this
list is always empty.
values : Values
the set of option values currently being accumulated. Only
set when parse_args() is active. Also mainly for callbacks.
Because of the 'rargs', 'largs', and 'values' attributes,
OptionParser is not thread-safe. If, for some perverse reason, you
need to parse command-line arguments simultaneously in different
threads, use different OptionParser instances.
"""
def __init__ (self,
usage=None,
description=None,
option_list=None,
option_class=Option,
version=None,
help=1,
conflict_handler="error",
format=None):
"""Override OptionContainer.__init__."""
self.set_usage(usage)
self.set_description(description)
self.option_class = option_class
self.version = version
self.set_conflict_handler(conflict_handler)
self.allow_interspersed_args = 1
if not format:
format = Indented()
self.format = format
# Create the various lists and dicts that constitute the
# "option list". See class docstring for details about
# each attribute.
self._create_option_list()
# Populate the option list; initial sources are the
# standard_option_list class attribute, the 'option_list' argument,
# and the STD_VERSION_OPTION and STD_HELP_OPTION globals (if 'version'
# and/or 'help' supplied).
self._populate_option_list(option_list, version=version, help=help)
self._init_parsing_state()
# -- Private methods -----------------------------------------------
# (used by the constructor)
def _create_option_list (self):
self.option_list = []
self.option_groups = []
self._short_opt = {} # single letter -> Option instance
self._long_opt = {} # long option -> Option instance
self.defaults = {} # maps option dest -> default value
def _init_parsing_state (self):
# These are set in parse_args() for the convenience of callbacks.
self.rargs = None
self.largs = None
self.values = None
# -- Simple modifier methods ---------------------------------------
def set_usage (self, usage):
if usage is None:
self.usage = "%prog [options]"
elif usage is SUPPRESS_USAGE:
self.usage = None
else:
self.usage = usage
def enable_interspersed_args (self):
self.allow_interspersed_args = 1
def disable_interspersed_args (self):
self.allow_interspersed_args = 0
def set_conflict_handler (self, handler):
if handler not in ("ignore", "error", "resolve"):
raise ValueError, "invalid conflict_resolution value %r" % handler
self.conflict_handler = handler
def set_default (self, dest, value):
self.defaults[dest] = value
def set_defaults (self, **kwargs):
self.defaults.update(kwargs)
def get_default_values(self):
return Values(self.defaults)
# -- OptionGroup methods -------------------------------------------
def add_option_group(self, group):
self.option_groups.append(group)
def get_option_group(self, opt_str):
option = (self._short_opt.get(opt_str) or
self._long_opt.get(opt_str))
if option and option.container is not self:
return option.container
return None
# -- Option-parsing methods ----------------------------------------
def _get_args (self, args):
if args is None:
return sys.argv[1:]
else:
return args[:] # don't modify caller's list
def parse_args (self, args=None, values=None):
"""
parse_args(args : [string] = sys.argv[1:],
values : Values = None)
-> (values : Values, args : [string])
Parse the command-line options found in 'args' (default:
sys.argv[1:]). Any errors result in a call to 'error()', which
by default prints the usage message to stderr and calls
sys.exit() with an error message. On success returns a pair
(values, args) where 'values' is an Values instance (with all
your option values) and 'args' is the list of arguments left
over after parsing options.
"""
rargs = self._get_args(args)
if values is None:
values = self.get_default_values()
# Store the halves of the argument list as attributes for the
# convenience of callbacks:
# rargs
# the rest of the command-line (the "r" stands for
# "remaining" or "right-hand")
# largs
# the leftover arguments -- ie. what's left after removing
# options and their arguments (the "l" stands for "leftover"
# or "left-hand")
self.rargs = rargs
self.largs = largs = []
self.values = values
try:
stop = self._process_args(largs, rargs, values)
except (BadOptionError, OptionValueError), err:
self.error(err.msg)
args = largs + rargs
return self.check_values(values, args)
def check_values (self, values, args):
"""
check_values(values : Values, args : [string])
-> (values : Values, args : [string])
Check that the supplied option values and leftover arguments are
valid. Returns the option values and leftover arguments
(possibly adjusted, possibly completely new -- whatever you
like). Default implementation just returns the passed-in
values; subclasses may override as desired.
"""
return (values, args)
def _process_args (self, largs, rargs, values):
"""_process_args(largs : [string],
rargs : [string],
values : Values)
Process command-line arguments and populate 'values', consuming
options and arguments from 'rargs'. If 'allow_interspersed_args' is
false, stop at the first non-option argument. If true, accumulate any
interspersed non-option arguments in 'largs'.
"""
while rargs:
arg = rargs[0]
# We handle bare "--" explicitly, and bare "-" is handled by the
# standard arg handler since the short arg case ensures that the
# len of the opt string is greater than 1.
if arg == "--":
del rargs[0]
return
elif arg[0:2] == "--":
# process a single long option (possibly with value(s))
self._process_long_opt(rargs, values)
elif arg[:1] == "-" and len(arg) > 1:
# process a cluster of short options (possibly with
# value(s) for the last one only)
self._process_short_opts(rargs, values)
elif self.allow_interspersed_args:
largs.append(arg)
del rargs[0]
else:
return # stop now, leave this arg in rargs
# Say this is the original argument list:
# [arg0, arg1, ..., arg(i-1), arg(i), arg(i+1), ..., arg(N-1)]
# ^
# (we are about to process arg(i)).
#
# Then rargs is [arg(i), ..., arg(N-1)] and largs is a *subset* of
# [arg0, ..., arg(i-1)] (any options and their arguments will have
# been removed from largs).
#
# The while loop will usually consume 1 or more arguments per pass.
# If it consumes 1 (eg. arg is an option that takes no arguments),
# then after _process_arg() is done the situation is:
#
# largs = subset of [arg0, ..., arg(i)]
# rargs = [arg(i+1), ..., arg(N-1)]
#
# If allow_interspersed_args is false, largs will always be
# *empty* -- still a subset of [arg0, ..., arg(i-1)], but
# not a very interesting subset!
def _match_long_opt (self, opt):
"""_match_long_opt(opt : string) -> string
Determine which long option string 'opt' matches, ie. which one
it is an unambiguous abbrevation for. Raises BadOptionError if
'opt' doesn't unambiguously match any long option string.
"""
return _match_abbrev(opt, self._long_opt)
def _process_long_opt (self, rargs, values):
arg = rargs.pop(0)
# Value explicitly attached to arg? Pretend it's the next
# argument.
if "=" in arg:
(opt, next_arg) = arg.split("=", 1)
rargs.insert(0, next_arg)
had_explicit_value = 1
else:
opt = arg
had_explicit_value = 0
opt = self._match_long_opt(opt)
option = self._long_opt[opt]
if option.takes_value():
nargs = option.nargs
if len(rargs) < nargs:
if nargs == 1:
self.error("%s option requires a value" % opt)
else:
self.error("%s option requires %d values"
% (opt, nargs))
elif nargs == 1:
value = rargs.pop(0)
else:
value = tuple(rargs[0:nargs])
del rargs[0:nargs]
elif had_explicit_value:
self.error("%s option does not take a value" % opt)
else:
value = None
option.process(opt, value, values, self)
def _process_short_opts (self, rargs, values):
arg = rargs.pop(0)
stop = 0
i = 1
for ch in arg[1:]:
opt = "-" + ch
option = self._short_opt.get(opt)
i += 1 # we have consumed a character
if not option:
self.error("no such option: %s" % opt)
if option.takes_value():
# Any characters left in arg? Pretend they're the
# next arg, and stop consuming characters of arg.
if i < len(arg):
rargs.insert(0, arg[i:])
stop = 1
nargs = option.nargs
if len(rargs) < nargs:
if nargs == 1:
self.error("%s option requires a value" % opt)
else:
self.error("%s option requires %s values"
% (opt, nargs))
elif nargs == 1:
value = rargs.pop(0)
else:
value = tuple(rargs[0:nargs])
del rargs[0:nargs]
else: # option doesn't take a value
value = None
option.process(opt, value, values, self)
if stop:
break
# -- Feedback methods ----------------------------------------------
def error (self, msg):
"""error(msg : string)
Print a usage message incorporating 'msg' to stderr and exit.
If you override this in a subclass, it should not return -- it
should either exit or raise an exception.
"""
self.print_usage(sys.stderr)
sys.exit("%s: error: %s" % (get_prog_name(), msg))
def get_usage_help(self):
if self.usage:
return self.format.format_usage(
self.usage.replace("%prog", get_prog_name()))
else:
return ""
def print_usage (self, file=None):
"""print_usage(file : file = stdout)
Print the usage message for the current program (self.usage) to
'file' (default stdout). Any occurence of the string "%prog" in
self.usage is replaced with the name of the current program
(basename of sys.argv[0]). Does nothing if self.usage is empty
or not defined.
"""
if self.usage:
print >>file, self.get_usage_help()
def get_version_help(self):
if self.version:
return self.version.replace("%prog", get_prog_name())
else:
return ""
def print_version (self, file=None):
"""print_version(file : file = stdout)
Print the version message for this program (self.version) to
'file' (default stdout). As with print_usage(), any occurence
of "%prog" in self.version is replaced by the current program's
name. Does nothing if self.version is empty or undefined.
"""
if self.version:
print >>file, self.get_version_help()
def get_option_help(self):
self.format.store_option_strings(self)
result = []
result.append(self.format.format_heading("Options"))
self.format.increase_nesting()
if self.option_list:
result.append(OptionContainer.get_option_help(self))
result.append("\n")
for group in self.option_groups:
result.append(group.get_help())
result.append("\n")
self.format.decrease_nesting()
# Drop the last "\n", or the header if no options or option groups:
return "".join(result[:-1])
def get_help(self):
result = []
if self.usage:
result.append(self.get_usage_help() + "\n")
if self.description:
result.append(self.get_description_help() + "\n")
result.append(self.get_option_help())
return "".join(result)
def print_help (self, file=None):
"""print_help(file : file = stdout)
Print an extended help message, listing all options and any
help text provided with them, to 'file' (default stdout).
"""
if file is None:
file = sys.stdout
file.write(self.get_help())
# class OptionParser
class HelpFormat:
"""
"--help" output format; abstract base class (Strategy design pattern).
Instance attributes:
indent_increment : int
The number of columns to indent per nesting level.
max_help_position : int
The maximum starting column for option help text.
help_position : int
The calculated starting column for option help text;
initially the same as the maximum.
width : int
The overall width (in columns) for output.
current_indent : int
In columns, calculated.
level : int
Increased for each additional nesting level.
help_width : int
Number of columns available for option help text, calculated.
"""
def __init__(self, indent_increment, max_help_position, width,
short_first):
self.indent_increment = indent_increment
self.help_position = self.max_help_position = max_help_position
self.width = width
self.current_indent = 0
self.level = 0
self.help_width = width - max_help_position
if short_first:
self.format_option_strings = self.format_option_strings_short_first
else:
self.format_option_strings = self.format_option_strings_long_first
def increase_nesting(self):
self.current_indent += self.indent_increment
self.level += 1
def decrease_nesting(self):
self.current_indent -= self.indent_increment
assert self.current_indent >= 0, "Indent decreased below 0."
self.level -= 1
def format_usage(self, usage):
raise NotImplementedError
def format_heading(self, heading):
raise NotImplementedError
def format_description(self, description):
desc_width = self.width - self.current_indent
desc_lines = wrap_text(description, desc_width)
result = ["%*s%s\n" % (self.current_indent, "", line)
for line in desc_lines]
return "".join(result)
def format_option(self, option):
# The help for each option consists of two parts:
# * the opt strings and metavars
# eg. ("-x", or "-fFILENAME, --file=FILENAME")
# * the user-supplied help string
# eg. ("turn on expert mode", "read data from FILENAME")
#
# If possible, we write both of these on the same line:
# -x turn on expert mode
#
# But if the opt string list is too long, we put the help
# string on a second line, indented to the same column it would
# start in if it fit on the first line.
# -fFILENAME, --file=FILENAME
# read data from FILENAME
result = []
opts = option.option_strings
opt_width = self.help_position - self.current_indent - 2
if len(opts) > opt_width:
opts = "%*s%s\n" % (self.current_indent, "", opts)
indent_first = self.help_position
else: # start help on same line as opts
opts = "%*s%-*s " % (self.current_indent, "", opt_width, opts)
indent_first = 0
result.append(opts)
if option.help:
help_lines = wrap_text(option.help, self.help_width)
result.append("%*s%s\n" % (indent_first, "", help_lines[0]))
result.extend(["%*s%s\n" % (self.help_position, "", line)
for line in help_lines[1:]])
elif opts[-1] != "\n":
result.append("\n")
return "".join(result)
def store_option_strings(self, parser):
self.increase_nesting()
max_len = 0
for opt in parser.option_list:
strings = self.format_option_strings(opt)
opt.option_strings = strings
max_len = max(max_len, len(strings) + self.current_indent)
self.increase_nesting()
for group in parser.option_groups:
for opt in group.option_list:
strings = self.format_option_strings(opt)
opt.option_strings = strings
max_len = max(max_len, len(strings) + self.current_indent)
self.decrease_nesting()
self.decrease_nesting()
self.help_position = min(max_len + 2, self.max_help_position)
def format_option_strings(self, option):
"""Return a comma-separated list of option strigs & metavariables."""
raise NotImplementedError(
"Virtual method; use format_option_strings_short_first or "
"format_option_strings_long_first instead.")
def format_option_strings_short_first(self, option):
opts = [] # list of "-a" or "--foo=FILE" strings
takes_value = option.takes_value()
if takes_value:
metavar = option.metavar or option.dest.upper()
for sopt in option._short_opts:
opts.append(sopt + metavar)
for lopt in option._long_opts:
opts.append(lopt + "=" + metavar)
else:
for opt in option._short_opts + option._long_opts:
opts.append(opt)
return ", ".join(opts)
def format_option_strings_long_first(self, option):
opts = [] # list of "-a" or "--foo=FILE" strings
takes_value = option.takes_value()
if takes_value:
metavar = option.metavar or option.dest.upper()
for lopt in option._long_opts:
opts.append(lopt + "=" + metavar)
for sopt in option._short_opts:
opts.append(sopt + metavar)
else:
for opt in option._long_opts + option._short_opts:
opts.append(opt)
return ", ".join(opts)
class Indented(HelpFormat):
"""Formats help with indented section bodies."""
def __init__(self, indent_increment=2, max_help_position=24, width=78,
short_first=1):
HelpFormat.__init__(self, indent_increment, max_help_position, width,
short_first)
def format_usage(self, usage):
return "Usage: %s\n" % usage
def format_heading(self, heading):
return "%*s%s:\n" % (self.current_indent, "", heading)
class Titled(HelpFormat):
"""Formats help with underlined section headers."""
def __init__(self, indent_increment=0, max_help_position=24, width=78,
short_first=None):
HelpFormat.__init__(self, indent_increment, max_help_position, width,
short_first)
def format_usage(self, usage):
return "%s %s\n" % (self.format_heading("Usage"), usage)
def format_heading(self, heading):
return "%s\n%s\n" % (heading, "=-"[self.level] * len(heading))
def _match_abbrev (s, wordmap):
"""_match_abbrev(s : string, wordmap : {string : Option}) -> string
Return the string key in 'wordmap' for which 's' is an unambiguous
abbreviation. If 's' is found to be ambiguous or doesn't match any of
'words', raise BadOptionError.
"""
# Is there an exact match?
if wordmap.has_key(s):
return s
else:
# Isolate all words with s as a prefix.
possibilities = [word for word in wordmap.keys()
if word.startswith(s)]
# No exact match, so there had better be just one possibility.
if len(possibilities) == 1:
return possibilities[0]
elif not possibilities:
raise BadOptionError("no such option: %s" % s)
else:
# More than one possible completion: ambiguous prefix.
raise BadOptionError("ambiguous option: %s (%s?)"
% (s, ", ".join(possibilities)))
def get_prog_name ():
return os.path.basename(sys.argv[0])
"""optparse - a powerful, extensible, and easy-to-use option parser.
By Greg Ward <gward@python.net>
Originally distributed as Optik; see http://optik.sourceforge.net/ .
If you have problems with this module, please do not file bugs,
patches, or feature requests with Python; instead, use Optik's
SourceForge project page:
http://sourceforge.net/projects/optik
For support, use the optik-users@lists.sourceforge.net mailing list
(http://lists.sourceforge.net/lists/listinfo/optik-users).
"""
# Python developers: please do not make changes to this file, since
# it is automatically generated from the Optik source code.
__version__ = "1.4.1+"
__all__ = ['Option',
'SUPPRESS_HELP',
'SUPPRESS_USAGE',
'STD_HELP_OPTION',
'STD_VERSION_OPTION',
'Values',
'OptionContainer',
'OptionGroup',
'OptionParser',
'HelpFormatter',
'IndentedHelpFormatter',
'TitledHelpFormatter',
'OptParseError',
'OptionError',
'OptionConflictError',
'OptionValueError',
'BadOptionError']
__copyright__ = """
Copyright (c) 2001-2003 Gregory P. Ward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the author nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
import sys, os
import types
import textwrap
class OptParseError (Exception):
def __init__ (self, msg):
self.msg = msg
def __str__ (self):
return self.msg
class OptionError (OptParseError):
"""
Raised if an Option instance is created with invalid or
inconsistent arguments.
"""
def __init__ (self, msg, option):
self.msg = msg
self.option_id = str(option)
def __str__ (self):
if self.option_id:
return "option %s: %s" % (self.option_id, self.msg)
else:
return self.msg
class OptionConflictError (OptionError):
"""
Raised if conflicting options are added to an OptionParser.
"""
class OptionValueError (OptParseError):
"""
Raised if an invalid option value is encountered on the command
line.
"""
class BadOptionError (OptParseError):
"""
Raised if an invalid or ambiguous option is seen on the command-line.
"""
class HelpFormatter:
"""
Abstract base class for formatting option help. OptionParser
instances should use one of the HelpFormatter subclasses for
formatting help; by default IndentedHelpFormatter is used.
Instance attributes:
indent_increment : int
the number of columns to indent per nesting level
max_help_position : int
the maximum starting column for option help text
help_position : int
the calculated starting column for option help text;
initially the same as the maximum
width : int
total number of columns for output
level : int
current indentation level
current_indent : int
current indentation level (in columns)
help_width : int
number of columns available for option help text (calculated)
"""
def __init__ (self,
indent_increment,
max_help_position,
width,
short_first):
self.indent_increment = indent_increment
self.help_position = self.max_help_position = max_help_position
self.width = width
self.current_indent = 0
self.level = 0
self.help_width = width - max_help_position
self.short_first = short_first
def indent (self):
self.current_indent += self.indent_increment
self.level += 1
def dedent (self):
self.current_indent -= self.indent_increment
assert self.current_indent >= 0, "Indent decreased below 0."
self.level -= 1
def format_usage (self, usage):
raise NotImplementedError, "subclasses must implement"
def format_heading (self, heading):
raise NotImplementedError, "subclasses must implement"
def format_description (self, description):
desc_width = self.width - self.current_indent
indent = " "*self.current_indent
return textwrap.fill(description, desc_width,
initial_indent=indent,
subsequent_indent=indent)
def format_option (self, option):
# The help for each option consists of two parts:
# * the opt strings and metavars
# eg. ("-x", or "-fFILENAME, --file=FILENAME")
# * the user-supplied help string
# eg. ("turn on expert mode", "read data from FILENAME")
#
# If possible, we write both of these on the same line:
# -x turn on expert mode
#
# But if the opt string list is too long, we put the help
# string on a second line, indented to the same column it would
# start in if it fit on the first line.
# -fFILENAME, --file=FILENAME
# read data from FILENAME
result = []
opts = option.option_strings
opt_width = self.help_position - self.current_indent - 2
if len(opts) > opt_width:
opts = "%*s%s\n" % (self.current_indent, "", opts)
indent_first = self.help_position
else: # start help on same line as opts
opts = "%*s%-*s " % (self.current_indent, "", opt_width, opts)
indent_first = 0
result.append(opts)
if option.help:
help_lines = textwrap.wrap(option.help, self.help_width)
result.append("%*s%s\n" % (indent_first, "", help_lines[0]))
result.extend(["%*s%s\n" % (self.help_position, "", line)
for line in help_lines[1:]])
elif opts[-1] != "\n":
result.append("\n")
return "".join(result)
def store_option_strings (self, parser):
self.indent()
max_len = 0
for opt in parser.option_list:
strings = self.format_option_strings(opt)
opt.option_strings = strings
max_len = max(max_len, len(strings) + self.current_indent)
self.indent()
for group in parser.option_groups:
for opt in group.option_list:
strings = self.format_option_strings(opt)
opt.option_strings = strings
max_len = max(max_len, len(strings) + self.current_indent)
self.dedent()
self.dedent()
self.help_position = min(max_len + 2, self.max_help_position)
def format_option_strings (self, option):
"""Return a comma-separated list of option strings & metavariables."""
if option.takes_value():
metavar = option.metavar or option.dest.upper()
short_opts = [sopt + metavar for sopt in option._short_opts]
long_opts = [lopt + "=" + metavar for lopt in option._long_opts]
else:
short_opts = option._short_opts
long_opts = option._long_opts
if self.short_first:
opts = short_opts + long_opts
else:
opts = long_opts + short_opts
return ", ".join(opts)
class IndentedHelpFormatter (HelpFormatter):
"""Format help with indented section bodies.
"""
def __init__ (self,
indent_increment=2,
max_help_position=24,
width=80,
short_first=1):
HelpFormatter.__init__(
self, indent_increment, max_help_position, width, short_first)
def format_usage (self, usage):
return "usage: %s\n" % usage
def format_heading (self, heading):
return "%*s%s:\n" % (self.current_indent, "", heading)
class TitledHelpFormatter (HelpFormatter):
"""Format help with underlined section headers.
"""
def __init__ (self,
indent_increment=0,
max_help_position=24,
width=80,
short_first=0):
HelpFormatter.__init__ (
self, indent_increment, max_help_position, width, short_first)
def format_usage (self, usage):
return "%s %s\n" % (self.format_heading("Usage"), usage)
def format_heading (self, heading):
return "%s\n%s\n" % (heading, "=-"[self.level] * len(heading))
_builtin_cvt = { "int" : (int, "integer"),
"long" : (long, "long integer"),
"float" : (float, "floating-point"),
"complex" : (complex, "complex") }
def check_builtin (option, opt, value):
(cvt, what) = _builtin_cvt[option.type]
try:
return cvt(value)
except ValueError:
raise OptionValueError(
#"%s: invalid %s argument %r" % (opt, what, value))
"option %s: invalid %s value: %r" % (opt, what, value))
def check_choice(option, opt, value):
if value in option.choices:
return value
else:
choices = ", ".join(map(repr, option.choices))
raise OptionValueError(
"option %s: invalid choice: %r (choose from %s)"
% (opt, value, choices))
# Not supplying a default is different from a default of None,
# so we need an explicit "not supplied" value.
NO_DEFAULT = "NO"+"DEFAULT"
class Option:
"""
Instance attributes:
_short_opts : [string]
_long_opts : [string]
action : string
type : string
dest : string
default : any
nargs : int
const : any
choices : [string]
callback : function
callback_args : (any*)
callback_kwargs : { string : any }
help : string
metavar : string
"""
# The list of instance attributes that may be set through
# keyword args to the constructor.
ATTRS = ['action',
'type',
'dest',
'default',
'nargs',
'const',
'choices',
'callback',
'callback_args',
'callback_kwargs',
'help',
'metavar']
# The set of actions allowed by option parsers. Explicitly listed
# here so the constructor can validate its arguments.
ACTIONS = ("store",
"store_const",
"store_true",
"store_false",
"append",
"count",
"callback",
"help",
"version")
# The set of actions that involve storing a value somewhere;
# also listed just for constructor argument validation. (If
# the action is one of these, there must be a destination.)
STORE_ACTIONS = ("store",
"store_const",
"store_true",
"store_false",
"append",
"count")
# The set of actions for which it makes sense to supply a value
# type, ie. where we expect an argument to this option.
TYPED_ACTIONS = ("store",
"append",
"callback")
# The set of known types for option parsers. Again, listed here for
# constructor argument validation.
TYPES = ("string", "int", "long", "float", "complex", "choice")
# Dictionary of argument checking functions, which convert and
# validate option arguments according to the option type.
#
# Signature of checking functions is:
# check(option : Option, opt : string, value : string) -> any
# where
# option is the Option instance calling the checker
# opt is the actual option seen on the command-line
# (eg. "-a", "--file")
# value is the option argument seen on the command-line
#
# The return value should be in the appropriate Python type
# for option.type -- eg. an integer if option.type == "int".
#
# If no checker is defined for a type, arguments will be
# unchecked and remain strings.
TYPE_CHECKER = { "int" : check_builtin,
"long" : check_builtin,
"float" : check_builtin,
"complex" : check_builtin,
"choice" : check_choice,
}
# CHECK_METHODS is a list of unbound method objects; they are called
# by the constructor, in order, after all attributes are
# initialized. The list is created and filled in later, after all
# the methods are actually defined. (I just put it here because I
# like to define and document all class attributes in the same
# place.) Subclasses that add another _check_*() method should
# define their own CHECK_METHODS list that adds their check method
# to those from this class.
CHECK_METHODS = None
# -- Constructor/initialization methods ----------------------------
def __init__ (self, *opts, **attrs):
# Set _short_opts, _long_opts attrs from 'opts' tuple.
# Have to be set now, in case no option strings are supplied.
self._short_opts = []
self._long_opts = []
opts = self._check_opt_strings(opts)
self._set_opt_strings(opts)
# Set all other attrs (action, type, etc.) from 'attrs' dict
self._set_attrs(attrs)
# Check all the attributes we just set. There are lots of
# complicated interdependencies, but luckily they can be farmed
# out to the _check_*() methods listed in CHECK_METHODS -- which
# could be handy for subclasses! The one thing these all share
# is that they raise OptionError if they discover a problem.
for checker in self.CHECK_METHODS:
checker(self)
def _check_opt_strings (self, opts):
# Filter out None because early versions of Optik had exactly
# one short option and one long option, either of which
# could be None.
opts = filter(None, opts)
if not opts:
raise TypeError("at least one option string must be supplied")
return opts
def _set_opt_strings (self, opts):
for opt in opts:
if len(opt) < 2:
raise OptionError(
"invalid option string %r: "
"must be at least two characters long" % opt, self)
elif len(opt) == 2:
if not (opt[0] == "-" and opt[1] != "-"):
raise OptionError(
"invalid short option string %r: "
"must be of the form -x, (x any non-dash char)" % opt,
self)
self._short_opts.append(opt)
else:
if not (opt[0:2] == "--" and opt[2] != "-"):
raise OptionError(
"invalid long option string %r: "
"must start with --, followed by non-dash" % opt,
self)
self._long_opts.append(opt)
def _set_attrs (self, attrs):
for attr in self.ATTRS:
if attrs.has_key(attr):
setattr(self, attr, attrs[attr])
del attrs[attr]
else:
if attr == 'default':
setattr(self, attr, NO_DEFAULT)
else:
setattr(self, attr, None)
if attrs:
raise OptionError(
"invalid keyword arguments: %s" % ", ".join(attrs.keys()),
self)
# -- Constructor validation methods --------------------------------
def _check_action (self):
if self.action is None:
self.action = "store"
elif self.action not in self.ACTIONS:
raise OptionError("invalid action: %r" % self.action, self)
def _check_type (self):
if self.type is None:
# XXX should factor out another class attr here: list of
# actions that *require* a type
if self.action in ("store", "append"):
if self.choices is not None:
# The "choices" attribute implies "choice" type.
self.type = "choice"
else:
# No type given? "string" is the most sensible default.
self.type = "string"
else:
if self.type not in self.TYPES:
raise OptionError("invalid option type: %r" % self.type, self)
if self.action not in self.TYPED_ACTIONS:
raise OptionError(
"must not supply a type for action %r" % self.action, self)
def _check_choice(self):
if self.type == "choice":
if self.choices is None:
raise OptionError(
"must supply a list of choices for type 'choice'", self)
elif type(self.choices) not in (types.TupleType, types.ListType):
raise OptionError(
"choices must be a list of strings ('%s' supplied)"
% str(type(self.choices)).split("'")[1], self)
elif self.choices is not None:
raise OptionError(
"must not supply choices for type %r" % self.type, self)
def _check_dest (self):
if self.action in self.STORE_ACTIONS and self.dest is None:
# No destination given, and we need one for this action.
# Glean a destination from the first long option string,
# or from the first short option string if no long options.
if self._long_opts:
# eg. "--foo-bar" -> "foo_bar"
self.dest = self._long_opts[0][2:].replace('-', '_')
else:
self.dest = self._short_opts[0][1]
def _check_const (self):
if self.action != "store_const" and self.const is not None:
raise OptionError(
"'const' must not be supplied for action %r" % self.action,
self)
def _check_nargs (self):
if self.action in self.TYPED_ACTIONS:
if self.nargs is None:
self.nargs = 1
elif self.nargs is not None:
raise OptionError(
"'nargs' must not be supplied for action %r" % self.action,
self)
def _check_callback (self):
if self.action == "callback":
if not callable(self.callback):
raise OptionError(
"callback not callable: %r" % self.callback, self)
if (self.callback_args is not None and
type(self.callback_args) is not types.TupleType):
raise OptionError(
"callback_args, if supplied, must be a tuple: not %r"
% self.callback_args, self)
if (self.callback_kwargs is not None and
type(self.callback_kwargs) is not types.DictType):
raise OptionError(
"callback_kwargs, if supplied, must be a dict: not %r"
% self.callback_kwargs, self)
else:
if self.callback is not None:
raise OptionError(
"callback supplied (%r) for non-callback option"
% self.callback, self)
if self.callback_args is not None:
raise OptionError(
"callback_args supplied for non-callback option", self)
if self.callback_kwargs is not None:
raise OptionError(
"callback_kwargs supplied for non-callback option", self)
CHECK_METHODS = [_check_action,
_check_type,
_check_choice,
_check_dest,
_check_const,
_check_nargs,
_check_callback]
# -- Miscellaneous methods -----------------------------------------
def __str__ (self):
return "/".join(self._short_opts + self._long_opts)
def takes_value (self):
return self.type is not None
# -- Processing methods --------------------------------------------
def check_value (self, opt, value):
checker = self.TYPE_CHECKER.get(self.type)
if checker is None:
return value
else:
return checker(self, opt, value)
def process (self, opt, value, values, parser):
# First, convert the value(s) to the right type. Howl if any
# value(s) are bogus.
if value is not None:
if self.nargs == 1:
value = self.check_value(opt, value)
else:
value = tuple([self.check_value(opt, v) for v in value])
# And then take whatever action is expected of us.
# This is a separate method to make life easier for
# subclasses to add new actions.
return self.take_action(
self.action, self.dest, opt, value, values, parser)
def take_action (self, action, dest, opt, value, values, parser):
if action == "store":
setattr(values, dest, value)
elif action == "store_const":
setattr(values, dest, self.const)
elif action == "store_true":
setattr(values, dest, True)
elif action == "store_false":
setattr(values, dest, False)
elif action == "append":
values.ensure_value(dest, []).append(value)
elif action == "count":
setattr(values, dest, values.ensure_value(dest, 0) + 1)
elif action == "callback":
args = self.callback_args or ()
kwargs = self.callback_kwargs or {}
self.callback(self, opt, value, parser, *args, **kwargs)
elif action == "help":
parser.print_help()
sys.exit(0)
elif action == "version":
parser.print_version()
sys.exit(0)
else:
raise RuntimeError, "unknown action %r" % self.action
return 1
# class Option
def get_prog_name ():
return os.path.basename(sys.argv[0])
SUPPRESS_HELP = "SUPPRESS"+"HELP"
SUPPRESS_USAGE = "SUPPRESS"+"USAGE"
STD_HELP_OPTION = Option("-h", "--help",
action="help",
help="show this help message and exit")
STD_VERSION_OPTION = Option("--version",
action="version",
help="show program's version number and exit")
class Values:
def __init__ (self, defaults=None):
if defaults:
for (attr, val) in defaults.items():
setattr(self, attr, val)
def __repr__ (self):
return ("<%s at 0x%x: %r>"
% (self.__class__.__name__, id(self), self.__dict__))
def _update_careful (self, dict):
"""
Update the option values from an arbitrary dictionary, but only
use keys from dict that already have a corresponding attribute
in self. Any keys in dict without a corresponding attribute
are silently ignored.
"""
for attr in dir(self):
if dict.has_key(attr):
dval = dict[attr]
if dval is not None:
setattr(self, attr, dval)
def _update_loose (self, dict):
"""
Update the option values from an arbitrary dictionary,
using all keys from the dictionary regardless of whether
they have a corresponding attribute in self or not.
"""
self.__dict__.update(dict)
def _update (self, dict, mode):
if mode == "careful":
self._update_careful(dict)
elif mode == "loose":
self._update_loose(dict)
else:
raise ValueError, "invalid update mode: %r" % mode
def read_module (self, modname, mode="careful"):
__import__(modname)
mod = sys.modules[modname]
self._update(vars(mod), mode)
def read_file (self, filename, mode="careful"):
vars = {}
execfile(filename, vars)
self._update(vars, mode)
def ensure_value (self, attr, value):
if not hasattr(self, attr) or getattr(self, attr) is None:
setattr(self, attr, value)
return getattr(self, attr)
class OptionContainer:
"""
Abstract base class.
Class attributes:
standard_option_list : [Option]
list of standard options that will be accepted by all instances
of this parser class (intended to be overridden by subclasses).
Instance attributes:
option_list : [Option]
the list of Option objects contained by this OptionContainer
_short_opt : { string : Option }
dictionary mapping short option strings, eg. "-f" or "-X",
to the Option instances that implement them. If an Option
has multiple short option strings, it will appears in this
dictionary multiple times. [1]
_long_opt : { string : Option }
dictionary mapping long option strings, eg. "--file" or
"--exclude", to the Option instances that implement them.
Again, a given Option can occur multiple times in this
dictionary. [1]
defaults : { string : any }
dictionary mapping option destination names to default
values for each destination [1]
[1] These mappings are common to (shared by) all components of the
controlling OptionParser, where they are initially created.
"""
def __init__ (self, option_class, conflict_handler, description):
# Initialize the option list and related data structures.
# This method must be provided by subclasses, and it must
# initialize at least the following instance attributes:
# option_list, _short_opt, _long_opt, defaults.
self._create_option_list()
self.option_class = option_class
self.set_conflict_handler(conflict_handler)
self.set_description(description)
def _create_option_mappings (self):
# For use by OptionParser constructor -- create the master
# option mappings used by this OptionParser and all
# OptionGroups that it owns.
self._short_opt = {} # single letter -> Option instance
self._long_opt = {} # long option -> Option instance
self.defaults = {} # maps option dest -> default value
def _share_option_mappings (self, parser):
# For use by OptionGroup constructor -- use shared option
# mappings from the OptionParser that owns this OptionGroup.
self._short_opt = parser._short_opt
self._long_opt = parser._long_opt
self.defaults = parser.defaults
def set_conflict_handler (self, handler):
if handler not in ("ignore", "error", "resolve"):
raise ValueError, "invalid conflict_resolution value %r" % handler
self.conflict_handler = handler
def set_description (self, description):
self.description = description
# -- Option-adding methods -----------------------------------------
def _check_conflict (self, option):
conflict_opts = []
for opt in option._short_opts:
if self._short_opt.has_key(opt):
conflict_opts.append((opt, self._short_opt[opt]))
for opt in option._long_opts:
if self._long_opt.has_key(opt):
conflict_opts.append((opt, self._long_opt[opt]))
if conflict_opts:
handler = self.conflict_handler
if handler == "ignore": # behaviour for Optik 1.0, 1.1
pass
elif handler == "error": # new in 1.2
raise OptionConflictError(
"conflicting option string(s): %s"
% ", ".join([co[0] for co in conflict_opts]),
option)
elif handler == "resolve": # new in 1.2
for (opt, c_option) in conflict_opts:
if opt.startswith("--"):
c_option._long_opts.remove(opt)
del self._long_opt[opt]
else:
c_option._short_opts.remove(opt)
del self._short_opt[opt]
if not (c_option._short_opts or c_option._long_opts):
c_option.container.option_list.remove(c_option)
def add_option (self, *args, **kwargs):
"""add_option(Option)
add_option(opt_str, ..., kwarg=val, ...)
"""
if type(args[0]) is types.StringType:
option = self.option_class(*args, **kwargs)
elif len(args) == 1 and not kwargs:
option = args[0]
if not isinstance(option, Option):
raise TypeError, "not an Option instance: %r" % option
else:
raise TypeError, "invalid arguments"
self._check_conflict(option)
self.option_list.append(option)
option.container = self
for opt in option._short_opts:
self._short_opt[opt] = option
for opt in option._long_opts:
self._long_opt[opt] = option
if option.dest is not None: # option has a dest, we need a default
if option.default is not NO_DEFAULT:
self.defaults[option.dest] = option.default
elif not self.defaults.has_key(option.dest):
self.defaults[option.dest] = None
return option
def add_options (self, option_list):
for option in option_list:
self.add_option(option)
# -- Option query/removal methods ----------------------------------
def get_option (self, opt_str):
return (self._short_opt.get(opt_str) or
self._long_opt.get(opt_str))
def has_option (self, opt_str):
return (self._short_opt.has_key(opt_str) or
self._long_opt.has_key(opt_str))
def remove_option (self, opt_str):
option = self._short_opt.get(opt_str)
if option is None:
option = self._long_opt.get(opt_str)
if option is None:
raise ValueError("no such option %r" % opt_str)
for opt in option._short_opts:
del self._short_opt[opt]
for opt in option._long_opts:
del self._long_opt[opt]
option.container.option_list.remove(option)
# -- Help-formatting methods ---------------------------------------
def format_option_help (self, formatter):
if not self.option_list:
return ""
result = []
for option in self.option_list:
if not option.help is SUPPRESS_HELP:
result.append(formatter.format_option(option))
return "".join(result)
def format_description (self, formatter):
if self.description:
return formatter.format_description(self.description)
else:
return ""
def format_help (self, formatter):
if self.description:
desc = self.format_description(formatter) + "\n"
else:
desc = ""
return desc + self.format_option_help(formatter)
class OptionGroup (OptionContainer):
def __init__ (self, parser, title, description=None):
self.parser = parser
OptionContainer.__init__(
self, parser.option_class, parser.conflict_handler, description)
self.title = title
def _create_option_list (self):
self.option_list = []
self._share_option_mappings(self.parser)
def set_title (self, title):
self.title = title
# -- Help-formatting methods ---------------------------------------
def format_help (self, formatter):
result = formatter.format_heading(self.title)
formatter.indent()
result += OptionContainer.format_help(self, formatter)
formatter.dedent()
return result
class OptionParser (OptionContainer):
"""
Class attributes:
standard_option_list : [Option]
list of standard options that will be accepted by all instances
of this parser class (intended to be overridden by subclasses).
Instance attributes:
usage : string
a usage string for your program. Before it is displayed
to the user, "%prog" will be expanded to the name of
your program (self.prog or os.path.basename(sys.argv[0])).
prog : string
the name of the current program (to override
os.path.basename(sys.argv[0])).
allow_interspersed_args : boolean = true
if true, positional arguments may be interspersed with options.
Assuming -a and -b each take a single argument, the command-line
-ablah foo bar -bboo baz
will be interpreted the same as
-ablah -bboo -- foo bar baz
If this flag were false, that command line would be interpreted as
-ablah -- foo bar -bboo baz
-- ie. we stop processing options as soon as we see the first
non-option argument. (This is the tradition followed by
Python's getopt module, Perl's Getopt::Std, and other argument-
parsing libraries, but it is generally annoying to users.)
rargs : [string]
the argument list currently being parsed. Only set when
parse_args() is active, and continually trimmed down as
we consume arguments. Mainly there for the benefit of
callback options.
largs : [string]
the list of leftover arguments that we have skipped while
parsing options. If allow_interspersed_args is false, this
list is always empty.
values : Values
the set of option values currently being accumulated. Only
set when parse_args() is active. Also mainly for callbacks.
Because of the 'rargs', 'largs', and 'values' attributes,
OptionParser is not thread-safe. If, for some perverse reason, you
need to parse command-line arguments simultaneously in different
threads, use different OptionParser instances.
"""
standard_option_list = []
def __init__ (self,
usage=None,
option_list=None,
option_class=Option,
version=None,
conflict_handler="error",
description=None,
formatter=None,
add_help_option=1,
prog=None):
OptionContainer.__init__(
self, option_class, conflict_handler, description)
self.set_usage(usage)
self.prog = prog
self.version = version
self.allow_interspersed_args = 1
if formatter is None:
formatter = IndentedHelpFormatter()
self.formatter = formatter
# Populate the option list; initial sources are the
# standard_option_list class attribute, the 'option_list'
# argument, and the STD_VERSION_OPTION (if 'version' supplied)
# and STD_HELP_OPTION globals.
self._populate_option_list(option_list,
add_help=add_help_option)
self._init_parsing_state()
# -- Private methods -----------------------------------------------
# (used by our or OptionContainer's constructor)
def _create_option_list (self):
self.option_list = []
self.option_groups = []
self._create_option_mappings()
def _populate_option_list (self, option_list, add_help=1):
if self.standard_option_list:
self.add_options(self.standard_option_list)
if option_list:
self.add_options(option_list)
if self.version:
self.add_option(STD_VERSION_OPTION)
if add_help:
self.add_option(STD_HELP_OPTION)
def _init_parsing_state (self):
# These are set in parse_args() for the convenience of callbacks.
self.rargs = None
self.largs = None
self.values = None
# -- Simple modifier methods ---------------------------------------
def set_usage (self, usage):
if usage is None:
self.usage = "%prog [options]"
elif usage is SUPPRESS_USAGE:
self.usage = None
elif usage.startswith("usage: "):
# for backwards compatibility with Optik 1.3 and earlier
self.usage = usage[7:]
else:
self.usage = usage
def enable_interspersed_args (self):
self.allow_interspersed_args = 1
def disable_interspersed_args (self):
self.allow_interspersed_args = 0
def set_default (self, dest, value):
self.defaults[dest] = value
def set_defaults (self, **kwargs):
self.defaults.update(kwargs)
def get_default_values (self):
return Values(self.defaults)
# -- OptionGroup methods -------------------------------------------
def add_option_group (self, *args, **kwargs):
# XXX lots of overlap with OptionContainer.add_option()
if type(args[0]) is types.StringType:
group = OptionGroup(self, *args, **kwargs)
elif len(args) == 1 and not kwargs:
group = args[0]
if not isinstance(group, OptionGroup):
raise TypeError, "not an OptionGroup instance: %r" % group
if group.parser is not self:
raise ValueError, "invalid OptionGroup (wrong parser)"
else:
raise TypeError, "invalid arguments"
self.option_groups.append(group)
return group
def get_option_group (self, opt_str):
option = (self._short_opt.get(opt_str) or
self._long_opt.get(opt_str))
if option and option.container is not self:
return option.container
return None
# -- Option-parsing methods ----------------------------------------
def _get_args (self, args):
if args is None:
return sys.argv[1:]
else:
return args[:] # don't modify caller's list
def parse_args (self, args=None, values=None):
"""
parse_args(args : [string] = sys.argv[1:],
values : Values = None)
-> (values : Values, args : [string])
Parse the command-line options found in 'args' (default:
sys.argv[1:]). Any errors result in a call to 'error()', which
by default prints the usage message to stderr and calls
sys.exit() with an error message. On success returns a pair
(values, args) where 'values' is an Values instance (with all
your option values) and 'args' is the list of arguments left
over after parsing options.
"""
rargs = self._get_args(args)
if values is None:
values = self.get_default_values()
# Store the halves of the argument list as attributes for the
# convenience of callbacks:
# rargs
# the rest of the command-line (the "r" stands for
# "remaining" or "right-hand")
# largs
# the leftover arguments -- ie. what's left after removing
# options and their arguments (the "l" stands for "leftover"
# or "left-hand")
self.rargs = rargs
self.largs = largs = []
self.values = values
try:
stop = self._process_args(largs, rargs, values)
except (BadOptionError, OptionValueError), err:
self.error(err.msg)
args = largs + rargs
return self.check_values(values, args)
def check_values (self, values, args):
"""
check_values(values : Values, args : [string])
-> (values : Values, args : [string])
Check that the supplied option values and leftover arguments are
valid. Returns the option values and leftover arguments
(possibly adjusted, possibly completely new -- whatever you
like). Default implementation just returns the passed-in
values; subclasses may override as desired.
"""
return (values, args)
def _process_args (self, largs, rargs, values):
"""_process_args(largs : [string],
rargs : [string],
values : Values)
Process command-line arguments and populate 'values', consuming
options and arguments from 'rargs'. If 'allow_interspersed_args' is
false, stop at the first non-option argument. If true, accumulate any
interspersed non-option arguments in 'largs'.
"""
while rargs:
arg = rargs[0]
# We handle bare "--" explicitly, and bare "-" is handled by the
# standard arg handler since the short arg case ensures that the
# len of the opt string is greater than 1.
if arg == "--":
del rargs[0]
return
elif arg[0:2] == "--":
# process a single long option (possibly with value(s))
self._process_long_opt(rargs, values)
elif arg[:1] == "-" and len(arg) > 1:
# process a cluster of short options (possibly with
# value(s) for the last one only)
self._process_short_opts(rargs, values)
elif self.allow_interspersed_args:
largs.append(arg)
del rargs[0]
else:
return # stop now, leave this arg in rargs
# Say this is the original argument list:
# [arg0, arg1, ..., arg(i-1), arg(i), arg(i+1), ..., arg(N-1)]
# ^
# (we are about to process arg(i)).
#
# Then rargs is [arg(i), ..., arg(N-1)] and largs is a *subset* of
# [arg0, ..., arg(i-1)] (any options and their arguments will have
# been removed from largs).
#
# The while loop will usually consume 1 or more arguments per pass.
# If it consumes 1 (eg. arg is an option that takes no arguments),
# then after _process_arg() is done the situation is:
#
# largs = subset of [arg0, ..., arg(i)]
# rargs = [arg(i+1), ..., arg(N-1)]
#
# If allow_interspersed_args is false, largs will always be
# *empty* -- still a subset of [arg0, ..., arg(i-1)], but
# not a very interesting subset!
def _match_long_opt (self, opt):
"""_match_long_opt(opt : string) -> string
Determine which long option string 'opt' matches, ie. which one
it is an unambiguous abbrevation for. Raises BadOptionError if
'opt' doesn't unambiguously match any long option string.
"""
return _match_abbrev(opt, self._long_opt)
def _process_long_opt (self, rargs, values):
arg = rargs.pop(0)
# Value explicitly attached to arg? Pretend it's the next
# argument.
if "=" in arg:
(opt, next_arg) = arg.split("=", 1)
rargs.insert(0, next_arg)
had_explicit_value = 1
else:
opt = arg
had_explicit_value = 0
opt = self._match_long_opt(opt)
option = self._long_opt[opt]
if option.takes_value():
nargs = option.nargs
if len(rargs) < nargs:
if nargs == 1:
self.error("%s option requires a value" % opt)
else:
self.error("%s option requires %d values"
% (opt, nargs))
elif nargs == 1:
value = rargs.pop(0)
else:
value = tuple(rargs[0:nargs])
del rargs[0:nargs]
elif had_explicit_value:
self.error("%s option does not take a value" % opt)
else:
value = None
option.process(opt, value, values, self)
def _process_short_opts (self, rargs, values):
arg = rargs.pop(0)
stop = 0
i = 1
for ch in arg[1:]:
opt = "-" + ch
option = self._short_opt.get(opt)
i += 1 # we have consumed a character
if not option:
self.error("no such option: %s" % opt)
if option.takes_value():
# Any characters left in arg? Pretend they're the
# next arg, and stop consuming characters of arg.
if i < len(arg):
rargs.insert(0, arg[i:])
stop = 1
nargs = option.nargs
if len(rargs) < nargs:
if nargs == 1:
self.error("%s option requires a value" % opt)
else:
self.error("%s option requires %s values"
% (opt, nargs))
elif nargs == 1:
value = rargs.pop(0)
else:
value = tuple(rargs[0:nargs])
del rargs[0:nargs]
else: # option doesn't take a value
value = None
option.process(opt, value, values, self)
if stop:
break
# -- Feedback methods ----------------------------------------------
def error (self, msg):
"""error(msg : string)
Print a usage message incorporating 'msg' to stderr and exit.
If you override this in a subclass, it should not return -- it
should either exit or raise an exception.
"""
self.print_usage(sys.stderr)
sys.exit("%s: error: %s" % (get_prog_name(), msg))
def get_usage (self):
if self.usage:
return self.formatter.format_usage(
self.usage.replace("%prog", get_prog_name()))
else:
return ""
def print_usage (self, file=None):
"""print_usage(file : file = stdout)
Print the usage message for the current program (self.usage) to
'file' (default stdout). Any occurence of the string "%prog" in
self.usage is replaced with the name of the current program
(basename of sys.argv[0]). Does nothing if self.usage is empty
or not defined.
"""
if self.usage:
print >>file, self.get_usage()
def get_version (self):
if self.version:
return self.version.replace("%prog", get_prog_name())
else:
return ""
def print_version (self, file=None):
"""print_version(file : file = stdout)
Print the version message for this program (self.version) to
'file' (default stdout). As with print_usage(), any occurence
of "%prog" in self.version is replaced by the current program's
name. Does nothing if self.version is empty or undefined.
"""
if self.version:
print >>file, self.get_version()
def format_option_help (self, formatter=None):
if formatter is None:
formatter = self.formatter
formatter.store_option_strings(self)
result = []
result.append(formatter.format_heading("options"))
formatter.indent()
if self.option_list:
result.append(OptionContainer.format_option_help(self, formatter))
result.append("\n")
for group in self.option_groups:
result.append(group.format_help(formatter))
result.append("\n")
formatter.dedent()
# Drop the last "\n", or the header if no options or option groups:
return "".join(result[:-1])
def format_help (self, formatter=None):
if formatter is None:
formatter = self.formatter
result = []
if self.usage:
result.append(self.get_usage() + "\n")
if self.description:
result.append(self.format_description(formatter) + "\n")
result.append(self.format_option_help(formatter))
return "".join(result)
def print_help (self, file=None):
"""print_help(file : file = stdout)
Print an extended help message, listing all options and any
help text provided with them, to 'file' (default stdout).
"""
if file is None:
file = sys.stdout
file.write(self.format_help())
# class OptionParser
def _match_abbrev (s, wordmap):
"""_match_abbrev(s : string, wordmap : {string : Option}) -> string
Return the string key in 'wordmap' for which 's' is an unambiguous
abbreviation. If 's' is found to be ambiguous or doesn't match any of
'words', raise BadOptionError.
"""
# Is there an exact match?
if wordmap.has_key(s):
return s
else:
# Isolate all words with s as a prefix.
possibilities = [word for word in wordmap.keys()
if word.startswith(s)]
# No exact match, so there had better be just one possibility.
if len(possibilities) == 1:
return possibilities[0]
elif not possibilities:
raise BadOptionError("no such option: %s" % s)
else:
# More than one possible completion: ambiguous prefix.
raise BadOptionError("ambiguous option: %s (%s?)"
% (s, ", ".join(possibilities)))
# Some day, there might be many Option classes. As of Optik 1.3, the
# preferred way to instantiate Options is indirectly, via make_option(),
# which will become a factory function when there are many Option
# classes.
make_option = Option
"""Convert to and from Roman numerals"""
__author__ = "Mark Pilgrim (f8dy@diveintopython.org)"
__version__ = "1.4"
__date__ = "8 August 2001"
__copyright__ = """Copyright (c) 2001 Mark Pilgrim
This program is part of "Dive Into Python", a free Python tutorial for
experienced programmers. Visit http://diveintopython.org/ for the
latest version.
This program is free software; you can redistribute it and/or modify
it under the terms of the Python 2.1.1 license, available at
http://www.python.org/2.1.1/license.html
"""
import re
#Define exceptions
class RomanError(Exception): pass
class OutOfRangeError(RomanError): pass
class NotIntegerError(RomanError): pass
class InvalidRomanNumeralError(RomanError): pass
#Define digit mapping
romanNumeralMap = (('M', 1000),
('CM', 900),
('D', 500),
('CD', 400),
('C', 100),
('XC', 90),
('L', 50),
('XL', 40),
('X', 10),
('IX', 9),
('V', 5),
('IV', 4),
('I', 1))
def toRoman(n):
"""convert integer to Roman numeral"""
if not (0 < n < 5000):
raise OutOfRangeError, "number out of range (must be 1..4999)"
if int(n) <> n:
raise NotIntegerError, "decimals can not be converted"
result = ""
for numeral, integer in romanNumeralMap:
while n >= integer:
result += numeral
n -= integer
return result
#Define pattern to detect valid Roman numerals
romanNumeralPattern = re.compile('''
^ # beginning of string
M{0,4} # thousands - 0 to 4 M's
(CM|CD|D?C{0,3}) # hundreds - 900 (CM), 400 (CD), 0-300 (0 to 3 C's),
# or 500-800 (D, followed by 0 to 3 C's)
(XC|XL|L?X{0,3}) # tens - 90 (XC), 40 (XL), 0-30 (0 to 3 X's),
# or 50-80 (L, followed by 0 to 3 X's)
(IX|IV|V?I{0,3}) # ones - 9 (IX), 4 (IV), 0-3 (0 to 3 I's),
# or 5-8 (V, followed by 0 to 3 I's)
$ # end of string
''' ,re.VERBOSE)
def fromRoman(s):
"""convert Roman numeral to integer"""
if not s:
raise InvalidRomanNumeralError, 'Input can not be blank'
if not romanNumeralPattern.search(s):
raise InvalidRomanNumeralError, 'Invalid Roman numeral: %s' % s
result = 0
index = 0
for numeral, integer in romanNumeralMap:
while s[index:index+len(numeral)] == numeral:
result += integer
index += len(numeral)
return result
# Author: David Goodger
# Contact: goodger@users.sourceforge.net
# Revision: $Revision: 1.3 $
# Date: $Date: 2003/07/10 15:49:30 $
# Copyright: This module has been placed in the public domain.
"""
A finite state machine specialized for regular-expression-based text filters,
this module defines the following classes:
- `StateMachine`, a state machine
- `State`, a state superclass
- `StateMachineWS`, a whitespace-sensitive version of `StateMachine`
- `StateWS`, a state superclass for use with `StateMachineWS`
- `SearchStateMachine`, uses `re.search()` instead of `re.match()`
- `SearchStateMachineWS`, uses `re.search()` instead of `re.match()`
- `ViewList`, extends standard Python lists.
- `StringList`, string-specific ViewList.
Exception classes:
- `StateMachineError`
- `UnknownStateError`
- `DuplicateStateError`
- `UnknownTransitionError`
- `DuplicateTransitionError`
- `TransitionPatternNotFound`
- `TransitionMethodNotFound`
- `UnexpectedIndentationError`
- `TransitionCorrection`: Raised to switch to another transition.
- `StateCorrection`: Raised to switch to another state & transition.
Functions:
- `string2lines()`: split a multi-line string into a list of one-line strings
How To Use This Module
======================
(See the individual classes, methods, and attributes for details.)
1. Import it: ``import statemachine`` or ``from statemachine import ...``.
You will also need to ``import re``.
2. Derive a subclass of `State` (or `StateWS`) for each state in your state
machine::
class MyState(statemachine.State):
Within the state's class definition:
a) Include a pattern for each transition, in `State.patterns`::
patterns = {'atransition': r'pattern', ...}
b) Include a list of initial transitions to be set up automatically, in
`State.initial_transitions`::
initial_transitions = ['atransition', ...]
c) Define a method for each transition, with the same name as the
transition pattern::
def atransition(self, match, context, next_state):
# do something
result = [...] # a list
return context, next_state, result
# context, next_state may be altered
Transition methods may raise an `EOFError` to cut processing short.
d) You may wish to override the `State.bof()` and/or `State.eof()` implicit
transition methods, which handle the beginning- and end-of-file.
e) In order to handle nested processing, you may wish to override the
attributes `State.nested_sm` and/or `State.nested_sm_kwargs`.
If you are using `StateWS` as a base class, in order to handle nested
indented blocks, you may wish to:
- override the attributes `StateWS.indent_sm`,
`StateWS.indent_sm_kwargs`, `StateWS.known_indent_sm`, and/or
`StateWS.known_indent_sm_kwargs`;
- override the `StateWS.blank()` method; and/or
- override or extend the `StateWS.indent()`, `StateWS.known_indent()`,
and/or `StateWS.firstknown_indent()` methods.
3. Create a state machine object::
sm = StateMachine(state_classes=[MyState, ...],
initial_state='MyState')
4. Obtain the input text, which needs to be converted into a tab-free list of
one-line strings. For example, to read text from a file called
'inputfile'::
input_string = open('inputfile').read()
input_lines = statemachine.string2lines(input_string)
5. Run the state machine on the input text and collect the results, a list::
results = sm.run(input_lines)
6. Remove any lingering circular references::
sm.unlink()
"""
__docformat__ = 'restructuredtext'
import sys
import re
from types import SliceType as _SliceType
class StateMachine:
"""
A finite state machine for text filters using regular expressions.
The input is provided in the form of a list of one-line strings (no
newlines). States are subclasses of the `State` class. Transitions consist
of regular expression patterns and transition methods, and are defined in
each state.
The state machine is started with the `run()` method, which returns the
results of processing in a list.
"""
def __init__(self, state_classes, initial_state, debug=0):
"""
Initialize a `StateMachine` object; add state objects.
Parameters:
- `state_classes`: a list of `State` (sub)classes.
- `initial_state`: a string, the class name of the initial state.
- `debug`: a boolean; produce verbose output if true (nonzero).
"""
self.input_lines = None
"""`StringList` of input lines (without newlines).
Filled by `self.run()`."""
self.input_offset = 0
"""Offset of `self.input_lines` from the beginning of the file."""
self.line = None
"""Current input line."""
self.line_offset = -1
"""Current input line offset from beginning of `self.input_lines`."""
self.debug = debug
"""Debugging mode on/off."""
self.initial_state = initial_state
"""The name of the initial state (key to `self.states`)."""
self.current_state = initial_state
"""The name of the current state (key to `self.states`)."""
self.states = {}
"""Mapping of {state_name: State_object}."""
self.add_states(state_classes)
self.observers = []
"""List of bound methods or functions to call whenever the current
line changes. Observers are called with one argument, ``self``.
Cleared at the end of `run()`."""
def unlink(self):
"""Remove circular references to objects no longer required."""
for state in self.states.values():
state.unlink()
self.states = None
def run(self, input_lines, input_offset=0, context=None,
input_source=None):
"""
Run the state machine on `input_lines`. Return results (a list).
Reset `self.line_offset` and `self.current_state`. Run the
beginning-of-file transition. Input one line at a time and check for a
matching transition. If a match is found, call the transition method
and possibly change the state. Store the context returned by the
transition method to be passed on to the next transition matched.
Accumulate the results returned by the transition methods in a list.
Run the end-of-file transition. Finally, return the accumulated
results.
Parameters:
- `input_lines`: a list of strings without newlines, or `StringList`.
- `input_offset`: the line offset of `input_lines` from the beginning
of the file.
- `context`: application-specific storage.
- `input_source`: name or path of source of `input_lines`.
"""
self.runtime_init()
if isinstance(input_lines, StringList):
self.input_lines = input_lines
else:
self.input_lines = StringList(input_lines, source=input_source)
self.input_offset = input_offset
self.line_offset = -1
self.current_state = self.initial_state
if self.debug:
print >>sys.stderr, (
'\nStateMachine.run: input_lines (line_offset=%s):\n| %s'
% (self.line_offset, '\n| '.join(self.input_lines)))
transitions = None
results = []
state = self.get_state()
try:
if self.debug:
print >>sys.stderr, ('\nStateMachine.run: bof transition')
context, result = state.bof(context)
results.extend(result)
while 1:
try:
try:
self.next_line()
if self.debug:
source, offset = self.input_lines.info(
self.line_offset)
print >>sys.stderr, (
'\nStateMachine.run: line (source=%r, '
'offset=%r):\n| %s'
% (source, offset, self.line))
context, next_state, result = self.check_line(
context, state, transitions)
except EOFError:
if self.debug:
print >>sys.stderr, (
'\nStateMachine.run: %s.eof transition'
% state.__class__.__name__)
result = state.eof(context)
results.extend(result)
break
else:
results.extend(result)
except TransitionCorrection, exception:
self.previous_line() # back up for another try
transitions = (exception.args[0],)
if self.debug:
print >>sys.stderr, (
'\nStateMachine.run: TransitionCorrection to '
'state "%s", transition %s.'
% (state.__class__.__name__, transitions[0]))
continue
except StateCorrection, exception:
self.previous_line() # back up for another try
next_state = exception.args[0]
if len(exception.args) == 1:
transitions = None
else:
transitions = (exception.args[1],)
if self.debug:
print >>sys.stderr, (
'\nStateMachine.run: StateCorrection to state '
'"%s", transition %s.'
% (next_state, transitions[0]))
else:
transitions = None
state = self.get_state(next_state)
except:
if self.debug:
self.error()
raise
self.observers = []
return results
def get_state(self, next_state=None):
"""
Return current state object; set it first if `next_state` given.
Parameter `next_state`: a string, the name of the next state.
Exception: `UnknownStateError` raised if `next_state` unknown.
"""
if next_state:
if self.debug and next_state != self.current_state:
print >>sys.stderr, \
('\nStateMachine.get_state: Changing state from '
'"%s" to "%s" (input line %s).'
% (self.current_state, next_state,
self.abs_line_number()))
self.current_state = next_state
try:
return self.states[self.current_state]
except KeyError:
raise UnknownStateError(self.current_state)
def next_line(self, n=1):
"""Load `self.line` with the `n`'th next line and return it."""
try:
try:
self.line_offset += n
self.line = self.input_lines[self.line_offset]
except IndexError:
self.line = None
raise EOFError
return self.line
finally:
self.notify_observers()
def is_next_line_blank(self):
"""Return 1 if the next line is blank or non-existant."""
try:
return not self.input_lines[self.line_offset + 1].strip()
except IndexError:
return 1
def at_eof(self):
"""Return 1 if the input is at or past end-of-file."""
return self.line_offset >= len(self.input_lines) - 1
def at_bof(self):
"""Return 1 if the input is at or before beginning-of-file."""
return self.line_offset <= 0
def previous_line(self, n=1):
"""Load `self.line` with the `n`'th previous line and return it."""
self.line_offset -= n
if self.line_offset < 0:
self.line = None
else:
self.line = self.input_lines[self.line_offset]
self.notify_observers()
return self.line
def goto_line(self, line_offset):
"""Jump to absolute line offset `line_offset`, load and return it."""
try:
try:
self.line_offset = line_offset - self.input_offset
self.line = self.input_lines[self.line_offset]
except IndexError:
self.line = None
raise EOFError
return self.line
finally:
self.notify_observers()
def abs_line_offset(self):
"""Return line offset of current line, from beginning of file."""
return self.line_offset + self.input_offset
def abs_line_number(self):
"""Return line number of current line (counting from 1)."""
return self.line_offset + self.input_offset + 1
def insert_input(self, input_lines, source):
self.input_lines.insert(self.line_offset + 1, '',
source='internal padding')
self.input_lines.insert(self.line_offset + 1, '',
source='internal padding')
self.input_lines.insert(self.line_offset + 2,
StringList(input_lines, source))
def get_text_block(self, flush_left=0):
"""
Return a contiguous block of text.
If `flush_left` is true, raise `UnexpectedIndentationError` if an
indented line is encountered before the text block ends (with a blank
line).
"""
try:
block = self.input_lines.get_text_block(self.line_offset,
flush_left)
self.next_line(len(block) - 1)
return block
except UnexpectedIndentationError, error:
block, source, lineno = error
self.next_line(len(block) - 1) # advance to last line of block
raise
def check_line(self, context, state, transitions=None):
"""
Examine one line of input for a transition match & execute its method.
Parameters:
- `context`: application-dependent storage.
- `state`: a `State` object, the current state.
- `transitions`: an optional ordered list of transition names to try,
instead of ``state.transition_order``.
Return the values returned by the transition method:
- context: possibly modified from the parameter `context`;
- next state name (`State` subclass name);
- the result output of the transition, a list.
When there is no match, ``state.no_match()`` is called and its return
value is returned.
"""
if transitions is None:
transitions = state.transition_order
state_correction = None
if self.debug:
print >>sys.stderr, (
'\nStateMachine.check_line: state="%s", transitions=%r.'
% (state.__class__.__name__, transitions))
for name in transitions:
pattern, method, next_state = state.transitions[name]
match = self.match(pattern)
if match:
if self.debug:
print >>sys.stderr, (
'\nStateMachine.check_line: Matched transition '
'"%s" in state "%s".'
% (name, state.__class__.__name__))
return method(match, context, next_state)
else:
if self.debug:
print >>sys.stderr, (
'\nStateMachine.check_line: No match in state "%s".'
% state.__class__.__name__)
return state.no_match(context, transitions)
def match(self, pattern):
"""
Return the result of a regular expression match.
Parameter `pattern`: an `re` compiled regular expression.
"""
return pattern.match(self.line)
def add_state(self, state_class):
"""
Initialize & add a `state_class` (`State` subclass) object.
Exception: `DuplicateStateError` raised if `state_class` was already
added.
"""
statename = state_class.__name__
if self.states.has_key(statename):
raise DuplicateStateError(statename)
self.states[statename] = state_class(self, self.debug)
def add_states(self, state_classes):
"""
Add `state_classes` (a list of `State` subclasses).
"""
for state_class in state_classes:
self.add_state(state_class)
def runtime_init(self):
"""
Initialize `self.states`.
"""
for state in self.states.values():
state.runtime_init()
def error(self):
"""Report error details."""
type, value, module, line, function = _exception_data()
print >>sys.stderr, '%s: %s' % (type, value)
print >>sys.stderr, 'input line %s' % (self.abs_line_number())
print >>sys.stderr, ('module %s, line %s, function %s'
% (module, line, function))
def attach_observer(self, observer):
"""
The `observer` parameter is a function or bound method which takes two
arguments, the source and offset of the current line.
"""
self.observers.append(observer)
def detach_observer(self, observer):
self.observers.remove(observer)
def notify_observers(self):
for observer in self.observers:
try:
info = self.input_lines.info(self.line_offset)
except IndexError:
info = (None, None)
observer(*info)
class State:
"""
State superclass. Contains a list of transitions, and transition methods.
Transition methods all have the same signature. They take 3 parameters:
- An `re` match object. ``match.string`` contains the matched input line,
``match.start()`` gives the start index of the match, and
``match.end()`` gives the end index.
- A context object, whose meaning is application-defined (initial value
``None``). It can be used to store any information required by the state
machine, and the retured context is passed on to the next transition
method unchanged.
- The name of the next state, a string, taken from the transitions list;
normally it is returned unchanged, but it may be altered by the
transition method if necessary.
Transition methods all return a 3-tuple:
- A context object, as (potentially) modified by the transition method.
- The next state name (a return value of ``None`` means no state change).
- The processing result, a list, which is accumulated by the state
machine.
Transition methods may raise an `EOFError` to cut processing short.
There are two implicit transitions, and corresponding transition methods
are defined: `bof()` handles the beginning-of-file, and `eof()` handles
the end-of-file. These methods have non-standard signatures and return
values. `bof()` returns the initial context and results, and may be used
to return a header string, or do any other processing needed. `eof()`
should handle any remaining context and wrap things up; it returns the
final processing result.
Typical applications need only subclass `State` (or a subclass), set the
`patterns` and `initial_transitions` class attributes, and provide
corresponding transition methods. The default object initialization will
take care of constructing the list of transitions.
"""
patterns = None
"""
{Name: pattern} mapping, used by `make_transition()`. Each pattern may
be a string or a compiled `re` pattern. Override in subclasses.
"""
initial_transitions = None
"""
A list of transitions to initialize when a `State` is instantiated.
Each entry is either a transition name string, or a (transition name, next
state name) pair. See `make_transitions()`. Override in subclasses.
"""
nested_sm = None
"""
The `StateMachine` class for handling nested processing.
If left as ``None``, `nested_sm` defaults to the class of the state's
controlling state machine. Override it in subclasses to avoid the default.
"""
nested_sm_kwargs = None
"""
Keyword arguments dictionary, passed to the `nested_sm` constructor.
Two keys must have entries in the dictionary:
- Key 'state_classes' must be set to a list of `State` classes.
- Key 'initial_state' must be set to the name of the initial state class.
If `nested_sm_kwargs` is left as ``None``, 'state_classes' defaults to the
class of the current state, and 'initial_state' defaults to the name of
the class of the current state. Override in subclasses to avoid the
defaults.
"""
def __init__(self, state_machine, debug=0):
"""
Initialize a `State` object; make & add initial transitions.
Parameters:
- `statemachine`: the controlling `StateMachine` object.
- `debug`: a boolean; produce verbose output if true (nonzero).
"""
self.transition_order = []
"""A list of transition names in search order."""
self.transitions = {}
"""
A mapping of transition names to 3-tuples containing
(compiled_pattern, transition_method, next_state_name). Initialized as
an instance attribute dynamically (instead of as a class attribute)
because it may make forward references to patterns and methods in this
or other classes.
"""
self.add_initial_transitions()
self.state_machine = state_machine
"""A reference to the controlling `StateMachine` object."""
self.debug = debug
"""Debugging mode on/off."""
if self.nested_sm is None:
self.nested_sm = self.state_machine.__class__
if self.nested_sm_kwargs is None:
self.nested_sm_kwargs = {'state_classes': [self.__class__],
'initial_state': self.__class__.__name__}
def runtime_init(self):
"""
Initialize this `State` before running the state machine; called from
`self.state_machine.run()`.
"""
pass
def unlink(self):
"""Remove circular references to objects no longer required."""
self.state_machine = None
def add_initial_transitions(self):
"""Make and add transitions listed in `self.initial_transitions`."""
if self.initial_transitions:
names, transitions = self.make_transitions(
self.initial_transitions)
self.add_transitions(names, transitions)
def add_transitions(self, names, transitions):
"""
Add a list of transitions to the start of the transition list.
Parameters:
- `names`: a list of transition names.
- `transitions`: a mapping of names to transition tuples.
Exceptions: `DuplicateTransitionError`, `UnknownTransitionError`.
"""
for name in names:
if self.transitions.has_key(name):
raise DuplicateTransitionError(name)
if not transitions.has_key(name):
raise UnknownTransitionError(name)
self.transition_order[:0] = names
self.transitions.update(transitions)
def add_transition(self, name, transition):
"""
Add a transition to the start of the transition list.
Parameter `transition`: a ready-made transition 3-tuple.
Exception: `DuplicateTransitionError`.
"""
if self.transitions.has_key(name):
raise DuplicateTransitionError(name)
self.transition_order[:0] = [name]
self.transitions[name] = transition
def remove_transition(self, name):
"""
Remove a transition by `name`.
Exception: `UnknownTransitionError`.
"""
try:
del self.transitions[name]
self.transition_order.remove(name)
except:
raise UnknownTransitionError(name)
def make_transition(self, name, next_state=None):
"""
Make & return a transition tuple based on `name`.
This is a convenience function to simplify transition creation.
Parameters:
- `name`: a string, the name of the transition pattern & method. This
`State` object must have a method called '`name`', and a dictionary
`self.patterns` containing a key '`name`'.
- `next_state`: a string, the name of the next `State` object for this
transition. A value of ``None`` (or absent) implies no state change
(i.e., continue with the same state).
Exceptions: `TransitionPatternNotFound`, `TransitionMethodNotFound`.
"""
if next_state is None:
next_state = self.__class__.__name__
try:
pattern = self.patterns[name]
if not hasattr(pattern, 'match'):
pattern = re.compile(pattern)
except KeyError:
raise TransitionPatternNotFound(
'%s.patterns[%r]' % (self.__class__.__name__, name))
try:
method = getattr(self, name)
except AttributeError:
raise TransitionMethodNotFound(
'%s.%s' % (self.__class__.__name__, name))
return (pattern, method, next_state)
def make_transitions(self, name_list):
"""
Return a list of transition names and a transition mapping.
Parameter `name_list`: a list, where each entry is either a transition
name string, or a 1- or 2-tuple (transition name, optional next state
name).
"""
stringtype = type('')
names = []
transitions = {}
for namestate in name_list:
if type(namestate) is stringtype:
transitions[namestate] = self.make_transition(namestate)
names.append(namestate)
else:
transitions[namestate[0]] = self.make_transition(*namestate)
names.append(namestate[0])
return names, transitions
def no_match(self, context, transitions):
"""
Called when there is no match from `StateMachine.check_line()`.
Return the same values returned by transition methods:
- context: unchanged;
- next state name: ``None``;
- empty result list.
Override in subclasses to catch this event.
"""
return context, None, []
def bof(self, context):
"""
Handle beginning-of-file. Return unchanged `context`, empty result.
Override in subclasses.
Parameter `context`: application-defined storage.
"""
return context, []
def eof(self, context):
"""
Handle end-of-file. Return empty result.
Override in subclasses.
Parameter `context`: application-defined storage.
"""
return []
def nop(self, match, context, next_state):
"""
A "do nothing" transition method.
Return unchanged `context` & `next_state`, empty result. Useful for
simple state changes (actionless transitions).
"""
return context, next_state, []
class StateMachineWS(StateMachine):
"""
`StateMachine` subclass specialized for whitespace recognition.
There are three methods provided for extracting indented text blocks:
- `get_indented()`: use when the indent is unknown.
- `get_known_indented()`: use when the indent is known for all lines.
- `get_first_known_indented()`: use when only the first line's indent is
known.
"""
def get_indented(self, until_blank=0, strip_indent=1):
"""
Return a block of indented lines of text, and info.
Extract an indented block where the indent is unknown for all lines.
:Parameters:
- `until_blank`: Stop collecting at the first blank line if true
(1).
- `strip_indent`: Strip common leading indent if true (1,
default).
:Return:
- the indented block (a list of lines of text),
- its indent,
- its first line offset from BOF, and
- whether or not it finished with a blank line.
"""
offset = self.abs_line_offset()
indented, indent, blank_finish = self.input_lines.get_indented(
self.line_offset, until_blank, strip_indent)
if indented:
self.next_line(len(indented) - 1) # advance to last indented line
while indented and not indented[0].strip():
indented.trim_start()
offset += 1
return indented, indent, offset, blank_finish
def get_known_indented(self, indent, until_blank=0, strip_indent=1):
"""
Return an indented block and info.
Extract an indented block where the indent is known for all lines.
Starting with the current line, extract the entire text block with at
least `indent` indentation (which must be whitespace, except for the
first line).
:Parameters:
- `indent`: The number of indent columns/characters.
- `until_blank`: Stop collecting at the first blank line if true
(1).
- `strip_indent`: Strip `indent` characters of indentation if true
(1, default).
:Return:
- the indented block,
- its first line offset from BOF, and
- whether or not it finished with a blank line.
"""
offset = self.abs_line_offset()
indented, indent, blank_finish = self.input_lines.get_indented(
self.line_offset, until_blank, strip_indent,
block_indent=indent)
self.next_line(len(indented) - 1) # advance to last indented line
while indented and not indented[0].strip():
indented.trim_start()
offset += 1
return indented, offset, blank_finish
def get_first_known_indented(self, indent, until_blank=0, strip_indent=1,
strip_top=1):
"""
Return an indented block and info.
Extract an indented block where the indent is known for the first line
and unknown for all other lines.
:Parameters:
- `indent`: The first line's indent (# of columns/characters).
- `until_blank`: Stop collecting at the first blank line if true
(1).
- `strip_indent`: Strip `indent` characters of indentation if true
(1, default).
- `strip_top`: Strip blank lines from the beginning of the block.
:Return:
- the indented block,
- its indent,
- its first line offset from BOF, and
- whether or not it finished with a blank line.
"""
offset = self.abs_line_offset()
indented, indent, blank_finish = self.input_lines.get_indented(
self.line_offset, until_blank, strip_indent,
first_indent=indent)
self.next_line(len(indented) - 1) # advance to last indented line
if strip_top:
while indented and not indented[0].strip():
indented.trim_start()
offset += 1
return indented, indent, offset, blank_finish
class StateWS(State):
"""
State superclass specialized for whitespace (blank lines & indents).
Use this class with `StateMachineWS`. The transitions 'blank' (for blank
lines) and 'indent' (for indented text blocks) are added automatically,
before any other transitions. The transition method `blank()` handles
blank lines and `indent()` handles nested indented blocks. Indented
blocks trigger a new state machine to be created by `indent()` and run.
The class of the state machine to be created is in `indent_sm`, and the
constructor keyword arguments are in the dictionary `indent_sm_kwargs`.
The methods `known_indent()` and `firstknown_indent()` are provided for
indented blocks where the indent (all lines' and first line's only,
respectively) is known to the transition method, along with the attributes
`known_indent_sm` and `known_indent_sm_kwargs`. Neither transition method
is triggered automatically.
"""
indent_sm = None
"""
The `StateMachine` class handling indented text blocks.
If left as ``None``, `indent_sm` defaults to the value of
`State.nested_sm`. Override it in subclasses to avoid the default.
"""
indent_sm_kwargs = None
"""
Keyword arguments dictionary, passed to the `indent_sm` constructor.
If left as ``None``, `indent_sm_kwargs` defaults to the value of
`State.nested_sm_kwargs`. Override it in subclasses to avoid the default.
"""
known_indent_sm = None
"""
The `StateMachine` class handling known-indented text blocks.
If left as ``None``, `known_indent_sm` defaults to the value of
`indent_sm`. Override it in subclasses to avoid the default.
"""
known_indent_sm_kwargs = None
"""
Keyword arguments dictionary, passed to the `known_indent_sm` constructor.
If left as ``None``, `known_indent_sm_kwargs` defaults to the value of
`indent_sm_kwargs`. Override it in subclasses to avoid the default.
"""
ws_patterns = {'blank': ' *$',
'indent': ' +'}
"""Patterns for default whitespace transitions. May be overridden in
subclasses."""
ws_initial_transitions = ('blank', 'indent')
"""Default initial whitespace transitions, added before those listed in
`State.initial_transitions`. May be overridden in subclasses."""
def __init__(self, state_machine, debug=0):
"""
Initialize a `StateSM` object; extends `State.__init__()`.
Check for indent state machine attributes, set defaults if not set.
"""
State.__init__(self, state_machine, debug)
if self.indent_sm is None:
self.indent_sm = self.nested_sm
if self.indent_sm_kwargs is None:
self.indent_sm_kwargs = self.nested_sm_kwargs
if self.known_indent_sm is None:
self.known_indent_sm = self.indent_sm
if self.known_indent_sm_kwargs is None:
self.known_indent_sm_kwargs = self.indent_sm_kwargs
def add_initial_transitions(self):
"""
Add whitespace-specific transitions before those defined in subclass.
Extends `State.add_initial_transitions()`.
"""
State.add_initial_transitions(self)
if self.patterns is None:
self.patterns = {}
self.patterns.update(self.ws_patterns)
names, transitions = self.make_transitions(
self.ws_initial_transitions)
self.add_transitions(names, transitions)
def blank(self, match, context, next_state):
"""Handle blank lines. Does nothing. Override in subclasses."""
return self.nop(match, context, next_state)
def indent(self, match, context, next_state):
"""
Handle an indented text block. Extend or override in subclasses.
Recursively run the registered state machine for indented blocks
(`self.indent_sm`).
"""
indented, indent, line_offset, blank_finish = \
self.state_machine.get_indented()
sm = self.indent_sm(debug=self.debug, **self.indent_sm_kwargs)
results = sm.run(indented, input_offset=line_offset)
return context, next_state, results
def known_indent(self, match, context, next_state):
"""
Handle a known-indent text block. Extend or override in subclasses.
Recursively run the registered state machine for known-indent indented
blocks (`self.known_indent_sm`). The indent is the length of the
match, ``match.end()``.
"""
indented, line_offset, blank_finish = \
self.state_machine.get_known_indented(match.end())
sm = self.known_indent_sm(debug=self.debug,
**self.known_indent_sm_kwargs)
results = sm.run(indented, input_offset=line_offset)
return context, next_state, results
def first_known_indent(self, match, context, next_state):
"""
Handle an indented text block (first line's indent known).
Extend or override in subclasses.
Recursively run the registered state machine for known-indent indented
blocks (`self.known_indent_sm`). The indent is the length of the
match, ``match.end()``.
"""
indented, line_offset, blank_finish = \
self.state_machine.get_first_known_indented(match.end())
sm = self.known_indent_sm(debug=self.debug,
**self.known_indent_sm_kwargs)
results = sm.run(indented, input_offset=line_offset)
return context, next_state, results
class _SearchOverride:
"""
Mix-in class to override `StateMachine` regular expression behavior.
Changes regular expression matching, from the default `re.match()`
(succeeds only if the pattern matches at the start of `self.line`) to
`re.search()` (succeeds if the pattern matches anywhere in `self.line`).
When subclassing a `StateMachine`, list this class **first** in the
inheritance list of the class definition.
"""
def match(self, pattern):
"""
Return the result of a regular expression search.
Overrides `StateMachine.match()`.
Parameter `pattern`: `re` compiled regular expression.
"""
return pattern.search(self.line)
class SearchStateMachine(_SearchOverride, StateMachine):
"""`StateMachine` which uses `re.search()` instead of `re.match()`."""
pass
class SearchStateMachineWS(_SearchOverride, StateMachineWS):
"""`StateMachineWS` which uses `re.search()` instead of `re.match()`."""
pass
class ViewList:
"""
List with extended functionality: slices of ViewList objects are child
lists, linked to their parents. Changes made to a child list also affect
the parent list. A child list is effectively a "view" (in the SQL sense)
of the parent list. Changes to parent lists, however, do *not* affect
active child lists. If a parent list is changed, any active child lists
should be recreated.
The start and end of the slice can be trimmed using the `trim_start()` and
`trim_end()` methods, without affecting the parent list. The link between
child and parent lists can be broken by calling `disconnect()` on the
child list.
Also, ViewList objects keep track of the source & offset of each item.
This information is accessible via the `source()`, `offset()`, and
`info()` methods.
"""
def __init__(self, initlist=None, source=None, items=None,
parent=None, parent_offset=None):
self.data = []
"""The actual list of data, flattened from various sources."""
self.items = []
"""A list of (source, offset) pairs, same length as `self.data`: the
source of each line and the offset of each line from the beginning of
its source."""
self.parent = parent
"""The parent list."""
self.parent_offset = parent_offset
"""Offset of this list from the beginning of the parent list."""
if isinstance(initlist, ViewList):
self.data = initlist.data[:]
self.items = initlist.items[:]
elif initlist is not None:
self.data = list(initlist)
if items:
self.items = items
else:
self.items = [(source, i) for i in range(len(initlist))]
assert len(self.data) == len(self.items), 'data mismatch'
def __str__(self):
return str(self.data)
def __repr__(self):
return '%s(%s, items=%s)' % (self.__class__.__name__,
self.data, self.items)
def __lt__(self, other): return self.data < self.__cast(other)
def __le__(self, other): return self.data <= self.__cast(other)
def __eq__(self, other): return self.data == self.__cast(other)
def __ne__(self, other): return self.data != self.__cast(other)
def __gt__(self, other): return self.data > self.__cast(other)
def __ge__(self, other): return self.data >= self.__cast(other)
def __cmp__(self, other): return cmp(self.data, self.__cast(other))
def __cast(self, other):
if isinstance(other, ViewList):
return other.data
else:
return other
def __contains__(self, item): return item in self.data
def __len__(self): return len(self.data)
# The __getitem__()/__setitem__() methods check whether the index
# is a slice first, since native list objects start supporting
# them directly in Python 2.3 (no exception is raised when
# indexing a list with a slice object; they just work).
def __getitem__(self, i):
if isinstance(i, _SliceType):
assert i.step in (None, 1), 'cannot handle slice with stride'
return self.__class__(self.data[i.start:i.stop],
items=self.items[i.start:i.stop],
parent=self, parent_offset=i.start)
else:
return self.data[i]
def __setitem__(self, i, item):
if isinstance(i, _SliceType):
assert i.step in (None, 1), 'cannot handle slice with stride'
if not isinstance(item, ViewList):
raise TypeError('assigning non-ViewList to ViewList slice')
self.data[i.start:i.stop] = item.data
self.items[i.start:i.stop] = item.items
assert len(self.data) == len(self.items), 'data mismatch'
if self.parent:
self.parent[i.start + self.parent_offset
: i.stop + self.parent_offset] = item
else:
self.data[i] = item
if self.parent:
self.parent[i + self.parent_offset] = item
def __delitem__(self, i):
try:
del self.data[i]
del self.items[i]
if self.parent:
del self.parent[i + self.parent_offset]
except TypeError:
assert i.step is None, 'cannot handle slice with stride'
del self.data[i.start:i.stop]
del self.items[i.start:i.stop]
if self.parent:
del self.parent[i.start + self.parent_offset
: i.stop + self.parent_offset]
def __add__(self, other):
if isinstance(other, ViewList):
return self.__class__(self.data + other.data,
items=(self.items + other.items))
else:
raise TypeError('adding non-ViewList to a ViewList')
def __radd__(self, other):
if isinstance(other, ViewList):
return self.__class__(other.data + self.data,
items=(other.items + self.items))
else:
raise TypeError('adding ViewList to a non-ViewList')
def __iadd__(self, other):
if isinstance(other, ViewList):
self.data += other.data
else:
raise TypeError('argument to += must be a ViewList')
return self
def __mul__(self, n):
return self.__class__(self.data * n, items=(self.items * n))
__rmul__ = __mul__
def __imul__(self, n):
self.data *= n
self.items *= n
return self
def extend(self, other):
if not isinstance(other, ViewList):
raise TypeError('extending a ViewList with a non-ViewList')
if self.parent:
self.parent.insert(len(self.data) + self.parent_offset, other)
self.data.extend(other.data)
self.items.extend(other.items)
def append(self, item, source=None, offset=0):
if source is None:
self.extend(item)
else:
if self.parent:
self.parent.insert(len(self.data) + self.parent_offset, item,
source, offset)
self.data.append(item)
self.items.append((source, offset))
def insert(self, i, item, source=None, offset=0):
if source is None:
if not isinstance(item, ViewList):
raise TypeError('inserting non-ViewList with no source given')
self.data[i:i] = item.data
self.items[i:i] = item.items
if self.parent:
index = (len(self.data) + i) % len(self.data)
self.parent.insert(index + self.parent_offset, item)
else:
self.data.insert(i, item)
self.items.insert(i, (source, offset))
if self.parent:
index = (len(self.data) + i) % len(self.data)
self.parent.insert(index + self.parent_offset, item,
source, offset)
def pop(self, i=-1):
if self.parent:
index = (len(self.data) + i) % len(self.data)
self.parent.pop(index + self.parent_offset)
self.items.pop(i)
return self.data.pop(i)
def trim_start(self, n=1):
"""
Remove items from the start of the list, without touching the parent.
"""
if n > len(self.data):
raise IndexError("Size of trim too large; can't trim %s items "
"from a list of size %s." % (n, len(self.data)))
elif n < 0:
raise IndexError('Trim size must be >= 0.')
del self.data[:n]
del self.items[:n]
if self.parent:
self.parent_offset += n
def trim_end(self, n=1):
"""
Remove items from the end of the list, without touching the parent.
"""
if n > len(self.data):
raise IndexError("Size of trim too large; can't trim %s items "
"from a list of size %s." % (n, len(self.data)))
elif n < 0:
raise IndexError('Trim size must be >= 0.')
del self.data[-n:]
del self.items[-n:]
def remove(self, item):
index = self.index(item)
del self[index]
def count(self, item): return self.data.count(item)
def index(self, item): return self.data.index(item)
def reverse(self):
self.data.reverse()
self.items.reverse()
self.parent = None
def sort(self, *args):
tmp = zip(self.data, self.items)
tmp.sort(*args)
self.data = [entry[0] for entry in tmp]
self.items = [entry[1] for entry in tmp]
self.parent = None
def info(self, i):
"""Return source & offset for index `i`."""
try:
return self.items[i]
except IndexError:
if i == len(self.data): # Just past the end
return self.items[i - 1][0], None
else:
raise
def source(self, i):
"""Return source for index `i`."""
return self.info(i)[0]
def offset(self, i):
"""Return offset for index `i`."""
return self.info(i)[1]
def disconnect(self):
"""Break link between this list and parent list."""
self.parent = None
class StringList(ViewList):
"""A `ViewList` with string-specific methods."""
def trim_left(self, length, start=0, end=sys.maxint):
"""
Trim `length` characters off the beginning of each item, in-place,
from index `start` to `end`. No whitespace-checking is done on the
trimmed text. Does not affect slice parent.
"""
self.data[start:end] = [line[length:]
for line in self.data[start:end]]
def get_text_block(self, start, flush_left=0):
"""
Return a contiguous block of text.
If `flush_left` is true, raise `UnexpectedIndentationError` if an
indented line is encountered before the text block ends (with a blank
line).
"""
end = start
last = len(self.data)
while end < last:
line = self.data[end]
if not line.strip():
break
if flush_left and (line[0] == ' '):
source, offset = self.info(end)
raise UnexpectedIndentationError(self[start:end], source,
offset + 1)
end += 1
return self[start:end]
def get_indented(self, start=0, until_blank=0, strip_indent=1,
block_indent=None, first_indent=None):
"""
Extract and return a StringList of indented lines of text.
Collect all lines with indentation, determine the minimum indentation,
remove the minimum indentation from all indented lines (unless
`strip_indent` is false), and return them. All lines up to but not
including the first unindented line will be returned.
:Parameters:
- `start`: The index of the first line to examine.
- `until_blank`: Stop collecting at the first blank line if true.
- `strip_indent`: Strip common leading indent if true (default).
- `block_indent`: The indent of the entire block, if known.
- `first_indent`: The indent of the first line, if known.
:Return:
- a StringList of indented lines with mininum indent removed;
- the amount of the indent;
- a boolean: did the indented block finish with a blank line or EOF?
"""
indent = block_indent # start with None if unknown
end = start
if block_indent is not None and first_indent is None:
first_indent = block_indent
if first_indent is not None:
end += 1
last = len(self.data)
while end < last:
line = self.data[end]
if line and (line[0] != ' '
or (block_indent is not None
and line[:block_indent].strip())):
# Line not indented or insufficiently indented.
# Block finished properly iff the last indented line blank:
blank_finish = ((end > start)
and not self.data[end - 1].strip())
break
stripped = line.lstrip()
if not stripped: # blank line
if until_blank:
blank_finish = 1
break
elif block_indent is None:
line_indent = len(line) - len(stripped)
if indent is None:
indent = line_indent
else:
indent = min(indent, line_indent)
end += 1
else:
blank_finish = 1 # block ends at end of lines
block = self[start:end]
if first_indent is not None and block:
block.data[0] = block.data[0][first_indent:]
if indent and strip_indent:
block.trim_left(indent, start=(first_indent is not None))
return block, indent or 0, blank_finish
def get_2D_block(self, top, left, bottom, right, strip_indent=1):
block = self[top:bottom]
indent = right
for i in range(len(block.data)):
block.data[i] = line = block.data[i][left:right].rstrip()
if line:
indent = min(indent, len(line) - len(line.lstrip()))
if strip_indent and 0 < indent < right:
block.data = [line[indent:] for line in block.data]
return block
class StateMachineError(Exception): pass
class UnknownStateError(StateMachineError): pass
class DuplicateStateError(StateMachineError): pass
class UnknownTransitionError(StateMachineError): pass
class DuplicateTransitionError(StateMachineError): pass
class TransitionPatternNotFound(StateMachineError): pass
class TransitionMethodNotFound(StateMachineError): pass
class UnexpectedIndentationError(StateMachineError): pass
class TransitionCorrection(Exception):
"""
Raise from within a transition method to switch to another transition.
Raise with one argument, the new transition name.
"""
class StateCorrection(Exception):
"""
Raise from within a transition method to switch to another state.
Raise with one or two arguments: new state name, and an optional new
transition name.
"""
def string2lines(astring, tab_width=8, convert_whitespace=0,
whitespace=re.compile('[\v\f]')):
"""
Return a list of one-line strings with tabs expanded and no newlines.
Each tab is expanded with between 1 and `tab_width` spaces, so that the
next character's index becomes a multiple of `tab_width` (8 by default).
Parameters:
- `astring`: a multi-line string.
- `tab_width`: the number of columns between tab stops.
- `convert_whitespace`: convert form feeds and vertical tabs to spaces?
"""
if convert_whitespace:
astring = whitespace.sub(' ', astring)
return [s.expandtabs(tab_width) for s in astring.splitlines()]
def _exception_data():
"""
Return exception information:
- the exception's class name;
- the exception object;
- the name of the file containing the offending code;
- the line number of the offending code;
- the function name of the offending code.
"""
type, value, traceback = sys.exc_info()
while traceback.tb_next:
traceback = traceback.tb_next
code = traceback.tb_frame.f_code
return (type.__name__, value, code.co_filename, traceback.tb_lineno,
code.co_name)
"""Text wrapping and filling.
"""
# Copyright (C) 1999-2001 Gregory P. Ward.
# Copyright (C) 2002, 2003 Python Software Foundation.
# Written by Greg Ward <gward@python.net>
__revision__ = "$Id: textwrap.py,v 1.1 2003/08/13 16:19:29 andreasjung Exp $"
import string, re
# Do the right thing with boolean values for all known Python versions
# (so this module can be copied to projects that don't depend on Python
# 2.3, e.g. Optik and Docutils).
try:
True, False
except NameError:
(True, False) = (1, 0)
__all__ = ['TextWrapper', 'wrap', 'fill']
# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters. The main reason for doing this is that in
# ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales
# that character winds up in string.whitespace. Respecting
# string.whitespace in those cases would 1) make textwrap treat 0xa0 the
# same as any other whitespace char, which is clearly wrong (it's a
# *non-breaking* space), 2) possibly cause problems with Unicode,
# since 0xa0 is not in range(128).
_whitespace = '\t\n\x0b\x0c\r '
class TextWrapper:
"""
Object for wrapping/filling text. The public interface consists of
the wrap() and fill() methods; the other methods are just there for
subclasses to override in order to tweak the default behaviour.
If you want to completely replace the main wrapping algorithm,
you'll probably have to override _wrap_chunks().
Several instance attributes control various aspects of wrapping:
width (default: 70)
the maximum width of wrapped lines (unless break_long_words
is false)
initial_indent (default: "")
string that will be prepended to the first line of wrapped
output. Counts towards the line's width.
subsequent_indent (default: "")
string that will be prepended to all lines save the first
of wrapped output; also counts towards each line's width.
expand_tabs (default: true)
Expand tabs in input text to spaces before further processing.
Each tab will become 1 .. 8 spaces, depending on its position in
its line. If false, each tab is treated as a single character.
replace_whitespace (default: true)
Replace all whitespace characters in the input text by spaces
after tab expansion. Note that if expand_tabs is false and
replace_whitespace is true, every tab will be converted to a
single space!
fix_sentence_endings (default: false)
Ensure that sentence-ending punctuation is always followed
by two spaces. Off by default because the algorithm is
(unavoidably) imperfect.
break_long_words (default: true)
Break words longer than 'width'. If false, those words will not
be broken, and some lines might be longer than 'width'.
"""
whitespace_trans = string.maketrans(_whitespace, ' ' * len(_whitespace))
unicode_whitespace_trans = {}
uspace = ord(u' ')
for x in map(ord, _whitespace):
unicode_whitespace_trans[x] = uspace
# This funky little regex is just the trick for splitting
# text up into word-wrappable chunks. E.g.
# "Hello there -- you goof-ball, use the -b option!"
# splits into
# Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
# (after stripping out empty strings).
wordsep_re = re.compile(r'(\s+|' # any whitespace
r'-*\w{2,}-(?=\w{2,})|' # hyphenated words
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
# XXX will there be a locale-or-charset-aware version of
# string.lowercase in 2.3?
sentence_end_re = re.compile(r'[%s]' # lowercase letter
r'[\.\!\?]' # sentence-ending punct.
r'[\"\']?' # optional end-of-quote
% string.lowercase)
def __init__ (self,
width=70,
initial_indent="",
subsequent_indent="",
expand_tabs=True,
replace_whitespace=True,
fix_sentence_endings=False,
break_long_words=True):
self.width = width
self.initial_indent = initial_indent
self.subsequent_indent = subsequent_indent
self.expand_tabs = expand_tabs
self.replace_whitespace = replace_whitespace
self.fix_sentence_endings = fix_sentence_endings
self.break_long_words = break_long_words
# -- Private methods -----------------------------------------------
# (possibly useful for subclasses to override)
def _munge_whitespace(self, text):
"""_munge_whitespace(text : string) -> string
Munge whitespace in text: expand tabs and convert all other
whitespace characters to spaces. Eg. " foo\tbar\n\nbaz"
becomes " foo bar baz".
"""
if self.expand_tabs:
text = text.expandtabs()
if self.replace_whitespace:
if isinstance(text, str):
text = text.translate(self.whitespace_trans)
elif isinstance(text, unicode):
text = text.translate(self.unicode_whitespace_trans)
return text
def _split(self, text):
"""_split(text : string) -> [string]
Split the text to wrap into indivisible chunks. Chunks are
not quite the same as words; see wrap_chunks() for full
details. As an example, the text
Look, goof-ball -- use the -b option!
breaks into the following chunks:
'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
'use', ' ', 'the', ' ', '-b', ' ', 'option!'
"""
chunks = self.wordsep_re.split(text)
chunks = filter(None, chunks)
return chunks
def _fix_sentence_endings(self, chunks):
"""_fix_sentence_endings(chunks : [string])
Correct for sentence endings buried in 'chunks'. Eg. when the
original text contains "... foo.\nBar ...", munge_whitespace()
and split() will convert that to [..., "foo.", " ", "Bar", ...]
which has one too few spaces; this method simply changes the one
space to two.
"""
i = 0
pat = self.sentence_end_re
while i < len(chunks)-1:
if chunks[i+1] == " " and pat.search(chunks[i]):
chunks[i+1] = " "
i += 2
else:
i += 1
def _handle_long_word(self, chunks, cur_line, cur_len, width):
"""_handle_long_word(chunks : [string],
cur_line : [string],
cur_len : int, width : int)
Handle a chunk of text (most likely a word, not whitespace) that
is too long to fit in any line.
"""
space_left = width - cur_len
# If we're allowed to break long words, then do so: put as much
# of the next chunk onto the current line as will fit.
if self.break_long_words:
cur_line.append(chunks[0][0:space_left])
chunks[0] = chunks[0][space_left:]
# Otherwise, we have to preserve the long word intact. Only add
# it to the current line if there's nothing already there --
# that minimizes how much we violate the width constraint.
elif not cur_line:
cur_line.append(chunks.pop(0))
# If we're not allowed to break long words, and there's already
# text on the current line, do nothing. Next time through the
# main loop of _wrap_chunks(), we'll wind up here again, but
# cur_len will be zero, so the next line will be entirely
# devoted to the long word that we can't handle right now.
def _wrap_chunks(self, chunks):
"""_wrap_chunks(chunks : [string]) -> [string]
Wrap a sequence of text chunks and return a list of lines of
length 'self.width' or less. (If 'break_long_words' is false,
some lines may be longer than this.) Chunks correspond roughly
to words and the whitespace between them: each chunk is
indivisible (modulo 'break_long_words'), but a line break can
come between any two chunks. Chunks should not have internal
whitespace; ie. a chunk is either all whitespace or a "word".
Whitespace chunks will be removed from the beginning and end of
lines, but apart from that whitespace is preserved.
"""
lines = []
if self.width <= 0:
raise ValueError("invalid width %r (must be > 0)" % self.width)
while chunks:
# Start the list of chunks that will make up the current line.
# cur_len is just the length of all the chunks in cur_line.
cur_line = []
cur_len = 0
# Figure out which static string will prefix this line.
if lines:
indent = self.subsequent_indent
else:
indent = self.initial_indent
# Maximum width for this line.
width = self.width - len(indent)
# First chunk on line is whitespace -- drop it, unless this
# is the very beginning of the text (ie. no lines started yet).
if chunks[0].strip() == '' and lines:
del chunks[0]
while chunks:
l = len(chunks[0])
# Can at least squeeze this chunk onto the current line.
if cur_len + l <= width:
cur_line.append(chunks.pop(0))
cur_len += l
# Nope, this line is full.
else:
break
# The current line is full, and the next chunk is too big to
# fit on *any* line (not just this one).
if chunks and len(chunks[0]) > width:
self._handle_long_word(chunks, cur_line, cur_len, width)
# If the last chunk on this line is all whitespace, drop it.
if cur_line and cur_line[-1].strip() == '':
del cur_line[-1]
# Convert current line back to a string and store it in list
# of all lines (return value).
if cur_line:
lines.append(indent + ''.join(cur_line))
return lines
# -- Public interface ----------------------------------------------
def wrap(self, text):
"""wrap(text : string) -> [string]
Reformat the single paragraph in 'text' so it fits in lines of
no more than 'self.width' columns, and return a list of wrapped
lines. Tabs in 'text' are expanded with string.expandtabs(),
and all other whitespace characters (including newline) are
converted to space.
"""
text = self._munge_whitespace(text)
indent = self.initial_indent
if len(text) + len(indent) <= self.width:
return [indent + text]
chunks = self._split(text)
if self.fix_sentence_endings:
self._fix_sentence_endings(chunks)
return self._wrap_chunks(chunks)
def fill(self, text):
"""fill(text : string) -> string
Reformat the single paragraph in 'text' to fit in lines of no
more than 'self.width' columns, and return a new string
containing the entire wrapped paragraph.
"""
return "\n".join(self.wrap(text))
# -- Convenience interface ---------------------------------------------
def wrap(text, width=70, **kwargs):
"""Wrap a single paragraph of text, returning a list of wrapped lines.
Reformat the single paragraph in 'text' so it fits in lines of no
more than 'width' columns, and return a list of wrapped lines. By
default, tabs in 'text' are expanded with string.expandtabs(), and
all other whitespace characters (including newline) are converted to
space. See TextWrapper class for available keyword args to customize
wrapping behaviour.
"""
w = TextWrapper(width=width, **kwargs)
return w.wrap(text)
def fill(text, width=70, **kwargs):
"""Fill a single paragraph of text, returning a new string.
Reformat the single paragraph in 'text' to fit in lines of no more
than 'width' columns, and return a new string containing the entire
wrapped paragraph. As with wrap(), tabs are expanded and other
whitespace characters converted to space. See TextWrapper class for
available keyword args to customize wrapping behaviour.
"""
w = TextWrapper(width=width, **kwargs)
return w.fill(text)
# -- Loosely related functionality -------------------------------------
def dedent(text):
"""dedent(text : string) -> string
Remove any whitespace than can be uniformly removed from the left
of every line in `text`.
This can be used e.g. to make triple-quoted strings line up with
the left edge of screen/whatever, while still presenting it in the
source code in indented form.
For example:
def test():
# end first line with \ to avoid the empty line!
s = '''\
hello
world
'''
print repr(s) # prints ' hello\n world\n '
print repr(dedent(s)) # prints 'hello\n world\n'
"""
lines = text.expandtabs().split('\n')
margin = None
for line in lines:
content = line.lstrip()
if not content:
continue
indent = len(line) - len(content)
if margin is None:
margin = indent
else:
margin = min(margin, indent)
if margin is not None and margin > 0:
for i in range(len(lines)):
lines[i] = lines[i][margin:]
return '\n'.join(lines)
"""
`schemes` is a dictionary with lowercase URI addressing schemes as
keys and descriptions as values. It was compiled from the index at
http://www.w3.org/Addressing/schemes.html (revised 2001-08-20).
"""
# Many values are blank and should be filled in with useful descriptions.
schemes = {
'about': 'provides information on Navigator',
'acap': 'Application Configuration Access Protocol',
'addbook': "To add vCard entries to Communicator's Address Book",
'afp': 'Apple Filing Protocol',
'afs': 'Andrew File System global file names',
'aim': 'AOL Instant Messenger',
'callto': 'for NetMeeting links',
'castanet': 'Castanet Tuner URLs for Netcaster',
'chttp': 'cached HTTP supported by RealPlayer',
'cid': 'content identifier',
'data': ('allows inclusion of small data items as "immediate" data; '
'RFC 2397'),
'dav': 'Distributed Authoring and Versioning Protocol; RFC 2518',
'dns': 'Domain Name System resources',
'eid': ('External ID; non-URL data; general escape mechanism to allow '
'access to information for applications that are too '
'specialized to justify their own schemes'),
'fax': ('a connection to a terminal that can handle telefaxes '
'(facsimiles); RFC 2806'),
'file': 'Host-specific file names',
'finger': '',
'freenet': '',
'ftp': 'File Transfer Protocol',
'gopher': 'The Gopher Protocol',
'gsm-sms': ('Global System for Mobile Communications Short Message '
'Service'),
'h323': 'video (audiovisual) communication on local area networks',
'h324': ('video and audio communications over low bitrate connections '
'such as POTS modem connections'),
'hdl': 'CNRI handle system',
'hnews': 'an HTTP-tunneling variant of the NNTP news protocol',
'http': 'Hypertext Transfer Protocol',
'https': 'HTTP over SSL',
'iioploc': 'Internet Inter-ORB Protocol Location?',
'ilu': 'Inter-Language Unification',
'imap': 'Internet Message Access Protocol',
'ior': 'CORBA interoperable object reference',
'ipp': 'Internet Printing Protocol',
'irc': 'Internet Relay Chat',
'jar': 'Java archive',
'javascript': ('JavaScript code; evaluates the expression after the '
'colon'),
'jdbc': '',
'ldap': 'Lightweight Directory Access Protocol',
'lifn': '',
'livescript': '',
'lrq': '',
'mailbox': 'Mail folder access',
'mailserver': 'Access to data available from mail servers',
'mailto': 'Electronic mail address',
'md5': '',
'mid': 'message identifier',
'mocha': '',
'modem': ('a connection to a terminal that can handle incoming data '
'calls; RFC 2806'),
'news': 'USENET news',
'nfs': 'Network File System protocol',
'nntp': 'USENET news using NNTP access',
'opaquelocktoken': '',
'phone': '',
'pop': 'Post Office Protocol',
'pop3': 'Post Office Protocol v3',
'printer': '',
'prospero': 'Prospero Directory Service',
'res': '',
'rtsp': 'real time streaming protocol',
'rvp': '',
'rwhois': '',
'rx': 'Remote Execution',
'sdp': '',
'service': 'service location',
'shttp': 'secure hypertext transfer protocol',
'sip': 'Session Initiation Protocol',
'smb': '',
'snews': 'For NNTP postings via SSL',
't120': 'real time data conferencing (audiographics)',
'tcp': '',
'tel': ('a connection to a terminal that handles normal voice '
'telephone calls, a voice mailbox or another voice messaging '
'system or a service that can be operated using DTMF tones; '
'RFC 2806.'),
'telephone': 'telephone',
'telnet': 'Reference to interactive sessions',
'tip': 'Transaction Internet Protocol',
'tn3270': 'Interactive 3270 emulation sessions',
'tv': '',
'urn': 'Uniform Resource Name',
'uuid': '',
'vemmi': 'versatile multimedia interface',
'videotex': '',
'view-source': 'displays HTML code that was generated with JavaScript',
'wais': 'Wide Area Information Servers',
'whodp': '',
'whois++': 'Distributed directory service.',
'z39.50r': 'Z39.50 Retrieval',
'z39.50s': 'Z39.50 Session',}
# Author: David Goodger
# Contact: goodger@users.sourceforge.net
# Revision: $Revision: 1.3 $
# Date: $Date: 2003/07/10 15:49:30 $
# Copyright: This module has been placed in the public domain.
"""
Miscellaneous utilities for the documentation utilities.
"""
__docformat__ = 'reStructuredText'
import sys
import os
import os.path
from types import StringType, UnicodeType
from docutils import ApplicationError, DataError
from docutils import frontend, nodes
class SystemMessage(ApplicationError):
def __init__(self, system_message, level):
Exception.__init__(self, system_message.astext())
self.level = level
class Reporter:
"""
Info/warning/error reporter and ``system_message`` element generator.
Five levels of system messages are defined, along with corresponding
methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.
There is typically one Reporter object per process. A Reporter object is
instantiated with thresholds for reporting (generating warnings) and
halting processing (raising exceptions), a switch to turn debug output on
or off, and an I/O stream for warnings. These are stored in the default
reporting category, '' (zero-length string).
Multiple reporting categories [#]_ may be set, each with its own reporting
and halting thresholds, debugging switch, and warning stream
(collectively a `ConditionSet`). Categories are hierarchical dotted-name
strings that look like attribute references: 'spam', 'spam.eggs',
'neeeow.wum.ping'. The 'spam' category is the ancestor of
'spam.bacon.eggs'. Unset categories inherit stored conditions from their
closest ancestor category that has been set.
When a system message is generated, the stored conditions from its
category (or ancestor if unset) are retrieved. The system message level
is compared to the thresholds stored in the category, and a warning or
error is generated as appropriate. Debug messages are produced iff the
stored debug switch is on. Message output is sent to the stored warning
stream.
The default category is '' (empty string). By convention, Writers should
retrieve reporting conditions from the 'writer' category (which, unless
explicitly set, defaults to the conditions of the default category).
The Reporter class also employs a modified form of the "Observer" pattern
[GoF95]_ to track system messages generated. The `attach_observer` method
should be called before parsing, with a bound method or function which
accepts system messages. The observer can be removed with
`detach_observer`, and another added in its place.
.. [#] The concept of "categories" was inspired by the log4j project:
http://jakarta.apache.org/log4j/.
.. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
1995.
"""
levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
"""List of names for system message levels, indexed by level."""
def __init__(self, source, report_level, halt_level, stream=None,
debug=0, encoding='ascii', error_handler='replace'):
"""
Initialize the `ConditionSet` forthe `Reporter`'s default category.
:Parameters:
- `source`: The path to or description of the source data.
- `report_level`: The level at or above which warning output will
be sent to `stream`.
- `halt_level`: The level at or above which `SystemMessage`
exceptions will be raised, halting execution.
- `debug`: Show debug (level=0) system messages?
- `stream`: Where warning output is sent. Can be file-like (has a
``.write`` method), a string (file name, opened for writing), or
`None` (implies `sys.stderr`; default).
- `encoding`: The encoding for stderr output.
- `error_handler`: The error handler for stderr output encoding.
"""
self.source = source
"""The path to or description of the source data."""
if stream is None:
stream = sys.stderr
elif type(stream) in (StringType, UnicodeType):
raise NotImplementedError('This should open a file for writing.')
self.encoding = encoding
"""The character encoding for the stderr output."""
self.error_handler = error_handler
"""The character encoding error handler."""
self.categories = {'': ConditionSet(debug, report_level, halt_level,
stream)}
"""Mapping of category names to conditions. Default category is ''."""
self.observers = []
"""List of bound methods or functions to call with each system_message
created."""
self.max_level = -1
"""The highest level system message generated so far."""
def set_conditions(self, category, report_level, halt_level,
stream=None, debug=0):
if stream is None:
stream = sys.stderr
self.categories[category] = ConditionSet(debug, report_level,
halt_level, stream)
def unset_conditions(self, category):
if category and self.categories.has_key(category):
del self.categories[category]
__delitem__ = unset_conditions
def get_conditions(self, category):
while not self.categories.has_key(category):
category = category[:category.rfind('.') + 1][:-1]
return self.categories[category]
__getitem__ = get_conditions
def attach_observer(self, observer):
"""
The `observer` parameter is a function or bound method which takes one
argument, a `nodes.system_message` instance.
"""
self.observers.append(observer)
def detach_observer(self, observer):
self.observers.remove(observer)
def notify_observers(self, message):
for observer in self.observers:
observer(message)
def system_message(self, level, message, *children, **kwargs):
"""
Return a system_message object.
Raise an exception or generate a warning if appropriate.
"""
attributes = kwargs.copy()
category = kwargs.get('category', '')
if kwargs.has_key('category'):
del attributes['category']
if kwargs.has_key('base_node'):
source, line = get_source_line(kwargs['base_node'])
del attributes['base_node']
if source is not None:
attributes.setdefault('source', source)
if line is not None:
attributes.setdefault('line', line)
attributes.setdefault('source', self.source)
msg = nodes.system_message(message, level=level,
type=self.levels[level],
*children, **attributes)
debug, report_level, halt_level, stream = self[category].astuple()
if level >= report_level or debug and level == 0:
msgtext = msg.astext().encode(self.encoding, self.error_handler)
if category:
print >>stream, msgtext, '[%s]' % category
else:
print >>stream, msgtext
if level >= halt_level:
raise SystemMessage(msg, level)
if level > 0 or debug:
self.notify_observers(msg)
self.max_level = max(level, self.max_level)
return msg
def debug(self, *args, **kwargs):
"""
Level-0, "DEBUG": an internal reporting issue. Typically, there is no
effect on the processing. Level-0 system messages are handled
separately from the others.
"""
return self.system_message(0, *args, **kwargs)
def info(self, *args, **kwargs):
"""
Level-1, "INFO": a minor issue that can be ignored. Typically there is
no effect on processing, and level-1 system messages are not reported.
"""
return self.system_message(1, *args, **kwargs)
def warning(self, *args, **kwargs):
"""
Level-2, "WARNING": an issue that should be addressed. If ignored,
there may be unpredictable problems with the output.
"""
return self.system_message(2, *args, **kwargs)
def error(self, *args, **kwargs):
"""
Level-3, "ERROR": an error that should be addressed. If ignored, the
output will contain errors.
"""
return self.system_message(3, *args, **kwargs)
def severe(self, *args, **kwargs):
"""
Level-4, "SEVERE": a severe error that must be addressed. If ignored,
the output will contain severe errors. Typically level-4 system
messages are turned into exceptions which halt processing.
"""
return self.system_message(4, *args, **kwargs)
class ConditionSet:
"""
A set of two thresholds (`report_level` & `halt_level`), a switch
(`debug`), and an I/O stream (`stream`), corresponding to one `Reporter`
category.
"""
def __init__(self, debug, report_level, halt_level, stream):
self.debug = debug
self.report_level = report_level
self.halt_level = halt_level
self.stream = stream
def astuple(self):
return (self.debug, self.report_level, self.halt_level,
self.stream)
class ExtensionOptionError(DataError): pass
class BadOptionError(ExtensionOptionError): pass
class BadOptionDataError(ExtensionOptionError): pass
class DuplicateOptionError(ExtensionOptionError): pass
def extract_extension_options(field_list, options_spec):
"""
Return a dictionary mapping extension option names to converted values.
:Parameters:
- `field_list`: A flat field list without field arguments, where each
field body consists of a single paragraph only.
- `options_spec`: Dictionary mapping known option names to a
conversion function such as `int` or `float`.
:Exceptions:
- `KeyError` for unknown option names.
- `ValueError` for invalid option values (raised by the conversion
function).
- `DuplicateOptionError` for duplicate options.
- `BadOptionError` for invalid fields.
- `BadOptionDataError` for invalid option data (missing name,
missing data, bad quotes, etc.).
"""
option_list = extract_options(field_list)
option_dict = assemble_option_dict(option_list, options_spec)
return option_dict
def extract_options(field_list):
"""
Return a list of option (name, value) pairs from field names & bodies.
:Parameter:
`field_list`: A flat field list, where each field name is a single
word and each field body consists of a single paragraph only.
:Exceptions:
- `BadOptionError` for invalid fields.
- `BadOptionDataError` for invalid option data (missing name,
missing data, bad quotes, etc.).
"""
option_list = []
for field in field_list:
if len(field[0].astext().split()) != 1:
raise BadOptionError(
'extension option field name may not contain multiple words')
name = str(field[0].astext().lower())
body = field[1]
if len(body) == 0:
data = None
elif len(body) > 1 or not isinstance(body[0], nodes.paragraph) \
or len(body[0]) != 1 or not isinstance(body[0][0], nodes.Text):
raise BadOptionDataError(
'extension option field body may contain\n'
'a single paragraph only (option "%s")' % name)
else:
data = body[0][0].astext()
option_list.append((name, data))
return option_list
def assemble_option_dict(option_list, options_spec):
"""
Return a mapping of option names to values.
:Parameters:
- `option_list`: A list of (name, value) pairs (the output of
`extract_options()`).
- `options_spec`: Dictionary mapping known option names to a
conversion function such as `int` or `float`.
:Exceptions:
- `KeyError` for unknown option names.
- `DuplicateOptionError` for duplicate options.
- `ValueError` for invalid option values (raised by conversion
function).
"""
options = {}
for name, value in option_list:
convertor = options_spec[name] # raises KeyError if unknown
if options.has_key(name):
raise DuplicateOptionError('duplicate option "%s"' % name)
try:
options[name] = convertor(value)
except (ValueError, TypeError), detail:
raise detail.__class__('(option: "%s"; value: %r)\n%s'
% (name, value, detail))
return options
class NameValueError(DataError): pass
def extract_name_value(line):
"""
Return a list of (name, value) from a line of the form "name=value ...".
:Exception:
`NameValueError` for invalid input (missing name, missing data, bad
quotes, etc.).
"""
attlist = []
while line:
equals = line.find('=')
if equals == -1:
raise NameValueError('missing "="')
attname = line[:equals].strip()
if equals == 0 or not attname:
raise NameValueError(
'missing attribute name before "="')
line = line[equals+1:].lstrip()
if not line:
raise NameValueError(
'missing value after "%s="' % attname)
if line[0] in '\'"':
endquote = line.find(line[0], 1)
if endquote == -1:
raise NameValueError(
'attribute "%s" missing end quote (%s)'
% (attname, line[0]))
if len(line) > endquote + 1 and line[endquote + 1].strip():
raise NameValueError(
'attribute "%s" end quote (%s) not followed by '
'whitespace' % (attname, line[0]))
data = line[1:endquote]
line = line[endquote+1:].lstrip()
else:
space = line.find(' ')
if space == -1:
data = line
line = ''
else:
data = line[:space]
line = line[space+1:].lstrip()
attlist.append((attname.lower(), data))
return attlist
def new_document(source, settings=None):
"""
Return a new empty document object.
:Parameters:
`source` : string
The path to or description of the source text of the document.
`settings` : optparse.Values object
Runtime settings. If none provided, a default set will be used.
"""
if settings is None:
settings = frontend.OptionParser().get_default_values()
reporter = Reporter(source, settings.report_level, settings.halt_level,
stream=settings.warning_stream, debug=settings.debug,
encoding=settings.error_encoding,
error_handler=settings.error_encoding_error_handler)
document = nodes.document(settings, reporter, source=source)
document.note_source(source, -1)
return document
def clean_rcs_keywords(paragraph, keyword_substitutions):
if len(paragraph) == 1 and isinstance(paragraph[0], nodes.Text):
textnode = paragraph[0]
for pattern, substitution in keyword_substitutions:
match = pattern.match(textnode.data)
if match:
textnode.data = pattern.sub(substitution, textnode.data)
return
def relative_path(source, target):
"""
Build and return a path to `target`, relative to `source` (both files).
If there is no common prefix, return the absolute path to `target`.
"""
source_parts = os.path.abspath(source or 'dummy_file').split(os.sep)
target_parts = os.path.abspath(target).split(os.sep)
# Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
if source_parts[:2] != target_parts[:2]:
# Nothing in common between paths.
# Return absolute path, using '/' for URLs:
return '/'.join(target_parts)
source_parts.reverse()
target_parts.reverse()
while (source_parts and target_parts
and source_parts[-1] == target_parts[-1]):
# Remove path components in common:
source_parts.pop()
target_parts.pop()
target_parts.reverse()
parts = ['..'] * (len(source_parts) - 1) + target_parts
return '/'.join(parts)
def get_source_line(node):
"""
Return the "source" and "line" attributes from the `node` given or from
its closest ancestor.
"""
while node:
if node.source or node.line:
return node.source, node.line
node = node.parent
return None, None
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment