# Author: David Goodger
# Contact:
# Revision: $Revision: 1.3 $
# Date: $Date: 2003/07/10 15:49:30 $
# Copyright: This module has been placed in the public domain.
"""
This is the Docutils (Python Documentation Utilities) package.

Package Structure
=================

Modules:

- __init__.py: Contains the package docstring only (this text).

- core.py: Contains the ``Publisher`` class and ``publish()`` convenience
  function.

- frontend.py: Command-line and common processing for Docutils front-ends.

- io.py: Provides a uniform API for low-level input and output.

- nodes.py: Docutils document tree (doctree) node class library.

- statemachine.py: A finite state machine specialized for
  regular-expression-based text filters.

- urischemes.py: Contains a complete mapping of known URI addressing
  scheme names to descriptions.

- utils.py: Contains the ``Reporter`` system warning class and miscellaneous
  utilities.

Subpackages:

- languages: Language-specific mappings of terms.

- parsers: Syntax-specific input parser modules or packages.

- readers: Context-specific input handlers which understand the data
  source and manage a parser.

- transforms: Modules used by readers and writers to modify DPS
  doctrees.

- writers: Format-specific output translators.
"""
# Markup language in which this module's docstrings are written.
__docformat__ = 'reStructuredText'
# Package version string; the string literal that follows it documents the
# ``major.minor.micro`` numbering policy.
__version__ = '0.3.0'
"""``major.minor.micro`` version number. The micro number is bumped any time
there's a change in the API incompatible with one of the front ends. The
minor number is bumped whenever there is a project release. The major number
will be bumped when the project is feature-complete, and perhaps if there is a
major change in the design."""
try:
    # Python 2: keep the historical base class so that callers catching
    # ``StandardError`` continue to catch these exceptions.
    _ErrorBase = StandardError
except NameError:
    # Python 3 removed ``StandardError``; ``Exception`` is its direct
    # replacement.
    _ErrorBase = Exception


class ApplicationError(_ErrorBase):
    """Root of this package's application-level exception hierarchy."""


class DataError(ApplicationError):
    """An `ApplicationError` subclass; adds no behaviour of its own."""
class SettingsSpec:

    """
    Runtime setting specification base class.

    SettingsSpec subclass objects used by `docutils.frontend.OptionParser`.
    """

    settings_spec = ()
    """Runtime settings specification. Override in subclasses.

    Specifies runtime settings and associated command-line options, as used by
    `docutils.frontend.OptionParser`. This tuple contains one or more sets of
    option group title, description, and a list/tuple of tuples: ``('help
    text', [list of option strings], {keyword arguments})``. Group title
    and/or description may be `None`; no group title implies no group, just a
    list of single options. Runtime settings names are derived implicitly
    from long option names ("--a-setting" becomes ``settings.a_setting``) or
    explicitly from the "dest" keyword argument."""

    settings_defaults = None
    """A dictionary of defaults for internal or inaccessible (by command-line
    or config file) settings. Override in subclasses."""

    settings_default_overrides = None
    """A dictionary of auxiliary defaults, to override defaults for settings
    defined in other components. Override in subclasses."""

    relative_path_settings = ()
    """Settings containing filesystem paths. Override in subclasses.

    Settings listed here are to be interpreted relative to the current working
    directory."""
class TransformSpec:

    """
    Runtime transform specification base class.

    TransformSpec subclass objects used by `docutils.transforms.Transformer`.
    """

    default_transforms = ()
    """Transforms required by this class. Override in subclasses."""
class Component(SettingsSpec, TransformSpec):

    """Base class for Docutils components."""

    component_type = None
    """Override in subclasses."""

    supported = ()
    """Names for this component. Override in subclasses."""

    def supports(self, format):
        """
        Is `format` supported by this component?

        To be used by transforms to ask the dependent component if it supports
        a certain input context or output format.

        Returns the result of a membership test of `format` against
        `self.supported`.
        """
        return format in self.supported
# Author: David Goodger
# Contact:
# Revision: $Revision: 1.3 $
# Date: $Date: 2003/07/10 15:49:30 $
# Copyright: This module has been placed in the public domain.
"""
I/O classes provide a uniform API for low-level input and output. Subclasses
will exist for a variety of input/output mechanisms.
"""
# Markup language in which this module's docstrings are written.
__docformat__ = 'reStructuredText'
import sys
import locale
from types import UnicodeType
from docutils import TransformSpec
class Input(TransformSpec):

    """
    Abstract base class for input wrappers.
    """

    component_type = 'input'

    default_source_path = None

    def __init__(self, source=None, source_path=None, encoding=None):
        """
        Store `source`, `source_path` and `encoding` on the instance.

        A false `source_path` is replaced by the class attribute
        `default_source_path`.
        """
        self.encoding = encoding
        """Text encoding for the input source."""

        self.source = source
        """The source of input data."""

        self.source_path = source_path
        """A text reference to the source."""

        if not source_path:
            self.source_path = self.default_source_path

    def __repr__(self):
        return '%s: source=%r, source_path=%r' % (self.__class__, self.source,
                                                  self.source_path)

    def read(self):
        """Abstract; subclasses must override. Raises NotImplementedError."""
        raise NotImplementedError

    def decode(self, data):
        """
        Decode a string, `data`, heuristically.
        Raise UnicodeError if unsuccessful.

        The client application should call ``locale.setlocale`` at the
        beginning of processing::

            locale.setlocale(locale.LC_ALL, '')
        """
        if (self.encoding and self.encoding.lower() == 'unicode'
            or isinstance(data, UnicodeType)):
            return unicode(data)
        encodings = [self.encoding, 'utf-8']
        # NOTE(review): `locale` is imported by this module and the docstring
        # asks clients to call ``locale.setlocale``, yet no locale-derived
        # encoding is added to the candidate list here. Statements may have
        # been lost from the damaged source -- confirm against upstream.
        for enc in encodings:
            if not enc:
                # `self.encoding` may be None; nothing to try.
                continue
            try:
                return unicode(data, enc)
            except (UnicodeError, LookupError):
                # Wrong or unknown encoding: fall through to the next one.
                pass
        raise UnicodeError(
            'Unable to decode input data. Tried the following encodings: %s.'
            % ', '.join([repr(enc) for enc in encodings if enc]))
class Output(TransformSpec):

    """
    Abstract base class for output wrappers.
    """

    component_type = 'output'

    default_destination_path = None

    def __init__(self, destination=None, destination_path=None,
                 encoding=None, error_handler='strict'):
        """
        Store the destination, its path, the encoding and the error handler.

        A false `error_handler` falls back to ``'strict'``; a false
        `destination_path` falls back to the class attribute
        `default_destination_path`.
        """
        self.encoding = encoding
        """Text encoding for the output destination."""

        self.error_handler = error_handler or 'strict'
        """Text encoding error handler."""

        self.destination = destination
        """The destination for output data."""

        self.destination_path = destination_path
        """A text reference to the destination."""

        if not destination_path:
            self.destination_path = self.default_destination_path

    def __repr__(self):
        return ('%s: destination=%r, destination_path=%r'
                % (self.__class__, self.destination, self.destination_path))

    def write(self, data):
        """Abstract; subclasses must override. Raises NotImplementedError."""
        raise NotImplementedError

    def encode(self, data):
        """
        Return `data` encoded with `self.encoding` and `self.error_handler`.

        When `self.encoding` is ``'unicode'`` (compared case-insensitively),
        `data` is returned unchanged.
        """
        if self.encoding and self.encoding.lower() == 'unicode':
            return data
        return data.encode(self.encoding, self.error_handler)
class FileInput(Input):
Input for single, simple file-like objects.
def __init__(self, source=None, source_path=None,
encoding=None, autoclose=1, handle_io_errors=1):
- `source`: either a file-like object (which is read directly), or
`None` (which implies `sys.stdin` if no `source_path` given).
- `source_path`: a path to a file, which is opened and then read.
- `autoclose`: close automatically after read (boolean); always
false if `sys.stdin` is the source.
Input.__init__(self, source, source_path, encoding)
self.autoclose = autoclose
self.handle_io_errors = handle_io_errors
if source is None:
if source_path:
self.source = open(source_path)
except IOError, error:
if not handle_io_errors:
print >>sys.stderr, '%s: %s' % (error.__class__.__name__,
print >>sys.stderr, (
'Unable to open source file for reading (%s). Exiting.'
% source_path)
self.source = sys.stdin
self.autoclose = None
if not source_path:
self.source_path =
except AttributeError:
def read(self):
"""Read and decode a single file and return the data."""
data =
if self.autoclose:
return self.decode(data)
def close(self):
class FileOutput(Output):
Output for single, simple file-like objects.
def __init__(self, destination=None, destination_path=None,
encoding=None, error_handler='strict', autoclose=1,
- `destination`: either a file-like object (which is written
directly) or `None` (which implies `sys.stdout` if no
`destination_path` given).
- `destination_path`: a path to a file, which is opened and then
- `autoclose`: close automatically after write (boolean); always
false if `sys.stdout` is the destination.
Output.__init__(self, destination, destination_path,
encoding, error_handler)
self.opened = 1
self.autoclose = autoclose
self.handle_io_errors = handle_io_errors
if destination is None:
if destination_path:
self.opened = None
self.destination = sys.stdout
self.autoclose = None
if not destination_path:
self.destination_path =
except AttributeError:
def open(self):
self.destination = open(self.destination_path, 'w')
except IOError, error:
if not self.handle_io_errors:
print >>sys.stderr, '%s: %s' % (error.__class__.__name__,
print >>sys.stderr, ('Unable to open destination file for writing '
'(%s). Exiting.' % source_path)
self.opened = 1
def write(self, data):
"""Encode `data`, write it to a single file, and return it."""
output = self.encode(data)
if not self.opened:
if self.autoclose:
return output
def close(self):
self.opened = None
class StringInput(Input):

    """
    Direct string input.
    """

    default_source_path = '<string>'

    def read(self):
        """Decode and return the source string."""
        return self.decode(self.source)
class StringOutput(Output):

    """
    Direct string output.
    """

    default_destination_path = '<string>'

    def write(self, data):
        """Encode `data`, store it in `self.destination`, and return it."""
        self.destination = self.encode(data)
        return self.destination
class NullInput(Input):

    """
    Degenerate input: read nothing.
    """

    default_source_path = 'null input'

    def read(self):
        """Return a null string."""
        return u''
class NullOutput(Output):

    """
    Degenerate output: write nothing.
    """

    default_destination_path = 'null output'

    def write(self, data):
        """Do nothing ([don't even] send data to the bit bucket)."""
        pass
"""Convert to and from Roman numerals"""

# NOTE(review): the author string ends in a dangling "(" -- a contact address
# appears to have been stripped from it. Left untouched; restore from the
# upstream distribution if the address is wanted.
__author__ = "Mark Pilgrim ("
__version__ = "1.4"
__date__ = "8 August 2001"
# NOTE(review): this string was unterminated in the damaged source and both
# URLs were missing. They have been filled in from the upstream roman.py
# notice -- verify against the original distribution before shipping.
__copyright__ = """Copyright (c) 2001 Mark Pilgrim

This program is part of "Dive Into Python", a free Python tutorial for
experienced programmers. Visit http://diveintopython.org/ for the
latest version.

This program is free software; you can redistribute it and/or modify
it under the terms of the Python 2.1.1 license, available at
http://www.python.org/2.1.1/license.html
"""
import re
# Exception hierarchy: every converter error derives from RomanError, so a
# caller can catch the whole family with a single ``except`` clause.
class RomanError(Exception):
    """Common base class for the Roman numeral conversion errors."""


class OutOfRangeError(RomanError):
    """Raised by `toRoman` for a number outside the supported range."""


class NotIntegerError(RomanError):
    """Raised by `toRoman` for a number that is not a whole number."""


class InvalidRomanNumeralError(RomanError):
    """Raised by `fromRoman` for a blank or malformed numeral string."""
# Numeral/value pairs in strictly descending value order. The subtractive
# forms (CM, CD, XC, XL, IX, IV) are listed alongside the plain symbols so
# that a single pass over the table, largest first, handles both.
romanNumeralMap = (
    ('M', 1000),
    ('CM', 900),
    ('D', 500),
    ('CD', 400),
    ('C', 100),
    ('XC', 90),
    ('L', 50),
    ('XL', 40),
    ('X', 10),
    ('IX', 9),
    ('V', 5),
    ('IV', 4),
    ('I', 1),
)
def toRoman(n):
    """convert integer to Roman numeral

    `n` must be a whole number in the range 1..4999.

    Raises `OutOfRangeError` if `n` is outside that range and
    `NotIntegerError` if `n` has a fractional part.
    """
    if not (0 < n < 5000):
        raise OutOfRangeError("number out of range (must be 1..4999)")
    if int(n) != n:
        raise NotIntegerError("decimals can not be converted")

    # Work on a true int so that whole-valued floats (e.g. 3.0) convert too.
    remaining = int(n)
    pieces = []
    for numeral, integer in romanNumeralMap:
        # How many times this numeral fits, and what is left over.
        count, remaining = divmod(remaining, integer)
        pieces.append(numeral * count)
    return "".join(pieces)
#Define pattern to detect valid Roman numerals
# Compiled with re.VERBOSE, so whitespace and the '#' remarks inside the
# pattern text are ignored by the regex engine. ``M{0,4}`` caps the
# thousands at four, and each following group admits either a subtractive
# pair or an optional "five" symbol plus up to three "one" symbols. Every
# part is optional, so the empty string also matches.
romanNumeralPattern = re.compile('''
^ # beginning of string
M{0,4} # thousands - 0 to 4 M's
(CM|CD|D?C{0,3}) # hundreds - 900 (CM), 400 (CD), 0-300 (0 to 3 C's),
# or 500-800 (D, followed by 0 to 3 C's)
(XC|XL|L?X{0,3}) # tens - 90 (XC), 40 (XL), 0-30 (0 to 3 X's),
# or 50-80 (L, followed by 0 to 3 X's)
(IX|IV|V?I{0,3}) # ones - 9 (IX), 4 (IV), 0-3 (0 to 3 I's),
# or 5-8 (V, followed by 0 to 3 I's)
$ # end of string
''' ,re.VERBOSE)
def fromRoman(s):
    """convert Roman numeral to integer

    Raises `InvalidRomanNumeralError` if `s` is blank or does not match
    `romanNumeralPattern`.
    """
    if not s:
        raise InvalidRomanNumeralError('Input can not be blank')
    # NOTE(review): the condition on this line was lost from the damaged
    # source; validating against `romanNumeralPattern` is the reconstruction
    # implied by the error message -- confirm against upstream.
    if not romanNumeralPattern.search(s):
        raise InvalidRomanNumeralError('Invalid Roman numeral: %s' % s)

    result = 0
    index = 0
    for numeral, integer in romanNumeralMap:
        width = len(numeral)
        # Consume every consecutive occurrence of this numeral at the cursor.
        while s[index:index + width] == numeral:
            result += integer
            index += width
    return result
"""
`schemes` is a dictionary with lowercase URI addressing schemes as
keys and descriptions as values. It was compiled from the index at
http://www.w3.org/Addressing/schemes.html (revised 2001-08-20).
"""
# Many values are blank and should be filled in with useful descriptions.
#
# Maps each lowercase URI scheme name to a human-readable description.
# NOTE(review): the 'gsm-sms' and 'javascript' descriptions were cut off in
# the damaged source (closing string and parenthesis missing); their endings
# ('Service' and 'colon') are reconstructions -- confirm against upstream.
schemes = {
    'about': 'provides information on Navigator',
    'acap': 'Application Configuration Access Protocol',
    'addbook': "To add vCard entries to Communicator's Address Book",
    'afp': 'Apple Filing Protocol',
    'afs': 'Andrew File System global file names',
    'aim': 'AOL Instant Messenger',
    'callto': 'for NetMeeting links',
    'castanet': 'Castanet Tuner URLs for Netcaster',
    'chttp': 'cached HTTP supported by RealPlayer',
    'cid': 'content identifier',
    'data': ('allows inclusion of small data items as "immediate" data; '
             'RFC 2397'),
    'dav': 'Distributed Authoring and Versioning Protocol; RFC 2518',
    'dns': 'Domain Name System resources',
    'eid': ('External ID; non-URL data; general escape mechanism to allow '
            'access to information for applications that are too '
            'specialized to justify their own schemes'),
    'fax': ('a connection to a terminal that can handle telefaxes '
            '(facsimiles); RFC 2806'),
    'file': 'Host-specific file names',
    'finger': '',
    'freenet': '',
    'ftp': 'File Transfer Protocol',
    'gopher': 'The Gopher Protocol',
    'gsm-sms': ('Global System for Mobile Communications Short Message '
                'Service'),
    'h323': 'video (audiovisual) communication on local area networks',
    'h324': ('video and audio communications over low bitrate connections '
             'such as POTS modem connections'),
    'hdl': 'CNRI handle system',
    'hnews': 'an HTTP-tunneling variant of the NNTP news protocol',
    'http': 'Hypertext Transfer Protocol',
    'https': 'HTTP over SSL',
    'iioploc': 'Internet Inter-ORB Protocol Location?',
    'ilu': 'Inter-Language Unification',
    'imap': 'Internet Message Access Protocol',
    'ior': 'CORBA interoperable object reference',
    'ipp': 'Internet Printing Protocol',
    'irc': 'Internet Relay Chat',
    'jar': 'Java archive',
    'javascript': ('JavaScript code; evaluates the expression after the '
                   'colon'),
    'jdbc': '',
    'ldap': 'Lightweight Directory Access Protocol',
    'lifn': '',
    'livescript': '',
    'lrq': '',
    'mailbox': 'Mail folder access',
    'mailserver': 'Access to data available from mail servers',
    'mailto': 'Electronic mail address',
    'md5': '',
    'mid': 'message identifier',
    'mocha': '',
    'modem': ('a connection to a terminal that can handle incoming data '
              'calls; RFC 2806'),
    'news': 'USENET news',
    'nfs': 'Network File System protocol',
    'nntp': 'USENET news using NNTP access',
    'opaquelocktoken': '',
    'phone': '',
    'pop': 'Post Office Protocol',
    'pop3': 'Post Office Protocol v3',
    'printer': '',
    'prospero': 'Prospero Directory Service',
    'res': '',
    'rtsp': 'real time streaming protocol',
    'rvp': '',
    'rwhois': '',
    'rx': 'Remote Execution',
    'sdp': '',
    'service': 'service location',
    'shttp': 'secure hypertext transfer protocol',
    'sip': 'Session Initiation Protocol',
    'smb': '',
    'snews': 'For NNTP postings via SSL',
    't120': 'real time data conferencing (audiographics)',
    'tcp': '',
    'tel': ('a connection to a terminal that handles normal voice '
            'telephone calls, a voice mailbox or another voice messaging '
            'system or a service that can be operated using DTMF tones; '
            'RFC 2806.'),
    'telephone': 'telephone',
    'telnet': 'Reference to interactive sessions',
    'tip': 'Transaction Internet Protocol',
    'tn3270': 'Interactive 3270 emulation sessions',
    'tv': '',
    'urn': 'Uniform Resource Name',
    'uuid': '',
    'vemmi': 'versatile multimedia interface',
    'videotex': '',
    'view-source': 'displays HTML code that was generated with JavaScript',
    'wais': 'Wide Area Information Servers',
    'whodp': '',
    'whois++': 'Distributed directory service.',
    'z39.50r': 'Z39.50 Retrieval',
    'z39.50s': 'Z39.50 Session',
}
