Commit 1c006d9e authored by Yoshinori Okuji's avatar Yoshinori Okuji

tweaking the repository structure.

parents
##############################################################################
#
# Yoshinori OKUJI <yo@nexedi.com>
#
# Copyright (C) 2003 Nexedi SARL
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
##############################################################################
from xml.dom.minidom import parse, parseString, getDOMImplementation
import sys
import getopt
import os
from StringIO import StringIO
import re
import codecs
class ERP5Diff:
    """
    Make a difference between two XML documents using XUpdate.
    Use some assumptions in ERP5's data representation.

    The strategy is:

    1. Find a matching element among elements of the other XML document at
       the same depth.
    2. Use the first matching element, even if there can be other better
       elements.
    3. Assume that two elements are matching, if the tag names are identical.
       If either of them has an attribute 'id', the values of the attributes
       'id' also must be identical.
    4. Don't use xupdate:rename for elements. It should be quite rare to
       rename tag names in ERP5, and it is too complicated to support this
       renaming. (The only exception is the root element, see compare().)
    5. Ignore some types of nodes, such as EntityReference and Comment,
       because they are not used in ERP5 XML documents.

    Typical usage: call compare() with two documents, then output() or
    outputString() to obtain the XUpdate document.
    """

    def __init__(self):
        """
        Initialize itself: not verbose, and no result document yet.
        """
        self._verbose = 0
        self._result = None

    def setVerbosity(self, verbose):
        """
        Set the verbosity. Any true value enables the messages of _p().
        """
        self._verbose = verbose

    def _p(self, msg):
        """
        Print a message to stderr only if being verbose.
        """
        if self._verbose:
            sys.stderr.write(str(msg) + os.linesep)

    def _makeDocList(self, *args):
        """
        Make a list of Document objects, one per argument.

        A string argument is parsed as an XML document itself; any other
        argument is handed to minidom's parse(). If one of the arguments
        cannot be parsed, the documents already built are unlinked before
        the exception is propagated, so nothing is leaked.
        """
        doc_list = []
        try:
            for a in args:
                if isinstance(a, str):
                    doc_list.append(parseString(a))
                else:
                    doc_list.append(parse(a))
        except Exception:
            for doc in doc_list:
                doc.unlink()
            raise
        return doc_list

    def _concatPath(self, p1, p2, separator='/'):
        """
        Concatenate 'p1' and 'p2'. Add a separator between them,
        only if 'p1' does not end with a separator.
        """
        if p1.endswith(separator):
            return p1 + p2
        return p1 + separator + p2

    def _getResultRoot(self):
        """
        Return the root element of the result document.
        """
        return self._result.documentElement

    def _getId(self, element):
        """
        Return the value of the attribute 'id' of 'element', or None if the
        element has no such attribute.
        """
        attr = element.getAttributeNode('id')
        if attr is None:
            return None
        return attr.nodeValue

    def _getAttributeDict(self, element):
        """
        Return the attributes of 'element' as a dictionary mapping each
        attribute name to its value.
        """
        attr_map = element.attributes
        attr_dict = {}
        for i in range(attr_map.length):
            attr = attr_map.item(i)
            attr_dict[attr.name] = attr.nodeValue
        return attr_dict

    def _makeXupdateElement(self, element):
        """
        Build an xupdate:element node in the result document describing
        'element': its tag name, its attributes (as xupdate:attribute
        children) and deep copies of its child nodes.
        """
        createElement = self._result.createElement
        createTextNode = self._result.createTextNode
        child_element = createElement('xupdate:element')
        child_element.setAttribute('name', element.tagName)
        attr_map = element.attributes
        for i in range(attr_map.length):
            attr = attr_map.item(i)
            attr_element = createElement('xupdate:attribute')
            attr_element.setAttribute('name', attr.name)
            attr_element.appendChild(createTextNode(attr.nodeValue))
            child_element.appendChild(attr_element)
        for child in element.childNodes:
            child_element.appendChild(child.cloneNode(1))
        return child_element

    def _xupdateAppendAttributes(self, dict, path):
        """
        Append attributes to the element at 'path'.

        'dict' maps attribute names to values. The attributes are emitted in
        sorted order, so that the output does not depend on dictionary
        ordering.
        """
        root = self._getResultRoot()
        createElement = self._result.createElement
        createTextNode = self._result.createTextNode
        append_element = createElement('xupdate:append')
        append_element.setAttribute('select', path)
        for name, val in sorted(dict.items()):
            attr_element = createElement('xupdate:attribute')
            attr_element.setAttribute('name', name)
            attr_element.appendChild(createTextNode(val))
            append_element.appendChild(attr_element)
        root.appendChild(append_element)

    def _xupdateRemoveAttribute(self, name, path):
        """
        Remove the attribute 'name' from the element at 'path'.
        """
        root = self._getResultRoot()
        remove_element = self._result.createElement('xupdate:remove')
        remove_element.setAttribute(
            'select', self._concatPath(path, 'attribute::' + name))
        root.appendChild(remove_element)

    def _xupdateUpdateAttribute(self, name, val, path):
        """
        Update the value of the attribute 'name' of the element at 'path'
        to 'val'.
        """
        root = self._getResultRoot()
        update_element = self._result.createElement('xupdate:update')
        update_element.setAttribute(
            'select', self._concatPath(path, 'attribute::' + name))
        update_element.appendChild(self._result.createTextNode(val))
        root.appendChild(update_element)

    def _xupdateRenameElement(self, name, path):
        """
        Rename an existing element at 'path' to 'name'.
        """
        root = self._getResultRoot()
        rename_element = self._result.createElement('xupdate:rename')
        rename_element.setAttribute('select', path)
        rename_element.appendChild(self._result.createTextNode(name))
        root.appendChild(rename_element)

    def _xupdateUpdateElement(self, element, path):
        """
        Update the contents of an element at 'path' to that of 'element'.
        The child nodes of 'element' are deep-copied into the result.
        """
        root = self._getResultRoot()
        update_element = self._result.createElement('xupdate:update')
        update_element.setAttribute('select', path)
        for node in element.childNodes:
            update_element.appendChild(node.cloneNode(1))
        root.appendChild(update_element)

    def _xupdateRemoveElement(self, path):
        """
        Remove an element at 'path'.
        """
        root = self._getResultRoot()
        remove_element = self._result.createElement('xupdate:remove')
        remove_element.setAttribute('select', path)
        root.appendChild(remove_element)

    def _xupdateInsertBefore(self, element_list, path):
        """
        Insert the elements of 'element_list' before the element at 'path'.
        """
        root = self._getResultRoot()
        insert_element = self._result.createElement('xupdate:insert-before')
        insert_element.setAttribute('select', path)
        for element in element_list:
            insert_element.appendChild(self._makeXupdateElement(element))
        root.appendChild(insert_element)

    def _xupdateAppendElements(self, element_list, path):
        """
        Append the elements of 'element_list' to the element at 'path'.
        """
        root = self._getResultRoot()
        append_element = self._result.createElement('xupdate:append')
        append_element.setAttribute('select', path)
        for element in element_list:
            append_element.appendChild(self._makeXupdateElement(element))
        root.appendChild(append_element)

    def _testElements(self, element1, element2):
        """
        Test if two given elements are matching. Matching does not mean that
        they are identical.

        Return a true value if both are elements with the same tag name and
        either none of them has an attribute 'id' or both have the same 'id'
        value. Return a false value otherwise.
        """
        # Make sure that they are elements.
        if (element1.nodeType != element2.nodeType
                or element1.nodeType != element1.ELEMENT_NODE):
            return 0
        if element1.tagName != element2.tagName:
            return 0
        id1 = self._getId(element1)
        id2 = self._getId(element2)
        if id1 is None and id2 is None:
            return 1
        if id1 is None or id2 is None:
            # Only one of them is identified, so they cannot be the same.
            return 0
        return id1 == id2

    def _testAttributes(self, element1, element2, path):
        """
        Test attributes of two given elements. Add differences, if any.

        'element1' is the old element, 'element2' the new one, and 'path'
        the location of the element. Changed and removed attributes are
        reported one by one (in sorted order), added attributes are reported
        together in a single xupdate:append.
        """
        old_attrs = self._getAttributeDict(element1)
        new_attrs = self._getAttributeDict(element2)
        for name in sorted(old_attrs):
            if name not in new_attrs:
                # This attribute is removed.
                self._xupdateRemoveAttribute(name, path)
            elif old_attrs[name] != new_attrs[name]:
                # The value is different.
                self._xupdateUpdateAttribute(name, new_attrs[name], path)
        added = {}
        for name, val in new_attrs.items():
            if name not in old_attrs:
                # This attribute is added.
                added[name] = val
        if added:
            self._xupdateAppendAttributes(added, path)

    def _checkEmptiness(self, element):
        """
        Check if an element has child values. Return 1 if it has neither
        child elements nor text nodes, 0 otherwise.
        """
        for child in element.childNodes:
            if child.nodeType in (child.ELEMENT_NODE, child.TEXT_NODE):
                return 0
        return 1

    def _checkIgnoreText(self, element):
        """
        Determine if text should be ignored by heuristics,
        because ERP5 does not define any schema at the moment.
        Text is ignored (1 is returned) as soon as the element has at least
        one child element.
        """
        for child in element.childNodes:
            if child.nodeType == child.ELEMENT_NODE:
                return 1
        return 0

    def _makeRelativePathList(self, element_list):
        """
        Make a list of relative paths from a list of sibling elements.

        An element with an attribute 'id' is addressed by that attribute.
        Otherwise, if its tag name is shared with other siblings, it is
        addressed by its position among the siblings of the same tag name.
        XPath positions are 1-based, so the first one is 'tag[1]'.
        A tag name used only once is addressed by the bare tag name.
        """
        total_map = {}
        for element in element_list:
            total_map[element.tagName] = total_map.get(element.tagName, 0) + 1
        position_map = {}
        path_list = []
        for element in element_list:
            tag = element.tagName
            # Every element counts for the position, even when it is
            # addressed by 'id', because other elements with the same tag
            # name may have no 'id' attribute.
            position = position_map.get(tag, 0) + 1
            position_map[tag] = position
            id_val = self._getId(element)
            if id_val is not None:
                # If an attribute 'id' is present, use it for convenience.
                # XPath 1.0 literals have no escaping, so pick the quote
                # character which does not occur in the value.
                if "'" in id_val:
                    quote = '"'
                else:
                    quote = "'"
                path_list.append(
                    '%s[@id=%s%s%s]' % (tag, quote, id_val, quote))
            elif total_map[tag] > 1:
                path_list.append('%s[%d]' % (tag, position))
            else:
                path_list.append(tag)
        return path_list

    def _aggregateElements(self, element):
        """
        Aggregate child elements of an element into a list.
        """
        return [child for child in element.childNodes
                if child.nodeType == child.ELEMENT_NODE]

    def _aggregateText(self, element):
        """
        Aggregate child text nodes of an element into a single string.
        """
        return ''.join([child.nodeValue for child in element.childNodes
                        if child.nodeType == child.TEXT_NODE])

    def _compareChildNodes(self, old_element, new_element, path):
        """
        Compare children of two elements, and add differences into the
        result, if any. Call itself recursively, if these elements have
        grandchildren. 'path' is the location of 'old_element'.
        """
        self._p("Comparing %s with %s at %s..."
                % (repr(old_element), repr(new_element), path))
        # First, determine if they are empty.
        old_is_empty = self._checkEmptiness(old_element)
        new_is_empty = self._checkEmptiness(new_element)
        if old_is_empty and new_is_empty:
            # Nothing to do.
            self._p("Both are empty.")
            return
        if old_is_empty or new_is_empty:
            # Perhaps they are very different.
            self._p("One of them is empty, so just update all the contents.")
            self._xupdateUpdateElement(new_element, path)
            return

        # Second, determine if text should be ignored.
        old_ignore_text = self._checkIgnoreText(old_element)
        new_ignore_text = self._checkIgnoreText(new_element)
        if old_ignore_text != new_ignore_text:
            # This means that the semantics of this element is quite
            # different.
            self._p("One of them has only text and the other does not, so just update all the contents.")
            self._xupdateUpdateElement(new_element, path)
            return
        if not old_ignore_text:
            # The contents are only text.
            self._p("Both have only text.")
            old_text = self._aggregateText(old_element)
            new_text = self._aggregateText(new_element)
            if old_text != new_text:
                self._p("They differ, so update the elements.")
                self._xupdateUpdateElement(new_element, path)
            return

        # The contents are elements.
        self._p("Both have elements.")
        old_list = self._aggregateElements(old_element)
        path_list = self._makeRelativePathList(old_list)
        new_list = self._aggregateElements(new_element)
        # Index of the first new child which is not consumed yet.
        new_start = 0
        new_len = len(new_list)
        for old_node, node_path in zip(old_list, path_list):
            child_path = self._concatPath(path, node_path)
            for new_current in range(new_start, new_len):
                new_node = new_list[new_current]
                if self._testElements(old_node, new_node):
                    self._testAttributes(old_node, new_node, child_path)
                    self._compareChildNodes(old_node, new_node, child_path)
                    if new_current > new_start:
                        # There are skipped nodes in the new children.
                        self._xupdateInsertBefore(
                            new_list[new_start:new_current], child_path)
                    new_start = new_current + 1
                    break
            else:
                # There is no matching node. So this element must be removed.
                self._xupdateRemoveElement(child_path)
        if new_len > new_start:
            # There are remaining nodes in the new children.
            self._xupdateAppendElements(new_list[new_start:new_len], path)

    def compare(self, old_xml, new_xml):
        """
        Compare two given XML documents, and store the differences as an
        XUpdate document which can be retrieved with output() or
        outputString().

        If an argument is a string, it is assumed to be a XML document
        itself. Otherwise, it is assumed to be a file object which contains
        a XML document.

        This method can be called several times; every call discards the
        previous result.
        """
        old_doc, new_doc = self._makeDocList(old_xml, new_xml)
        try:
            old_root_element = old_doc.documentElement
            new_root_element = new_doc.documentElement
            impl = getDOMImplementation()
            # XXX this namespace argument won't be handled correctly in
            # XXX minidom. So work around that problem when outputting the
            # XXX result.
            if self._result is not None:
                # Documents have no close(); unlink() is the way to release
                # a minidom tree.
                self._result.unlink()
            self._result = impl.createDocument(
                'http://www.xmldb.org/xupdate', 'xupdate:modifications', None)
            # In XPath, '/' is the document node, so the root element must be
            # addressed by its name.
            old_root_path = '/' + old_root_element.tagName
            new_root_path = '/' + new_root_element.tagName
            if self._testElements(old_root_element, new_root_element):
                self._testAttributes(
                    old_root_element, new_root_element, old_root_path)
                self._compareChildNodes(
                    old_root_element, new_root_element, old_root_path)
            else:
                # These XML documents seem to be completely different...
                if old_root_element.tagName != new_root_element.tagName:
                    self._xupdateRenameElement(
                        new_root_element.tagName, old_root_path)
                # The modifications are applied in sequence, so from now on
                # the root element is known under its new name.
                self._testAttributes(
                    old_root_element, new_root_element, new_root_path)
                self._xupdateUpdateElement(new_root_element, new_root_path)
        finally:
            old_doc.unlink()
            new_doc.unlink()

    def output(self, file=None):
        """
        Output the result of comparing XML documents to 'file', encoded in
        UTF-8. 'file' must accept encoded (byte) strings.
        If it is not specified, stdout is assumed.
        compare() must have been called before.
        """
        if file is None:
            # Where stdout is a text stream wrapping a binary buffer, write
            # the encoded data to the buffer; otherwise use stdout itself.
            file = getattr(sys.stdout, 'buffer', sys.stdout)
        # Make sure that the output will be encoded in UTF-8.
        writer = codecs.getwriter('utf-8')(file)
        # XXX minidom is too buggy, so it is required to write this method
        # XXX myself.
        writer.write('<?xml version="1.0"?>\n')
        writer.write('<xupdate:modifications version="1.0"'
                     ' xmlns:xupdate="http://www.xmldb.org/xupdate">\n')
        for node in self._result.documentElement.childNodes:
            node.writexml(writer)
        writer.write('</xupdate:modifications>\n')

    def outputString(self):
        """
        Return the result as a string object (encoded in UTF-8).
        """
        io = StringIO()
        try:
            self.output(io)
            return io.getvalue()
        finally:
            io.close()
def main():
    """
    The main routine of ERP5Diff.

    Parse the command line (sys.argv), compare the two XML files given as
    arguments and write the resulting XUpdate document to the file given
    with -o/--output, or to the standard output by default.

    This function always terminates with sys.exit(): status 2 for a usage
    error, status 0 after the help message or a successful run.
    """
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "ho:v", ["help", "output=", "verbose"])
    except getopt.GetoptError:
        # Fetch the exception this way, so that the syntax is accepted by
        # both old and new Python versions.
        msg = sys.exc_info()[1]
        print(msg)
        print("Try ``erp5diff --help'' for more information.")
        sys.exit(2)

    output = None
    verbose = 0
    for o, a in opts:
        if o in ("-v", "--verbose"):
            verbose = 1
        elif o in ("-h", "--help"):
            print('''Usage: erp5diff [OPTION]... OLD_XML NEW_XML
Make a difference between two XML documents in XUpdate format.
-h, --help display this message and exit
-o, --output=FILE output the result to the file FILE
-v, --verbose print verbose messages
Report bugs to <yo@nexedi.com>.''')
            sys.exit()
        elif o in ("-o", "--output"):
            output = a

    if len(args) != 2:
        if len(args) > 2:
            print("Too many arguments.")
        else:
            print("Too few arguments.")
        print("Try ``erp5diff --help'' for more information.")
        sys.exit(2)

    d = ERP5Diff()
    d.setVerbosity(verbose)

    # Let the XML parser deal with the encoding: open the inputs as binary.
    # Make sure that both files are closed, even if the comparison fails.
    old_xml = open(args[0], 'rb')
    try:
        new_xml = open(args[1], 'rb')
        try:
            d.compare(old_xml, new_xml)
        finally:
            new_xml.close()
    finally:
        old_xml.close()

    out_file = None
    try:
        if output is not None:
            # The result is written as UTF-8 encoded data.
            out_file = open(output, 'wb')
        d.output(out_file)
    except:
        # Do not leave a partial result behind. Only clean up a file that
        # was really opened here, so that a failure of open() itself is
        # reported as is and no unrelated file is removed.
        if out_file is not None:
            out_file.close()
            os.remove(output)
        raise
    else:
        if out_file is not None:
            out_file.close()
    sys.exit()


if __name__ == '__main__':
    main()
This is an XUpdate generator for ERP5.
See <http://www.xmldb.org/xupdate/index.html> for information on
XUpdate.
See <http://erp5.org/> for information on ERP5.
To install erp5diff, run "python setup.py install".
Once you have installed erp5diff, you can use "erp5diff" in a shell:
$ erp5diff old.xml new.xml
See the manpage erp5diff(1) or "erp5diff --help" for more information.
You can also use the ERP5Diff module from your own Python scripts.
Run "pydoc ERP5Diff" for more information.
- 2003-12-04, Yoshinori OKUJI <yo@nexedi.com>
#! /usr/bin/python
##############################################################################
#
# Yoshinori OKUJI <yo@nexedi.com>
#
# Copyright (C) 2003 Nexedi SARL
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
##############################################################################
# Command-line launcher: everything is delegated to main() of the module
# ERP5Diff.
from ERP5Diff import main
main()
\ No newline at end of file
.TH ERP5DIFF 1 "4 Dec 2003" "ERP5DIFF version 0.1" Nexedi
.SH NAME
erp5diff \- find differences between two XML documents for ERP5
.SH SYNOPSIS
.B erp5diff
[\fIoptions\fR]... \fIOLD_XML\fR \fINEW_XML\fR
.LP
.SH DESCRIPTION
ERP5Diff is an XUpdate generator for ERP5. It takes two XML files
as input data, and generates the differences between these two XML
documents in the XUpdate language.
.LP
ERP5Diff depends more or less on ERP5's XML data format. So this tool
is not meant for general-purpose use, but it might work if your XML
files are similar to ERP5's.
.SH OPTIONS
.TP
\fB\-o\fR, \fB\-\-output\fR=\fIFILE\fR
Specify the output file. The standard output is used by default.
.TP
\fB\-h\fR, \fB\-\-help\fR
Display the usage and exit.
.TP
\fB\-v\fR, \fB\-\-verbose\fR
Print verbose messages. Only useful for debugging.
.SH AUTHOR
Yoshinori OKUJI <yo@nexedi.com>
.SH "SEE ALSO"
\fIhttp://www.xmldb.org/xupdate/index.html\fR,
\fIhttp://www.w3.org/TR/xpath\fR,
\fIhttp://www.w3.org/TR/REC-xml\fR,
\fIhttp://erp5.org\fR
#! /usr/bin/env python
from distutils.core import setup
# Metadata of the erp5diff distribution: the module ERP5Diff, the
# command-line script erp5diff and its manual page.
metadata = {
    "name": "erp5diff",
    "version": "0.1",
    "description": "XUpdate Generator for ERP5",
    "author": "Yoshinori OKUJI",
    "author_email": "yo@nexedi.com",
    "url": "http://nexedi.com",
    "license": "GPL",
    "py_modules": ["ERP5Diff"],
    "scripts": ["erp5diff"],
    "data_files": [("share/man/man1", ["erp5diff.1"])],
}

setup(**metadata)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment