1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# -*- coding: utf-8 -*-
from xml.dom.minidom import parse, parseString, Node
from xml.sax.saxutils import unescape
# an extremely simple system for loading in XML into objects
class Object:
pass
class XMLObject:
def __init__(self):
self.elements = Object()
self.first = Object()
self.attributes = {}
self.text = ''
def getElementNames(self):
return [element for element in dir(self.elements)
if not element.startswith('__')]
def getAttributes(self):
return self.attributes
def elementToObject(parent, node):
# create an object to represent element node
object = XMLObject()
# make object attributes off node attributes
for key, value in node.attributes.items():
object.attributes[key] = value
# make lists of child elements (or ignore them)
for child in node.childNodes:
nodeToObject(object, child)
# add ourselves to parent node
name = str(node.nodeName)
l = getattr(parent.elements, name, [])
l.append(object)
setattr(parent.elements, name, l)
def attributeToObject(parent, node):
# should never be called
pass
def textToObject(parent, node):
# add this text to parents text content
parent.text += unescape(node.data)
def processingInstructionToObject(parent, node):
# don't do anything with these
pass
def commentToObject(parent, node):
# don't do anything with these
pass
def documentToObject(parent, node):
elementToObject(parent, node.documentElement)
def documentTypeToObject(parent, node):
# don't do anything with these
pass
_map = {
Node.ELEMENT_NODE: elementToObject,
Node.ATTRIBUTE_NODE: attributeToObject,
Node.TEXT_NODE: textToObject,
# Node.CDATA_SECTION_NODE:
# Node.ENTITY_NODE:
Node.PROCESSING_INSTRUCTION_NODE: processingInstructionToObject,
Node.COMMENT_NODE: commentToObject,
Node.DOCUMENT_NODE: documentToObject,
Node.DOCUMENT_TYPE_NODE: documentTypeToObject,
# Node.NOTATION_NODE:
}
def nodeToObject(parent, node):
_map[node.nodeType](parent, node)
def simplify_single_entries(object):
for name in object.getElementNames():
l = getattr(object.elements, name)
# set the first subelement (in case it's just one, this is easy)
setattr(object.first, name, l[0])
# now do the same for rest
for element in l:
simplify_single_entries(element)
def XMLToObjectsFromFile(path):
return XMLToObjects(parse(path))
def XMLToObjectsFromString(s):
return XMLToObjects(parseString(s))
def XMLToObjects(document):
object = XMLObject()
documentToObject(object, document)
document.unlink()
simplify_single_entries(object)
return object