Python XML
XML
XML Data
- xml
{% embed include file="src/examples/xml/data.xml" %}
Expat - Callbacks
- xml.parsers.expat
import xml.parsers.expat
import sys
# Expect exactly one command-line argument: the XML file to parse.
if len(sys.argv) != 2:
    # sys.exit() with a string prints it to stderr and exits with status 1.
    # The bare exit() helper is injected by the site module for interactive
    # sessions and is not guaranteed to exist in every run mode.
    sys.exit(f"Usage: {sys.argv[0]} FILENAME")
file = sys.argv[1]
def start_element(name, attrs):
    """Report an opening tag.

    Args:
        name: Tag name of the element being opened.
        attrs: Attributes of the element, printed as received.
    """
    print(f'Start element: {name} {attrs}')
def end_element(name):
    """Report a closing tag.

    Args:
        name: Tag name of the element being closed.
    """
    print(f'End element: {name}')
def char_data(data):
    """Report a chunk of character data.

    The repr() form is printed so that whitespace-only chunks (newlines,
    indentation between tags) stay visible in the output.

    Args:
        data: The text chunk to report.
    """
    print(f'Character data: {data!r}')
# Create the parser and register one callback per event type.
p = xml.parsers.expat.ParserCreate()
p.StartElementHandler = start_element
p.EndElementHandler = end_element
p.CharacterDataHandler = char_data

# ParseFile() reads from a binary file object; the with-block makes sure the
# file is closed even if the parser raises on malformed input.
with open(file, 'rb') as fh:
    p.ParseFile(fh)

print('done')
XML DOM - Document Object Model
import xml.dom.minidom
import sys
# Expect exactly one command-line argument: the XML file to load.
if len(sys.argv) != 2:
    # sys.exit() with a string prints it to stderr and exits with status 1.
    sys.exit(f"Usage: {sys.argv[0]} FILENAME")
file = sys.argv[1]

# Parse the whole document into an in-memory DOM tree.
dom = xml.dom.minidom.parse(file)

# documentElement is always the root element, whereas firstChild could be a
# comment or processing instruction that precedes it.
root = dom.documentElement
print(root.tagName)

print('')

# Only element nodes carry a tagName and attributes; skip the whitespace text
# nodes (and any comments) that sit between them.
for node in root.childNodes:
    if node.nodeType == node.ELEMENT_NODE:
        print('name: ', node.tagName)
        print('id: ', node.getAttribute('id'))

print('')

# getElementsByTagName() searches the whole tree, not only direct children.
emails = dom.getElementsByTagName("email")
for e in emails:
    # An empty <email/> has no text child; print an empty string for it.
    text_node = e.firstChild
    print('email', e.getAttribute('id'), text_node.data if text_node is not None else '')
main
name: person
id: 1
name: person
id: 3
email home moo@zorghome.com
email work moo@work.com
XML SAX - Simple API for XML
import xml.sax
import sys
# Expect exactly one command-line argument: the XML file to parse.
if len(sys.argv) != 2:
    # sys.exit() with a string prints it to stderr and exits with status 1.
    # The bare exit() helper is injected by the site module for interactive
    # sessions and is not guaranteed to exist in every run mode.
    sys.exit(f"Usage: {sys.argv[0]} FILENAME")
file = sys.argv[1]
class EventHandler(xml.sax.ContentHandler):
    """SAX content handler that prints every parsing event as it arrives."""

    def startElement(self, name, attrs):
        """Print the opening tag's name together with its attributes as a dict."""
        # Copy through the public Attributes API instead of reaching into the
        # private ``_attrs`` field of one particular implementation.
        print('start', (name, dict(attrs.items())))

    def characters(self, text):
        """Print a chunk of character data unless it consists only of whitespace."""
        if not text.isspace():
            print('text', text)

    def endElement(self, name):
        """Print the name of the element being closed."""
        print('end', name)
# Parse the file, streaming every SAX event to a fresh EventHandler.
xml.sax.parse(file, EventHandler())
start ('main', {})
start ('person', {'id': '1'})
start ('fname', {})
text Foo
end fname
start ('lname', {})
text Bar
end lname
end person
start ('person', {'id': '3'})
start ('fname', {})
text Moo
end fname
start ('lname', {})
text Zorg
end lname
start ('email', {'id': 'home'})
text moo@zorghome.com
end email
start ('email', {'id': 'work'})
text moo@work.com
end email
end person
end main
SAX collect
import xml.sax
import sys
# Expect exactly one command-line argument: the XML file to parse.
if len(sys.argv) != 2:
    # sys.exit() with a string prints it to stderr and exits with status 1.
    # The bare exit() helper is injected by the site module for interactive
    # sessions and is not guaranteed to exist in every run mode.
    sys.exit(f"Usage: {sys.argv[0]} FILENAME")
file = sys.argv[1]
class EventHandler(xml.sax.ContentHandler):
    """SAX handler that gathers every <email> element into a caller-supplied list.

    Each collected element is a dict with the keys 'name', 'attr' and, when
    the element had character data, 'text'.
    """

    def __init__(self, c):
        """Remember the list ``c`` that finished email elements are appended to."""
        super().__init__()
        # Stack of the currently open elements, innermost last.
        self.path = []
        self.collector = c

    def startElement(self, name, attrs):
        """Push a record for the newly opened element onto the stack."""
        # Copy through the public Attributes API rather than the private
        # ``_attrs`` field.
        self.path.append({'name': name, 'attr': dict(attrs.items())})

    def characters(self, text):
        """Append a chunk of character data to the innermost open element."""
        # A parser may deliver one run of text in several chunks, so
        # accumulate instead of overwriting the previous chunk.
        current = self.path[-1]
        current['text'] = current.get('text', '') + text

    def endElement(self, name):
        """Pop the element being closed and keep it if it is an <email>."""
        element = self.path.pop()
        print('End name: ', name)
        if element['name'] == 'email':
            # Use the list handed to the constructor, not a module-level
            # global that happens to share its name.
            self.collector.append(element)
# List that the handler fills with the <email> elements it encounters.
collector = []
xml.sax.parse(file, EventHandler(collector))
# Show everything that was gathered during the parse.
print(collector)
End name: fname
End name: lname
End name: person
End name: fname
End name: lname
End name: email
End name: email
End name: person
End name: main
[{'name': 'email', 'attr': {'id': 'home'}, 'text': 'moo@zorghome.com'},
{'name': 'email', 'attr': {'id': 'work'}, 'text': 'moo@work.com'}]
XML elementtree
import xml.etree.ElementTree as ET
import sys
# Expect exactly one command-line argument: the XML file to load.
if len(sys.argv) != 2:
    # sys.exit() with a string prints it to stderr and exits with status 1.
    sys.exit(f"Usage: {sys.argv[0]} FILENAME")
file = sys.argv[1]

# Load the whole document and print the tag of its root element.
tree = ET.parse(file)
root = tree.getroot()
print(root.tag)

# iter() walks the entire subtree, so nesting depth does not matter.
for p in root.iter('person'):
    print(p.attrib)
print('')

for p in root.iter('email'):
    print(p.attrib, p.text)
print('')

# XPath subset: any element, at any depth, whose id attribute equals 'home'.
elements = tree.findall(".//*[@id='home']")
for e in elements:
    print(e.tag, e.attrib)
main
{'id': '1'}
{'id': '3'}
{'id': 'home'} moo@zorghome.com
{'id': 'work'} moo@work.com
email {'id': 'home'}
SAX with coroutine
import xml.sax
# Path of the XML input; being relative, it is resolved against the current
# working directory.
file = 'examples/xml/data.xml'
class EventHandler(xml.sax.ContentHandler):
    """SAX handler that forwards every event to ``target`` via ``target.send()``.

    Events are tuples: ('start', (name, attributes_dict)), ('text', chunk)
    and ('end', name).
    """

    def __init__(self, target):
        """Store the object whose ``send()`` method receives the events."""
        super().__init__()
        self.target = target

    def startElement(self, name, attrs):
        """Forward an opening tag together with its attributes as a dict."""
        # Copy through the public Attributes API rather than the private
        # ``_attrs`` field.
        self.target.send(('start', (name, dict(attrs.items()))))

    def characters(self, text):
        """Forward a chunk of character data."""
        self.target.send(('text', text))

    def endElement(self, name):
        """Forward a closing tag."""
        self.target.send(('end', name))
def coroutine(func):
    """Decorator that primes a generator-based coroutine.

    Calling the decorated function creates the generator and advances it to
    its first ``yield``, so the caller can use ``send()`` right away.

    Args:
        func: Generator function to wrap.

    Returns:
        A function that takes the same arguments as ``func`` and returns the
        already-started generator.
    """
    def start(*args, **kwargs):
        cr = func(*args, **kwargs)
        # The built-in next() replaces the Python 2-only cr.next() method.
        next(cr)
        return cr
    return start
# example use
if __name__ == '__main__':
    @coroutine
    def printer():
        """Print every event that is sent in."""
        while True:
            event = (yield)
            print(event)

    # Feed every SAX event of the file into the printer coroutine.
    xml.sax.parse(file, EventHandler(printer()))
copied from Stack Overflow based on coroutines
import xml.sax
# Path of the XML input; being relative, it is resolved against the current
# working directory.
file = 'examples/xml/data.xml'
class EventHandler(xml.sax.ContentHandler):
    """SAX handler that forwards every event to ``target`` via ``target.send()``.

    Events are tuples: ('start', (name, attributes_dict)), ('text', chunk)
    and ('end', name).
    """

    def __init__(self, target):
        """Store the object whose ``send()`` method receives the events."""
        super().__init__()
        self.target = target

    def startElement(self, name, attrs):
        """Forward an opening tag together with its attributes as a dict."""
        # Copy through the public Attributes API rather than the private
        # ``_attrs`` field.
        self.target.send(('start', (name, dict(attrs.items()))))

    def characters(self, text):
        """Forward a chunk of character data."""
        self.target.send(('text', text))

    def endElement(self, name):
        """Forward a closing tag."""
        self.target.send(('end', name))
def coroutine(func):
    """Decorator that primes a generator-based coroutine.

    Calling the decorated function creates the generator and advances it to
    its first ``yield``, so the caller can use ``send()`` right away.

    Args:
        func: Generator function to wrap.

    Returns:
        A function that takes the same arguments as ``func`` and returns the
        already-started generator.
    """
    def start(*args, **kwargs):
        cr = func(*args, **kwargs)
        # The built-in next() replaces the Python 2-only cr.next() method.
        next(cr)
        return cr
    return start
# example use
if __name__ == '__main__':
    @coroutine
    def printer():
        """Print every event that is sent in."""
        while True:
            event = (yield)
            print(event)

    # Feed every SAX event of the file into the printer coroutine.
    xml.sax.parse(file, EventHandler(printer()))