Keyboard shortcuts

Press ← or → to navigate between chapters

Press S or / to search in the book

Press ? to show this help

Press Esc to hide this help

Python XML

XML

XML Data

  • xml

{% embed include file="src/examples/xml/data.xml" %}

Expat - Callbacks

  • xml.parsers.expat
import xml.parsers.expat
import sys

# Require exactly one command-line argument: the XML file to parse.
# sys.exit() replaces the exit() helper, which is injected by the `site`
# module for interactive use and is missing when Python is started with -S.
if len(sys.argv) != 2:
    sys.exit(f"Usage: {sys.argv[0]} FILENAME")

file = sys.argv[1]


def start_element(name, attrs):
    """Print the tag name and attributes of an element that was just opened."""
    print(f'Start element: {name} {attrs}')


def end_element(name):
    """Print the tag name of an element that was just closed."""
    print(f'End element: {name}')


def char_data(data):
    """Print the repr() of a chunk of character data, so whitespace is visible."""
    print(f'Character data: {data!r}')


# Create the parser and register the three callbacks.
p = xml.parsers.expat.ParserCreate()

p.StartElementHandler = start_element
p.EndElementHandler = end_element
p.CharacterDataHandler = char_data

# Open in binary mode and let `with` close the handle, even if parsing raises.
with open(file, 'rb') as fh:
    p.ParseFile(fh)

print('done')

XML DOM - Document Object Model

import xml.dom.minidom
import sys

# Require exactly one command-line argument: the XML file to parse.
# sys.exit() replaces the exit() helper, which is injected by the `site`
# module for interactive use and is missing when Python is started with -S.
if len(sys.argv) != 2:
    sys.exit(f"Usage: {sys.argv[0]} FILENAME")

file = sys.argv[1]

dom = xml.dom.minidom.parse(file)

# documentElement is always the root element, whereas firstChild may be a
# comment, processing instruction or doctype that precedes it.
root = dom.documentElement
print(root.tagName)

print('')

# Only element nodes have tagName/getAttribute, so skip text, comments, etc.
for node in root.childNodes:
    if node.nodeType == node.ELEMENT_NODE:
        print('name: ', node.tagName)
        print('id: ', node.getAttribute('id'))

print('')

emails = dom.getElementsByTagName("email")
for e in emails:
    # An empty <email/> has no child node; print an empty string for it.
    text = e.firstChild.data if e.firstChild is not None else ''
    print('email', e.getAttribute('id'), text)
main

name:  person
id:  1
name:  person
id:  3

email home moo@zorghome.com
email work moo@work.com

XML SAX - Simple API for XML

import xml.sax
import sys

# Require exactly one command-line argument: the XML file to parse.
# sys.exit() replaces the exit() helper, which is injected by the `site`
# module for interactive use and is missing when Python is started with -S.
if len(sys.argv) != 2:
    sys.exit(f"Usage: {sys.argv[0]} FILENAME")

file = sys.argv[1]

class EventHandler(xml.sax.ContentHandler):
    """Print every SAX event: element starts, non-blank text and element ends."""

    def startElement(self, name, attrs):
        # Copy the attributes through the public mapping interface instead
        # of reaching into the private `_attrs` dict.
        print('start', (name, dict(attrs.items())))

    def characters(self, text):
        # Whitespace-only chunks are skipped.
        if not text.isspace():
            print('text', text)

    def endElement(self, name):
        print('end', name)


# Stream the file through the SAX parser; the handler prints each event.
xml.sax.parse(source=file, handler=EventHandler())

start ('main', {})
start ('person', {'id': '1'})
start ('fname', {})
text Foo
end fname
start ('lname', {})
text Bar
end lname
end person
start ('person', {'id': '3'})
start ('fname', {})
text Moo
end fname
start ('lname', {})
text Zorg
end lname
start ('email', {'id': 'home'})
text moo@zorghome.com
end email
start ('email', {'id': 'work'})
text moo@work.com
end email
end person
end main

SAX collect

import xml.sax
import sys

# Require exactly one command-line argument: the XML file to parse.
# sys.exit() replaces the exit() helper, which is injected by the `site`
# module for interactive use and is missing when Python is started with -S.
if len(sys.argv) != 2:
    sys.exit(f"Usage: {sys.argv[0]} FILENAME")

file = sys.argv[1]

class EventHandler(xml.sax.ContentHandler):
    """Collect every <email> element (name, attributes, text) into a list.

    `c` is the list that finished <email> elements are appended to.
    """

    def __init__(self, c):
        super().__init__()
        self.path = []  # stack of the elements that are currently open
        self.collector = c

    def startElement(self, name, attrs):
        # Copy the attributes through the public mapping interface instead
        # of reaching into the private `_attrs` dict.
        self.path.append({'name': name, 'attr': dict(attrs.items())})

    def characters(self, text):
        # The parser may deliver one run of text in several chunks, so
        # append to what is already stored rather than overwrite it.
        current = self.path[-1]
        current['text'] = current.get('text', '') + text

    def endElement(self, name):
        element = self.path.pop()
        print('End name: ', name)
        if element['name'] == 'email':
            # Use the list handed to the constructor, not a global variable
            # that merely happens to share its name.
            self.collector.append(element)

# The handler appends each finished <email> element to this list.
collector = []
xml.sax.parse(source=file, handler=EventHandler(c=collector))
print(collector)
End name:  fname
End name:  lname
End name:  person
End name:  fname
End name:  lname
End name:  email
End name:  email
End name:  person
End name:  main
[{'name': 'email', 'attr': {'id': 'home'}, 'text': 'moo@zorghome.com'},
 {'name': 'email', 'attr': {'id': 'work'}, 'text': 'moo@work.com'}]

XML elementtree

import xml.etree.ElementTree as ET
import sys

# Require exactly one command-line argument: the XML file to parse.
# sys.exit() replaces the exit() helper, which is injected by the `site`
# module for interactive use and is missing when Python is started with -S.
if len(sys.argv) != 2:
    sys.exit(f"Usage: {sys.argv[0]} FILENAME")

file = sys.argv[1]

tree = ET.parse(file)
root = tree.getroot()
print(root.tag)

# Attributes of every <person> element anywhere below the root.
for p in root.iter('person'):
    print(p.attrib)

print('')

# Attributes and text of every <email> element.
for p in root.iter('email'):
    print(p.attrib, p.text)

print('')

# XPath subset: any element, at any depth, whose id attribute is "home".
elements = tree.findall(".//*[@id='home']")
for e in elements:
    print(e.tag, e.attrib)
main
{'id': '1'}
{'id': '3'}

{'id': 'home'} moo@zorghome.com
{'id': 'work'} moo@work.com

email {'id': 'home'}

SAX with coroutine

import xml.sax

# Path of the XML file to parse; a relative path, so it depends on the
# directory the script is started from.
file = 'examples/xml/data.xml'

class EventHandler(xml.sax.ContentHandler):
    """Forward every SAX event to `target` as an (event, payload) tuple.

    `target` is any object with a `send()` method, for example a generator
    that has already been advanced to its first `yield`.
    """

    def __init__(self, target):
        super().__init__()
        self.target = target

    def startElement(self, name, attrs):
        # Copy the attributes through the public mapping interface instead
        # of reaching into the private `_attrs` dict.
        self.target.send(('start', (name, dict(attrs.items()))))

    def characters(self, text):
        self.target.send(('text', text))

    def endElement(self, name):
        self.target.send(('end', name))

def coroutine(func):
    """Decorator that primes a generator-based coroutine.

    Calling the decorated function creates the generator and advances it to
    its first `yield`, so the caller can `send()` values to it immediately.
    """
    # This was declared as `printer()` with no parameter, which left `func`
    # undefined and the `@coroutine` decorator used below nonexistent.
    def start(*args, **kwargs):
        cr = func(*args, **kwargs)
        next(cr)  # generators have no .next() method in Python 3
        return cr
    return start

# example use
if __name__ == '__main__':
    @coroutine
    def printer():
        """Print every event that is sent in, until the program ends."""
        while True:
            print((yield))

    xml.sax.parse(source=file, handler=EventHandler(target=printer()))



copied from Stack Overflow based on coroutines

import xml.sax

# Path of the XML file to parse; a relative path, so it depends on the
# directory the script is started from.
file = 'examples/xml/data.xml'

class EventHandler(xml.sax.ContentHandler):
    """Forward every SAX event to `target` as an (event, payload) tuple.

    `target` is any object with a `send()` method, for example a generator
    that has already been advanced to its first `yield`.
    """

    def __init__(self, target):
        super().__init__()
        self.target = target

    def startElement(self, name, attrs):
        # Copy the attributes through the public mapping interface instead
        # of reaching into the private `_attrs` dict.
        self.target.send(('start', (name, dict(attrs.items()))))

    def characters(self, text):
        self.target.send(('text', text))

    def endElement(self, name):
        self.target.send(('end', name))

def coroutine(func):
    """Decorator that primes a generator-based coroutine.

    Calling the decorated function creates the generator and advances it to
    its first `yield`, so the caller can `send()` values to it immediately.
    """
    def start(*args, **kwargs):
        cr = func(*args, **kwargs)
        next(cr)  # generators have no .next() method in Python 3
        return cr
    return start

# example use
if __name__ == '__main__':
    @coroutine
    def printer():
        """Print every event that is sent in, until the program ends."""
        while True:
            print((yield))

    xml.sax.parse(source=file, handler=EventHandler(target=printer()))