This is a summary of the general approach to reading XML using the DOM in Python:
#!/usr/bin/env python3

# Load minidom
from xml.dom import minidom

# Read a filename from the command line
import sys
progName = sys.argv.pop(0)
fileName = sys.argv.pop(0)

# Parse the XML file into a DOM document
doc = minidom.parse(fileName)

# Collect every <tagname> element in the document
tagnameElementSet = doc.getElementsByTagName('tagname')

# Pick the first matching element; either spelling can be used
tagnameElement = tagnameElementSet.item(0)
# - or -
tagnameElement = tagnameElementSet[0]

# The text content of an element lives in a child text node
tagnameTextNode = tagnameElement.firstChild
tagnameText = tagnameTextNode.data

# Attribute values are read directly from the element
attributeText = tagnameElement.getAttribute('attr-name')
This is the 'obvious' way to do it:
#!/usr/bin/env python3
from xml.dom import minidom

# Parse the XML file into a DOM document
doc = minidom.parse('test.xml')

# Print one line per <species> element
for species in doc.getElementsByTagName('species'):
    speciesName = species.getAttribute('name')
    # [0] takes the first matching child element, so each species is
    # expected to contain at least one <common-name> and one <conservation>
    commonName = species.getElementsByTagName('common-name')[0].firstChild.data
    conservation = species.getElementsByTagName('conservation')[0].getAttribute('status')
    print("%s (%s) %s" % (commonName, speciesName, conservation))
This is now my preferred approach: don't worry about getting the first item; simply do everything as a loop, so that the loops exactly mirror the structure of the XML.
#!/usr/bin/env python3
from xml.dom import minidom

# Parse the XML file into a DOM document
doc = minidom.parse('test.xml')

# One loop per level of the XML: <species>, then its child elements
for species in doc.getElementsByTagName('species'):
    speciesName = species.getAttribute('name')

    # Reset per species so that a missing child element cannot reuse the
    # previous species' value (or raise NameError on the first species).
    # '' matches what getAttribute() returns for a missing attribute.
    commonName = ''
    conservation = ''

    for commonNameElement in species.getElementsByTagName('common-name'):
        commonName = commonNameElement.firstChild.data

    for conservationElement in species.getElementsByTagName('conservation'):
        conservation = conservationElement.getAttribute('status')

    # Printed once per species, after its child elements have been read
    print("%s (%s) %s" % (commonName, speciesName, conservation))