XML (eXtensible Markup Language) is a standard for markup languages created by W3C.
It is a subset of SGML (Standard Generalized Markup Language).
It is widely used for all sorts of data and data description.
Most modern programming languages and runtimes have good support for XML.
In the following sections we will see various techniques to read, update and write XML using various programming languages and XML libraries.
The examples will use the following super simple XML file.
employees.xml:
<?xml version='1.0' standalone='yes'?>
<employees>
<employee no='1'>
<name>Alan A</name>
<role>Manager</role>
</employee>
<employee no='2'>
<name>Brian B</name>
<role>Engineer</role>
</employee>
<employee no='3'>
<name>Chris C</name>
<role>Sales rep</role>
</employee>
</employees>
The C and C++ examples will be shown in simple - possibly a bit old-fashioned - C and C++. I expect the examples to work with any C89 and C++98 compliant compiler.
W3C DOM is a W3C standard for an in-memory tree structure representing XML.
package xmlproc;
import java.io.File;
import java.io.FileWriter;
import java.io.Writer;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.bootstrap.DOMImplementationRegistry;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSOutput;
import org.w3c.dom.ls.LSSerializer;
/**
 * Reads employees.xml into a W3C DOM tree, prints every employee,
 * appends a new employee and writes the result to employees2.xml.
 */
public class W3CDOM {
    private final static String XML_FILE = "/work/employees.xml";

    /**
     * Program entry point.
     *
     * @param args not used
     * @throws Exception if the XML cannot be parsed or the output cannot be written
     */
    public static void main(String[] args) throws Exception {
        // read from file to DOM tree
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = dbf.newDocumentBuilder();
        Document doc = db.parse(new File(XML_FILE));
        // iterate over all employee elements
        NodeList employees = doc.getElementsByTagName("employee");
        for (int i = 0; i < employees.getLength(); i++) {
            Element employee = (Element)employees.item(i);
            // find attribute no
            String no = employee.getAttribute("no");
            // find sub elements name and role
            String name = "";
            String role = "";
            NodeList children = employee.getChildNodes();
            for (int j = 0; j < children.getLength(); j++) {
                Node child = children.item(j);
                if (child.getNodeType() == Node.ELEMENT_NODE) {
                    // getTextContent() also works for an empty element,
                    // where getFirstChild() would return null
                    if (child.getNodeName().equals("name")) {
                        name = child.getTextContent();
                    }
                    if (child.getNodeName().equals("role")) {
                        role = child.getTextContent();
                    }
                }
            }
            // print
            System.out.println("no=" + no);
            System.out.println("name=" + name);
            System.out.println("role=" + role);
        }
        // add employee
        Element newname = doc.createElement("name");
        newname.appendChild(doc.createTextNode("Dave D"));
        Element newrole = doc.createElement("role");
        newrole.appendChild(doc.createTextNode("Intern"));
        Element newemployee = doc.createElement("employee");
        newemployee.setAttribute("no", "4");
        newemployee.appendChild(newname);
        newemployee.appendChild(newrole);
        doc.getDocumentElement().appendChild(newemployee);
        // write out
        DOMImplementation impl = DOMImplementationRegistry.newInstance().getDOMImplementation("XML 3.0");
        DOMImplementationLS feature = (DOMImplementationLS)impl.getFeature("LS","3.0");
        LSSerializer ser = feature.createLSSerializer();
        ser.getDomConfig().setParameter("format-pretty-print", true);
        LSOutput output = feature.createLSOutput();
        Writer w = new FileWriter("/work/employees2.xml");
        try {
            output.setCharacterStream(w);
            ser.write(doc, output);
        } finally {
            // close the file even when serialization fails
            w.close();
        }
    }
}
Finding sub elements is a bit cumbersome (it can be done easier using XPath - see below). And the write out API really looks like design by committee.
Note that it uses the so-called abstract factory pattern. This allows one to change the implementation used.
How to:
// Each statement below sets the javax.xml.parsers.DocumentBuilderFactory system property
// to a different implementation class; they are alternatives, so use only one of them.
System.setProperty("javax.xml.parsers.DocumentBuilderFactory", "com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl"); // use default (copy of Apache Xerces)
System.setProperty("javax.xml.parsers.DocumentBuilderFactory", "org.apache.xerces.jaxp.DocumentBuilderFactoryImpl"); // use Apache Xerces
System.setProperty("javax.xml.parsers.DocumentBuilderFactory", "org.apache.crimson.jaxp.DocumentBuilderFactoryImpl"); // use old Crimson
using System;
using System.IO;
using System.Xml;
namespace XmlProc.W3CDOM
{
    /// <summary>
    /// Loads employees.xml into an <see cref="XmlDocument"/>, lists the employees,
    /// appends one more employee and saves the result as employees2.xml.
    /// </summary>
    public class Program
    {
        private const String XML_FILE = @"C:\work\employees.xml";

        /// <summary>Program entry point.</summary>
        /// <param name="args">Not used.</param>
        public static void Main(string[] args)
        {
            // load the file into an in-memory DOM tree
            var doc = new XmlDocument();
            doc.Load(XML_FILE);

            // walk every employee element below the root
            XmlNodeList employees = doc.DocumentElement.GetElementsByTagName("employee");
            foreach (XmlNode node in employees)
            {
                var employee = (XmlElement)node;

                // attribute no
                string no = employee.Attributes["no"].Value;

                // child elements name and role
                string name = "";
                string role = "";
                foreach (XmlNode child in employee.ChildNodes)
                {
                    if (child.NodeType != XmlNodeType.Element)
                    {
                        continue;
                    }
                    switch (child.Name)
                    {
                        case "name":
                            name = child.FirstChild.Value;
                            break;
                        case "role":
                            role = child.FirstChild.Value;
                            break;
                    }
                }

                // print
                Console.WriteLine("no = " + no);
                Console.WriteLine("name = " + name);
                Console.WriteLine("role = " + role);
            }

            // build the new employee element and attach it to the root
            XmlElement newemployee = doc.CreateElement("employee");
            newemployee.SetAttribute("no", "4");

            XmlElement newname = doc.CreateElement("name");
            newname.AppendChild(doc.CreateTextNode("Dave D"));
            newemployee.AppendChild(newname);

            XmlElement newrole = doc.CreateElement("role");
            newrole.AppendChild(doc.CreateTextNode("Intern"));
            newemployee.AppendChild(newrole);

            doc.DocumentElement.AppendChild(newemployee);

            // write out
            using (var sw = new StreamWriter(@"C:\work\employees2.xml"))
            {
                doc.Save(sw);
            }

            Console.ReadKey();
        }
    }
}
Finding sub elements is a bit cumbersome (it can be done easier using XPath - see below).
Imports System
Imports System.IO
Imports System.Xml
Namespace XmlProc.W3CDOM
    ''' <summary>
    ''' Loads employees.xml into an XmlDocument, lists the employees,
    ''' appends one more employee and saves the result as employees2.xml.
    ''' </summary>
    Public Class Program
        Private Const XML_FILE As String = "C:\work\employees.xml"

        ''' <summary>Program entry point.</summary>
        Public Shared Sub Main(args As String())
            ' load the file into an in-memory DOM tree
            Dim doc As New XmlDocument()
            doc.Load(XML_FILE)

            ' walk every employee element below the root
            Dim employees As XmlNodeList = doc.DocumentElement.GetElementsByTagName("employee")
            For Each node As XmlNode In employees
                Dim employee As XmlElement = DirectCast(node, XmlElement)

                ' attribute no
                Dim no As String = employee.Attributes("no").Value

                ' child elements name and role
                Dim name As String = ""
                Dim role As String = ""
                For Each child As XmlNode In employee.ChildNodes
                    If child.NodeType <> XmlNodeType.Element Then
                        Continue For
                    End If
                    Select Case child.Name
                        Case "name"
                            name = child.FirstChild.Value
                        Case "role"
                            role = child.FirstChild.Value
                    End Select
                Next

                ' print
                Console.WriteLine("no = " & no)
                Console.WriteLine("name = " & name)
                Console.WriteLine("role = " & role)
            Next

            ' build the new employee element and attach it to the root
            Dim newemployee As XmlElement = doc.CreateElement("employee")
            newemployee.SetAttribute("no", "4")

            Dim newname As XmlElement = doc.CreateElement("name")
            newname.AppendChild(doc.CreateTextNode("Dave D"))
            newemployee.AppendChild(newname)

            Dim newrole As XmlElement = doc.CreateElement("role")
            newrole.AppendChild(doc.CreateTextNode("Intern"))
            newemployee.AppendChild(newrole)

            doc.DocumentElement.AppendChild(newemployee)

            ' write out
            Using sw As New StreamWriter("C:\work\employees2.xml")
                doc.Save(sw)
            End Using

            Console.ReadKey()
        End Sub
    End Class
End Namespace
Finding sub elements is a bit cumbersome (it can be done easier using XPath - see below).
<html>
<head>
<title>W3C DOM</title>
</head>
<body>
<h1>W3C DOM</h1>
<table>
<tr>
<th>No</th>
<th>Name</th>
<th>Role</th>
</tr>
<?php
// Lists the employees from XML_FILE as table rows, appends a new employee
// to the DOM tree and shows the resulting XML inside a <pre> element.
define('XML_FILE','/work/employees.xml');
// read from file to DOM tree
$doc = new DOMDocument();
$doc->load(XML_FILE);
// iterate over all employee elements
$employees = $doc->getElementsByTagName('employee');
foreach($employees as $employee) {
    // find attribute no
    $no = $employee->attributes->getNamedItem('no')->nodeValue;
    // find sub elements name and role
    $name = '';
    $role = '';
    foreach($employee->childNodes as $child) {
        if($child->nodeType == XML_ELEMENT_NODE) {
            if($child->nodeName == 'name') {
                $name = $child->firstChild->nodeValue;
            }
            if($child->nodeName == 'role') {
                $role = $child->firstChild->nodeValue;
            }
        }
    }
    // print (values are escaped so that characters such as < and & stay valid HTML)
    echo "<tr>\r\n";
    echo "<td>" . htmlspecialchars($no) . "</td>\r\n";
    echo "<td>" . htmlspecialchars($name) . "</td>\r\n";
    echo "<td>" . htmlspecialchars($role) . "</td>\r\n";
    echo "</tr>\r\n";
}
echo "</table>\r\n";
// add employee
$newname = $doc->createElement('name');
$newname->appendChild($doc->createTextNode('Dave D'));
$newrole = $doc->createElement('role');
$newrole->appendChild($doc->createTextNode('Intern'));
$newemployee = $doc->createElement('employee');
$newemployee->setAttribute('no', '4');
$newemployee->appendChild($newname);
$newemployee->appendChild($newrole);
$doc->documentElement->appendChild($newemployee);
// write out (the markup must be escaped to be displayed as text by the browser)
echo "<pre>\r\n";
echo htmlspecialchars($doc->saveXML());
echo "</pre>\r\n";
?>
</body>
</html>
Finding sub elements is a bit cumbersome (it can be done easier using XPath - see below).
<html>
<head>
<title>W3C DOM</title>
</head>
<body>
<h1>W3C DOM</h1>
<table>
<tr>
<th>No</th>
<th>Name</th>
<th>Role</th>
</tr>
<%
' Lists the employees from employees.xml as table rows, appends a new employee
' to the DOM tree and shows the resulting XML inside a <pre> element.
NODE_ELEMENT = 1
' read from file to DOM tree
Set doc = CreateObject("MSXML.DOMDocument")
doc.Async = False
doc.Load("C:\work\employees.xml")
' iterate over all employee elements
Set employees = doc.GetElementsByTagName("employee")
For Each employee in employees
    ' find attribute no
    no = employee.Attributes.GetNamedItem("no").Value
    ' find sub elements name and role (reset first so values do not carry over from the previous employee)
    name = ""
    role = ""
    For Each child in employee.ChildNodes
        If(child.nodeType = NODE_ELEMENT And child.NodeName = "name") Then
            name = child.FirstChild.NodeValue
        End If
        If(child.nodeType = NODE_ELEMENT And child.NodeName = "role") Then
            role = child.FirstChild.NodeValue
        End If
    Next
    ' print (values are escaped so that characters such as < and & stay valid HTML)
    Response.Write "<tr>" & vbCrLf
    Response.Write "<td>" & Server.HTMLEncode(no) & "</td>" & vbCrLf
    Response.Write "<td>" & Server.HTMLEncode(name) & "</td>" & vbCrLf
    Response.Write "<td>" & Server.HTMLEncode(role) & "</td>" & vbCrLf
    Response.Write "</tr>" & vbCrLf
Next
Response.Write "</table>" & vbCrLf
' add employee
Set newname = doc.CreateElement("name")
newname.AppendChild(doc.CreateTextNode("Dave D"))
Set newrole = doc.CreateElement("role")
newrole.AppendChild(doc.CreateTextNode("Intern"))
Set newemployee = doc.CreateElement("employee")
Call newemployee.SetAttribute("no", "4")
newemployee.AppendChild(newname)
newemployee.AppendChild(newrole)
doc.DocumentElement.AppendChild(newemployee)
' write out (the markup must be escaped to be displayed as text by the browser)
Response.Write "<pre>" & vbCrLf
Response.Write Server.HTMLEncode(doc.XML)
Response.Write "</pre>" & vbCrLf
' cleanup
Set employees = Nothing
Set newname = Nothing
Set newrole = Nothing
Set newemployee = Nothing
Set doc = Nothing
%>
</body>
</html>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#define XML_FILE "employees.xml"
int main()
{
xmlDoc *doc;
xmlNode *root;
xmlNode *employee;
xmlNode *element;
xmlChar *no;
xmlChar *name;
xmlChar *role;
/* read from file to DOM tree */
doc = xmlReadFile(XML_FILE, NULL, 0);
if (doc == NULL)
{
fprintf(stderr, "Failed to parse %s\n", XML_FILE);
return EXIT_FAILURE;
}
/* iterate over all elements below root (employee elements) */
root = xmlDocGetRootElement(doc);
employee = root->children;
do
{
if(!xmlIsBlankNode(employee))
{
/* find attribute no */
no = xmlGetProp(employee, "no");
/* find sub elements name and role */
element = employee->children;
do
{
if(!xmlIsBlankNode(element))
{
if(strcmp(element->name, "name") == 0)
{
name = element->children->content;
}
if(strcmp(element->name, "role") == 0)
{
role = element->children->content;
}
}
element = element->next;
}
while(element != NULL);
/* print */
printf("no=%s\n", no);
printf("name=%s\n", name);
printf("role=%s\n", role);
}
employee = employee->next;
}
while(employee != NULL);
/* add employee */
employee = xmlNewNode(NULL, "employee");
xmlNewProp(employee, "no", "4");
element = xmlNewNode(NULL, "name");
xmlAddChild(element, xmlNewText("Dave D"));
xmlAddChild(employee, element);
element = xmlNewNode(NULL, "role");
xmlAddChild(element, xmlNewText("Intern"));
xmlAddChild(employee, element);
xmlAddChild(root, employee);
/* write out */
xmlDocDump(stdout, doc);
/* clean up */
xmlFreeDoc(doc);
xmlCleanupParser();
/* */
return EXIT_SUCCESS;
}
Windows GCC build:
gcc -m32 -Wall -Wno-pointer-sign -I%ICONVPATH%\include -I%LIBXML2PATH%\include\libxml2 -I%LIBXSLTPATH%\include %1.c -L%LIBXML2PATH%\lib -lxml2 -L%ICONVPATH%\lib -liconv -L%ZLIBPATH%\lib -lz -L%LIBXSLTPATH%\lib -lxslt -o %1.exe
#include <iostream>
#include <cstdlib>
using namespace std;
#include <xercesc/dom/DOM.hpp>
#include <xercesc/framework/XMLFormatter.hpp>
#include <xercesc/framework/LocalFileFormatTarget.hpp>
#include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/sax/HandlerBase.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/util/XMLString.hpp>
using namespace xercesc;
const char *XML_FILE = "employees.xml";
// hackish utility functions - do not use in production
// UTF16 transcodes a char string into one of 10 static buffers used round-robin,
// so a returned pointer is overwritten by the 10th later call; input longer than
// the buffer capacity is not supported.
static XMLCh buf[10][1000];
static int bufix = -1;
static XMLCh *UTF16(const char *s)
{
    bufix = (bufix + 1) % 10;
    // the limit is a count of XMLCh characters, not bytes, so divide by the element size
    XMLString::transcode(s, buf[bufix], sizeof(buf[bufix]) / sizeof(buf[bufix][0]) - 1);
    return buf[bufix];
}
// Converts an XMLCh string to a char string via XMLString::transcode.
// NOTE(review): the returned buffer is presumably allocated by Xerces and should be
// released by the caller (XMLString::release) - confirm against the Xerces documentation.
// NOTE(review): despite the name, the result is presumably in the local code page
// rather than guaranteed UTF-8 - confirm.
static char *UTF8(const XMLCh *s)
{
return XMLString::transcode(s);
}
int main()
{
try
{
XMLPlatformUtils::Initialize();
// read from file to DOM tree
XercesDOMParser *parser = new XercesDOMParser();
parser->setErrorHandler((ErrorHandler*) new HandlerBase());
parser->parse(XML_FILE);
DOMDocument *doc = parser->getDocument();
// iterate over all employee elements
DOMNodeList *employees = doc->getElementsByTagName(UTF16("employee"));
for(int i = 0; i < employees->getLength(); i++)
{
DOMElement *employee = (DOMElement *)employees->item(i);
// find attribute no
int no = atoi(UTF8(employee->getAttribute(UTF16("no"))));
// find sub elements name and role
DOMNodeList *children = employee->getChildNodes();
char *name = NULL;
char *role = NULL;
for(int j = 0; j < children->getLength(); j++) {
DOMNode *child = children->item(j);
if(child->getNodeType() == DOMNode::NodeType::ELEMENT_NODE)
{
if(strcmp(UTF8(child->getNodeName()), "name") == 0)
{
name = UTF8(child->getFirstChild()->getNodeValue());
}
if(strcmp(UTF8(child->getNodeName()), "role") == 0)
{
role = UTF8(child->getFirstChild()->getNodeValue());
}
}
}
// print
cout << "no = " << no << endl;
cout << "name = " << name << endl;
cout << "role = " << role << endl;
}
// add employee
DOMElement *newname = doc->createElement(UTF16("name"));
newname->appendChild(doc->createTextNode(UTF16("Dave D")));
DOMElement *newrole = doc->createElement(UTF16("role"));
newrole->appendChild(doc->createTextNode(UTF16("Intern")));
DOMElement *newemployee = doc->createElement(UTF16("employee"));
newemployee->setAttribute(UTF16("no"), UTF16("4"));
newemployee->appendChild(newname);
newemployee->appendChild(newrole);
doc->getDocumentElement()->appendChild(newemployee);
// write out
DOMImplementation *impl = DOMImplementationRegistry::getDOMImplementation(UTF16("LS"));
DOMLSSerializer *ser = ((DOMImplementationLS *)impl)->createLSSerializer();
ser->getDomConfig()->setParameter(XMLUni::fgDOMWRTFormatPrettyPrint, true);
DOMLSOutput *output = ((DOMImplementationLS*)impl)->createLSOutput();
XMLFormatTarget *ft = new LocalFileFormatTarget(UTF16("employees2.xml"));
output->setByteStream(ft);
ser->write(doc, output);
output->release();
//
doc->release();
XMLPlatformUtils::Terminate();
}
catch (const XMLException &ex)
{
cout << UTF8(ex.getMessage()) << endl;
exit(1);
}
catch (const DOMException &ex)
{
cout << UTF8(ex.getMessage()) << endl;
exit(1);
}
catch (...)
{
cout << "Ooops" << endl;
exit(1);
}
return 0;
}
#include <iostream>
using namespace std;
#include <QtCore/QFile>
#include <QtXML/QDomDocument>
const char *XML_FILE = "employees.xml";
void realmain()
{
QDomDocument doc;
doc.setContent(new QFile(XML_FILE));
// iterate over all employee elements
QDomNodeList employees = doc.elementsByTagName("employee");
for(int i = 0; i < employees.length(); i++)
{
QDomElement employee = employees.at(i).toElement();
// find attribute no
int no = employee.attribute("no").toInt();
// find sub elements name and role
QDomNodeList children = employee.childNodes();
string name = "";
string role = "";
for(int j = 0; j < children.length(); j++) {
QDomNode child = children.at(j);
if(child.nodeType() == QDomNode::ElementNode)
{
if(child.nodeName() == "name")
{
name = child.firstChild().nodeValue().toStdString();
}
if(child.nodeName() == "role")
{
role = child.firstChild().nodeValue().toStdString();
}
}
}
// print
cout << "no = " << no << endl;
cout << "name = " << name << endl;
cout << "role = " << role << endl;
}
// add employee
QDomElement newname = doc.createElement("name");
newname.appendChild(doc.createTextNode("Dave D"));
QDomElement newrole = doc.createElement("role");
newrole.appendChild(doc.createTextNode("Intern"));
QDomElement newemployee = doc.createElement("employee");
newemployee.setAttribute("no", "4");
newemployee.appendChild(newname);
newemployee.appendChild(newrole);
doc.documentElement().appendChild(newemployee);
// write out
QFile file2("employees2.xml");
file2.open(QIODevice::WriteOnly);
file2.write(doc.toByteArray(4));
file2.close();
}
// standard C++ headers
#include <iostream>
#include <cstdlib>
using namespace std;
// Windows headers
#include <windows.h>
#include <tchar.h>
#include <comutil.h>
#include <atlconv.h>
#include <msxml.h>
const wchar_t *XML_FILE = L"employees.xml";
// Prints "<func>: <system message for res>" and terminates the process with
// exit status 1 when res is anything other than S_OK; otherwise does nothing.
// NOTE(review): func and buffer are TCHAR strings written to the narrow cout,
// which only prints text in non-UNICODE builds - confirm the build settings.
void ReturnCheck(LPTSTR func, HRESULT res)
{
    if(res != S_OK)
    {
        TCHAR buffer[1000];
        // stays an empty string if FormatMessage cannot find a message
        buffer[0] = 0;
        // the size argument is a count of TCHARs, not bytes
        FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, 0, res, 0, buffer, sizeof(buffer) / sizeof(buffer[0]), 0);
        cout << func << ": " << buffer << endl;
        exit(1);
    }
}
int main()
{
USES_CONVERSION;
HRESULT res;
VARIANT_BOOL res2;
//
CoInitialize(NULL);
// read from file to DOM tree
IXMLDOMDocument *doc;
res = CoCreateInstance(CLSID_DOMDocument, NULL, CLSCTX_INPROC_SERVER, IID_IXMLDOMDocument, (void**)&doc);
ReturnCheck(_T("CoCreateInstance"), res);
doc->put_async(false);
res = doc->load(_variant_t(XML_FILE), &res2);
ReturnCheck(_T("load"), res);
// iterate over all employee elements
IXMLDOMNodeList *employees;
res = doc->getElementsByTagName(L"employee", &employees);
ReturnCheck(_T("getElementsByTagName"), res);
long len;
res = employees->get_length(&len);
ReturnCheck(_T("get_length"), res);
IXMLDOMNode *xemployee;
for(int i = 0; i < len; i++)
{
res = employees->get_item(i, &xemployee);
ReturnCheck(_T("get_item"), res);
IXMLDOMElement *employee;
res = xemployee->QueryInterface(__uuidof(IXMLDOMElement), (void **)&employee);
ReturnCheck(_T("QueryInterface"), res);
// find attribute no
_variant_t xno;
res = employee->getAttribute(L"no", &xno);
ReturnCheck(_T("getAttribute"), res);
int no = atoi(W2A((wchar_t *)(_bstr_t)xno));
// iterate over all employee elements
IXMLDOMNodeList *children;
res = employee->get_childNodes(&children);
ReturnCheck(_T("get_childNodes"), res);
_variant_t xname;
_variant_t xrole;
char *name;
char *role;
long len2;
res = children->get_length(&len2);
ReturnCheck(_T("get_length"), res);
IXMLDOMNode *child;
for(int j = 0; j < len2; j++)
{
res = children->get_item(j, &child);
ReturnCheck(_T("get_item"), res);
DOMNodeType typ;
res = child->get_nodeType(&typ);
if(typ == NODE_ELEMENT)
{
BSTR nam;
res = child->get_nodeName(&nam);
ReturnCheck(_T("get_nodeName"), res);
if(wcscmp((wchar_t *)nam, L"name") == 0)
{
IXMLDOMNode *txt;
res = child->get_firstChild(&txt);
ReturnCheck(_T("get_firstChild"), res);
res = txt->get_nodeValue(&xname);
ReturnCheck(_T("get_nodeValue"), res);
name = W2A((wchar_t *)(_bstr_t)xname);
txt->Release();
txt = NULL;
}
if(wcscmp((wchar_t *)nam, L"role") == 0)
{
IXMLDOMNode *txt;
res = child->get_firstChild(&txt);
ReturnCheck(_T("get_firstChild"), res);
res = txt->get_nodeValue(&xrole);
ReturnCheck(_T("get_nodeValue"), res);
role = W2A((wchar_t *)(_bstr_t)xrole);
txt->Release();
txt = NULL;
}
}
child->Release();
child = NULL;
}
// print
cout << "no = " << no << endl;
cout << "name = " << name << endl;
cout << "role = " << role << endl;
//
children->Release();
children = NULL;
employee->Release();
employee = NULL;
}
// add employee
IXMLDOMNode *retnode;
IXMLDOMText *txtnode;
IXMLDOMElement *newname;
res = doc->createElement(L"name", &newname);
ReturnCheck(_T("createElement"), res);
res = doc->createTextNode(L"Dave D", &txtnode);
ReturnCheck(_T("createTextNode"), res);
res = newname->appendChild(txtnode, &retnode);
ReturnCheck(_T("appendChild"), res);
IXMLDOMElement *newrole;
res = doc->createElement(L"role", &newrole);
ReturnCheck(_T("createElement"), res);
res = doc->createTextNode(L"Intern", &txtnode);
ReturnCheck(_T("createTextNode"), res);
res = newrole->appendChild(txtnode, &retnode);
ReturnCheck(_T("appendChild"), res);
IXMLDOMElement *newemployee;
res = doc->createElement(L"employee", &newemployee);
ReturnCheck(_T("createElement"), res);
res = newemployee->setAttribute(L"no", _variant_t(L"4"));
ReturnCheck(_T("setAttribute"), res);
res = newemployee->appendChild(newname, &retnode);
ReturnCheck(_T("appendChild"), res);
res = newemployee->appendChild(newrole, &retnode);
ReturnCheck(_T("appendChild"), res);
IXMLDOMElement *rootnode;
res = doc->get_documentElement(&rootnode);
ReturnCheck(_T("get_documentElement"), res);
res = rootnode->appendChild(newemployee, &retnode);
ReturnCheck(_T("appendChild"), res);
newname->Release();
newname = NULL;
newrole->Release();
newrole = NULL;
newemployee->Release();
newemployee = NULL;
rootnode->Release();
rootnode = NULL;
// write out
BSTR xml;
res = doc->get_xml(&xml);
ReturnCheck(_T("get_xml"), res);
cout << W2A((wchar_t *)xml) << endl;
// cleanup
employees->Release();
employees = NULL;
doc->Release();
doc = NULL;
//
CoUninitialize();
return 0;
}
(* Reads employees.xml into a DOM tree, prints every employee,
   appends a new employee and writes the result to employees2.xml. *)
program W3CDOM;
uses
  Classes, DOM, XMLRead, XMLWrite;
const
  XML_FILE = 'employees.xml';
var
  doc : TXMLDocument;
  employees, children : TDOMNodeList;
  employee, child : TDOMNode;
  newname, newrole, newemployee : TDOMElement;
  no, name, role : WideString;
  i, j : integer;
begin
  (* read from file to DOM tree; ReadXMLFile opens and closes the file itself,
     so no input source or file stream object is left allocated *)
  ReadXMLFile(doc, XML_FILE);
  (* iterate over all employee elements *)
  employees := doc.GetElementsByTagName('employee');
  for i := 0 to employees.Count - 1 do begin
    employee := employees.Item[i];
    (* find attribute no *)
    no := employee.Attributes.GetNamedItem('no').NodeValue;
    (* find sub elements name and role *)
    name := '';
    role := '';
    children := employee.ChildNodes;
    for j := 0 to children.Count - 1 do begin
      child := children.Item[j];
      if child.NodeType = ELEMENT_NODE then begin
        if child.NodeName = 'name' then begin
          name := child.FirstChild.NodeValue;
        end;
        if child.NodeName = 'role' then begin
          role := child.FirstChild.NodeValue;
        end;
      end;
    end;
    (* print *)
    writeln('no=', no);
    writeln('name=', name);
    writeln('role=', role);
  end;
  (* add employee *)
  newname := doc.CreateElement('name');
  newname.AppendChild(doc.CreateTextNode('Dave D'));
  newrole := doc.CreateElement('role');
  newrole.AppendChild(doc.CreateTextNode('Intern'));
  newemployee := doc.CreateElement('employee');
  newemployee.SetAttribute('no', '4');
  newemployee.AppendChild(newname);
  newemployee.AppendChild(newrole);
  doc.DocumentElement.AppendChild(newemployee);
  (* write out *)
  WriteXMLFile(doc, 'employees2.xml');
  (* *)
  doc.Free;
end.
(* Reads employees.xml with the flcXML units, prints every employee
   and writes the document to standard output. No employee is added. *)
program W3CDOMfl;
uses
flcXMLParser, flcXMLDocument;
const
XML_FILE = 'employees.xml';
var
parser : TxmlParser;
doc : TxmlDocument;
employees : AxmlElementArray;
employee : AxmlElement;
newemployee : TxmLElement;
name, role : WideString;
no, i : integer;
begin
(* read from file to DOM tree *)
parser := TxmlParser.Create;
parser.SetFileName(XML_FILE);
doc := parser.ExtractDocument;
parser.Free;
(* iterate over all employee elements *)
employees := doc.RootElement.ElementsByName('employee');
for i := Low(employees) to High(employees) do begin
employee := employees[i];
(* find attribute no, name and role *)
no := employee.AttrAsInteger('no');
name := employee.ElementByName('name').Content.AsUnicodeString();
role := employee.ElementByName('role').Content.AsUnicodeString();
(* print *)
writeln('no=', no);
writeln('name=', name);
writeln('role=', role);
end;
(* add employee *)
(* not supported *)
(* NOTE(review): newemployee is declared above but never used *)
(* write out *)
writeln(doc.AsUnicodeString());
(* NOTE(review): doc is never freed here; presumably the caller of
   ExtractDocument owns it - confirm against the library documentation *)
end.
from xml.dom.minidom import parse
from xml.dom import Node
# Reads employees.xml into a minidom tree, prints every employee,
# appends a new employee and writes the result to employees2.xml.
XML_FILE = '/work/employees.xml'
# read from file to DOM tree
doc = parse(XML_FILE)
# iterate over all employee elements
employees = doc.getElementsByTagName('employee')
for employee in employees:
    # find attribute no
    no = employee.getAttribute('no')
    # find sub elements name and role
    name = ''
    role = ''
    children = employee.childNodes
    for child in children:
        if child.nodeType == Node.ELEMENT_NODE:
            if child.nodeName == 'name':
                name = child.firstChild.nodeValue
            if child.nodeName == 'role':
                role = child.firstChild.nodeValue
    # print
    print('no = %s' % no)
    print('name = %s' % name)
    print('role = %s' % role)
# add employee
newname = doc.createElement('name')
newname.appendChild(doc.createTextNode('Dave D'))
newrole = doc.createElement('role')
newrole.appendChild(doc.createTextNode('Intern'))
newemployee = doc.createElement('employee')
newemployee.setAttribute('no', '4')
newemployee.appendChild(newname)
newemployee.appendChild(newrole)
doc.documentElement.appendChild(newemployee)
# write out (the with block flushes and closes the file, also on error)
with open('/work/employees2.xml', 'w') as out:
    doc.writexml(out, '', ' ', '\n')
Because W3C DOM is a bit verbose and cumbersome, several alternative DOMs (in-memory tree representations) have been created.
Pro's:
Con's:
package xmlproc;
import java.io.FileWriter;
import java.io.Writer;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.input.SAXBuilder;
import org.jdom2.output.Format;
import org.jdom2.output.XMLOutputter;
/**
 * Reads employees.xml into a JDOM2 tree, prints every employee,
 * appends a new employee and writes the result to employees2.xml.
 */
public class JDOM2 {
    private final static String XML_FILE = "/work/employees.xml";

    /**
     * Program entry point.
     *
     * @param args not used
     * @throws Exception if the XML cannot be parsed or the output cannot be written
     */
    public static void main(String[] args) throws Exception {
        // read from file to DOM tree
        SAXBuilder b = new SAXBuilder();
        Document doc = b.build(XML_FILE);
        // iterate over all employee elements (other children of the root are ignored)
        for(Element employee : doc.getRootElement().getChildren("employee")) {
            // find attribute no
            String no = employee.getAttributeValue("no");
            // find sub elements name and role
            String name = employee.getChild("name").getText();
            String role = employee.getChild("role").getText();
            // print
            System.out.println("no=" + no);
            System.out.println("name=" + name);
            System.out.println("role=" + role);
        }
        // add employee
        Element newname = new Element("name");
        newname.setText("Dave D");
        Element newrole = new Element("role");
        newrole.setText("Intern");
        Element newemployee = new Element("employee");
        newemployee.setAttribute("no", "4");
        newemployee.addContent(newname);
        newemployee.addContent(newrole);
        doc.getRootElement().addContent(newemployee);
        // write out
        XMLOutputter fmt = new XMLOutputter(Format.getPrettyFormat());
        Writer w = new FileWriter("/work/employees2.xml");
        try {
            w.write(fmt.outputString(doc));
        } finally {
            // close the file even when writing fails
            w.close();
        }
    }
}
Much easier than W3C DOM.
package xmlproc;
import java.io.FileOutputStream;
import java.util.List;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;
/**
 * Reads employees.xml into a dom4j tree, prints every employee,
 * appends a new employee and writes the result to employees2.xml.
 */
public class DOM4J {
    private final static String XML_FILE = "/work/employees.xml";

    /**
     * Program entry point.
     *
     * @param args not used
     * @throws Exception if the XML cannot be parsed or the output cannot be written
     */
    @SuppressWarnings("unchecked")
    public static void main(String[] args) throws Exception {
        // read from file to DOM tree
        SAXReader xmlReader = new SAXReader();
        Document doc = xmlReader.read(XML_FILE);
        // iterate over all employee elements
        for(Element employee : (List<Element>)doc.getRootElement().elements("employee")) {
            // find attribute no
            String no = employee.attributeValue("no");
            // find sub elements name and role
            String name = employee.elementText("name");
            String role = employee.elementText("role");
            // print
            System.out.println("no=" + no);
            System.out.println("name=" + name);
            System.out.println("role=" + role);
        }
        // add employee
        Element newemployee = doc.getRootElement().addElement("employee");
        newemployee.addAttribute("no", "4");
        newemployee.addElement("name").addText("Dave D");
        newemployee.addElement("role").addText("Intern");
        // write out
        OutputFormat of = OutputFormat.createPrettyPrint();
        FileOutputStream fos = new FileOutputStream("/work/employees2.xml");
        try {
            XMLWriter xw = new XMLWriter(fos, of);
            xw.write(doc);
            xw.close();
        } finally {
            // close the file even when writing fails
            fos.close();
        }
    }
}
Much easier than W3C DOM.
using System;
using System.IO;
using System.Xml.Linq;
namespace XmlProc.AltDOM
{
    /// <summary>
    /// Loads employees.xml with LINQ to XML, lists the employees,
    /// appends one more employee and saves the result as employees2.xml.
    /// </summary>
    public class Program
    {
        private const String XML_FILE = @"C:\work\employees.xml";

        /// <summary>Program entry point.</summary>
        /// <param name="args">Not used.</param>
        public static void Main(string[] args)
        {
            // load the file into an in-memory tree
            var doc = XDocument.Load(XML_FILE);

            // walk every employee element directly below the root
            foreach (var employee in doc.Root.Elements("employee"))
            {
                // attribute no plus child elements name and role
                Console.WriteLine("no = " + employee.Attribute("no").Value);
                Console.WriteLine("name = " + employee.Element("name").Value);
                Console.WriteLine("role = " + employee.Element("role").Value);
            }

            // build the new employee in one expression and attach it to the root
            doc.Root.Add(
                new XElement("employee",
                    new XAttribute("no", "4"),
                    new XElement("name", "Dave D"),
                    new XElement("role", "Intern")));

            // write out
            using (var sw = new StreamWriter(@"C:\work\employees2.xml"))
            {
                doc.Save(sw);
            }

            Console.ReadKey();
        }
    }
}
Easier than W3C DOM.
Imports System
Imports System.IO
Imports System.Xml.Linq
Namespace XmlProc.AltDOM
    ''' <summary>
    ''' Loads employees.xml with LINQ to XML, lists the employees,
    ''' appends one more employee and saves the result as employees2.xml.
    ''' </summary>
    Public Class Program
        Private Const XML_FILE As String = "C:\work\employees.xml"

        ''' <summary>Program entry point.</summary>
        Public Shared Sub Main(args As String())
            ' load the file into an in-memory tree
            Dim doc As XDocument = XDocument.Load(XML_FILE)

            ' walk every employee element directly below the root
            For Each employee As XElement In doc.Root.Elements("employee")
                ' attribute no plus child elements name and role
                Console.WriteLine("no = " & employee.Attribute("no").Value)
                Console.WriteLine("name = " & employee.Element("name").Value)
                Console.WriteLine("role = " & employee.Element("role").Value)
            Next

            ' build the new employee in one expression and attach it to the root
            doc.Root.Add(
                New XElement("employee",
                    New XAttribute("no", "4"),
                    New XElement("name", "Dave D"),
                    New XElement("role", "Intern")))

            ' write out
            Using sw As New StreamWriter("C:\work\employees2.xml")
                doc.Save(sw)
            End Using

            Console.ReadKey()
        End Sub
    End Class
End Namespace
Easier than W3C DOM.
PHP also has an XML parser that parses XML into a pseudo DOM tree. It utilizes PHP's dynamic typing to generate a real object structure with named properties instead of the generic DOM nodes with string values. This results in a very easy-to-use interface.
<html>
<head>
<title>SimpleXML</title>
</head>
<body>
<h1>SimpleXML</h1>
<table>
<tr>
<th>No</th>
<th>Name</th>
<th>Role</th>
</tr>
<?php
// Lists the employees from XML_FILE as table rows using SimpleXML.
define('XML_FILE','/work/employees.xml');
// read from file to pseudo DOM tree
$xml = new SimpleXMLElement(file_get_contents(XML_FILE));
// iterate over all employee elements
foreach($xml->employee as $employee) {
    // find attribute no
    $no = $employee['no'];
    // find sub elements name and role
    $name = $employee->name;
    $role = $employee->role;
    // print (values are cast to string and escaped so that characters such as < and & stay valid HTML)
    echo "<tr>\r\n";
    echo "<td>" . htmlspecialchars((string)$no) . "</td>\r\n";
    echo "<td>" . htmlspecialchars((string)$name) . "</td>\r\n";
    echo "<td>" . htmlspecialchars((string)$role) . "</td>\r\n";
    echo "</tr>\r\n";
}
?>
</table>
</body>
</html>
Easier than W3C DOM.
SimpleXML is much more used than DOM in PHP.
Note that it is possible to update a SimpleXML object structure, but I do not consider that a natural usage of SimpleXML.
Also note that combining XML namespaces with SimpleXML is known to cause problems.
Python has an ElementTree API for more convenient access to XML.
from xml.etree import ElementTree
XML_FILE = '/work/employees.xml'
# parse the file into an in-memory element tree
doc = ElementTree.parse(XML_FILE)
root = doc.getroot()
# visit every employee element below the root
for employee in root.iter('employee'):
    # attribute "no" plus the text of the sub elements "name" and "role"
    fields = (
        ('no', employee.get('no')),
        ('name', employee.find('name').text),
        ('role', employee.find('role').text),
    )
    # print one "label = value" line per field
    for label, value in fields:
        print('%s = %s' % (label, value))
# append a fourth employee with its attribute and two sub elements
newemployee = ElementTree.SubElement(root, 'employee', {'no': '4'})
ElementTree.SubElement(newemployee, 'name').text = 'Dave D'
ElementTree.SubElement(newemployee, 'role').text = 'Intern'
# serialize the modified tree to a new file
doc.write('/work/employees2.xml')
A streaming push parser reads XML and performs callbacks whenever it encounters something.
The only widely used parser of this type is the Java SAX parser. And therefore it is often simply referred to as a SAX parser.
Pro's:
Con's:
package xmlproc;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Entry point of the SAX example: reads the employee file and lets
 * {@code MySaxParser} handle the parse events.
 */
public class ParseSAX {
    private final static String XML_FILE = "/work/employees.xml";

    public static void main(String[] args) throws Exception {
        // obtain an XMLReader from the default SAX parser factory
        XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
        // register the event handler and process the file
        reader.setContentHandler(new MySaxParser());
        reader.parse(XML_FILE);
    }
}
class MySaxParser extends DefaultHandler {
private StringBuilder sb = new StringBuilder();
// accumulate text (note: can be called multiple times for a text node)
public void characters(char buf[], int offset, int len) throws SAXException {
sb.append(new String(buf, offset, len));
}
// process start element
public void startElement(String namespaceURI, String localName, String rawName, Attributes atts) throws SAXException {
// if employee element find no and print
if (rawName.equals("employee")) {
String no = atts.getValue("no");
System.out.println("no=" + no);
}
// if sub elements name or role start accumulating
if (rawName.equals("name") || rawName.equals("role")) {
sb = new StringBuilder();
}
}
// process end element
public void endElement(String namespaceURI, String localName, String rawName) throws SAXException {
// if sub elements name or role stop accumulating and print
if (rawName.equals("name")) {
String name = sb.toString();
System.out.println("name=" + name);
}
if (rawName.equals("role")) {
String role = sb.toString();
System.out.println("role=" + role);
}
}
}
This example is not so bad, but try to imagine how the code would look with a deeply nested XML structure.
<html>
<head>
<title>Push parser</title>
</head>
<body>
<h1>Push parser</h1>
<table>
<tr>
<th>No</th>
<th>Name</th>
<th>Role</th>
</tr>
<?php
define('XML_FILE','/work/employees.xml');
// text accumulated for the element currently being read
// (initialised before parsing starts so the handlers never see an undefined global)
$accdata = '';
// process file with specified event handlers
$parser = xml_parser_create();
xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0); // default is to uppercase everything !!
xml_set_element_handler($parser, 'startElement', 'endElement');
xml_set_character_data_handler($parser, 'characters');
// the whole document is handed over in one call, so mark it as the final chunk
xml_parse($parser, file_get_contents(XML_FILE), true);
xml_parser_free($parser);
// accumulate text (note: can be called multiple times for a text node)
function characters($parser, $data) {
    global $accdata;
    $accdata .= $data;
}
// process start element, if 'employee' find attribute no
function startElement($parser, $name, $atts) {
    global $accdata;
    // if employee element find no and print (escaped for HTML output)
    if($name == 'employee') {
        $no = htmlspecialchars($atts['no']);
        echo "<tr>\r\n";
        echo "<td>$no</td>\r\n";
    }
    // if sub elements name or role start accumulating
    if($name == 'name' || $name == 'role') {
        $accdata = '';
    }
}
// process end element
function endElement($parser, $name) {
    global $accdata;
    // The tag name is kept in $name and the text goes into a separate variable:
    // overwriting $name with the text would make the following tag checks
    // compare against element content instead of the tag.
    if($name == 'name' || $name == 'role') {
        // if sub elements name or role stop accumulating and print (escaped for HTML output)
        $text = htmlspecialchars($accdata);
        echo "<td>$text</td>\r\n";
    } elseif($name == 'employee') {
        echo "</tr>\r\n";
    }
}
?>
</table>
</body>
</html>
This example is not so bad, but try to imagine how the code would look with a deeply nested XML structure.
And the usage of global is really bad as well.
I have never seen this XML parser being used in PHP.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/parser.h>
#define XML_FILE "employees.xml"
/* parser state shared by the callbacks: text accumulated for the current element */
struct state
{
    int len;   /* number of bytes currently stored in buf */
    char *buf; /* accumulation buffer */
    int cap;   /* allocated size of buf in bytes */
};
/* accumulate text (note: can be called multiple times for a text node) */
void onCharacters(struct state *state, const xmlChar *ch, int len)
{
    /* keep one byte free for the terminator written in onEndElement;
       text that does not fit is dropped instead of overflowing the buffer */
    int room = state->cap - 1 - state->len;
    if(len > room)
    {
        len = room;
    }
    if(len > 0)
    {
        memcpy(state->buf + state->len, ch, len);
        state->len += len;
    }
}
/* process start element */
void onStartElement(struct state *state, const xmlChar *name, const xmlChar **attrs)
{
    int i;
    /* if employee element find no and print */
    if(strcmp(name, "employee") == 0)
    {
        /* attrs is NULL when the element has no attributes, otherwise it is a
           NULL terminated list: [0] name, [1] value, [2] name, [3] value, ... */
        for(i = 0; attrs != NULL && attrs[i] != NULL; i += 2)
        {
            if(strcmp(attrs[i], "no") == 0 && attrs[i + 1] != NULL)
            {
                printf("no=%s\n", attrs[i + 1]);
                break;
            }
        }
    }
    /* if sub elements name or role start accumulating */
    if(strcmp(name, "name") == 0 || strcmp(name, "role") == 0)
    {
        state->len = 0;
    }
}
/* process end element */
void onEndElement(struct state *state, const xmlChar *name)
{
    /* if sub elements name or role stop accumulating and print */
    if(strcmp(name, "name") == 0)
    {
        state->buf[state->len] = 0;
        printf("name=%s\n", state->buf);
    }
    if(strcmp(name, "role") == 0)
    {
        state->buf[state->len] = 0;
        printf("role=%s\n", state->buf);
    }
}
/* parse XML_FILE with the callbacks above; returns EXIT_FAILURE if parsing fails */
int main()
{
    xmlSAXHandler myhandler;
    struct state mystate;
    int status;
    /* process file with specified event handlers (all other callbacks stay NULL) */
    memset(&myhandler, 0, sizeof(myhandler));
    myhandler.characters = (charactersSAXFunc)onCharacters;
    myhandler.startElement = (startElementSAXFunc)onStartElement;
    myhandler.endElement = (endElementSAXFunc)onEndElement;
    mystate.len = 0;
    mystate.cap = 10000;
    mystate.buf = malloc(mystate.cap);
    if(mystate.buf == NULL)
    {
        fprintf(stderr, "Out of memory\n");
        return EXIT_FAILURE;
    }
    status = xmlSAXUserParseFile(&myhandler, &mystate, XML_FILE);
    /* release everything that was allocated */
    free(mystate.buf);
    xmlCleanupParser();
    if(status != 0)
    {
        fprintf(stderr, "Unable to parse XML file: %s\n", XML_FILE);
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
Windows GCC build:
gcc -m32 -Wall -Wno-pointer-sign -I%ICONVPATH%\include -I%LIBXML2PATH%\include\libxml2 -I%LIBXSLTPATH%\include %1.c -L%LIBXML2PATH%\lib -lxml2 -L%ICONVPATH%\lib -liconv -L%ZLIBPATH%\lib -lz -L%LIBXSLTPATH%\lib -lxslt -o %1.exe
#include <iostream>
#include <cstdlib>
using namespace std;
#include <xercesc/parsers/SAXParser.hpp>
#include <xercesc/sax/HandlerBase.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/util/XMLString.hpp>
using namespace xercesc;
const char *XML_FILE = "employees.xml";
// hackish utility functions - do not use in production
// Ring of 10 static buffers so that up to 10 UTF16() results can be alive at once.
static XMLCh buf[10][1000];
static int bufix = -1;
// Converts a native string to XMLCh using the next buffer of the ring.
// The returned pointer is overwritten after 10 further calls.
static XMLCh *UTF16(const char *s)
{
    bufix = (bufix + 1) % 10;
    // the limit is a number of characters, not bytes: using sizeof(buf[bufix])
    // directly would allow twice as many characters as the buffer can hold
    XMLString::transcode(s, buf[bufix], sizeof(buf[bufix]) / sizeof(buf[bufix][0]) - 1);
    return buf[bufix];
}
// Converts an XMLCh string to a native string.
// The result is newly allocated by Xerces; callers that care about leaks
// should hand it to XMLString::release().
static char *UTF8(const XMLCh *s)
{
    return XMLString::transcode(s);
}
class MySaxParser : public HandlerBase
{
private:
XMLCh buf[10000]; // hardcoded size - do not use in production
int buflen;
public:
void characters (const XMLCh *const chars, const XMLSize_t length);
void startElement(const XMLCh *const name, AttributeList& attributes);
void endElement(const XMLCh *const name);
};
// accumulate text (note: can be called multiple times for a text node)
void MySaxParser::characters(const XMLCh *const chars, const XMLSize_t length)
{
memcpy(buf + buflen, chars, length * sizeof(XMLCh));
buflen += length * sizeof(XMLCh);
}
// process start element
void MySaxParser::startElement(const XMLCh *const name, AttributeList& atts)
{
// if employee element find no and print
if(strcmp(UTF8(name), "employee") == 0)
{
int no = atoi(UTF8(atts.getValue(UTF16("no"))));
cout << "no = " << no << endl;
}
// if sub elements name or role start accumulating
if(strcmp(UTF8(name), "name") == 0 || strcmp(UTF8(name), "role") == 0)
{
buflen = 0;
}
}
// process end element
void MySaxParser::endElement(const XMLCh *const name)
{
// if sub elements name or role stop accumulating and print
if(strcmp(UTF8(name), "name") == 0)
{
memset(buf + buflen, 0, sizeof(XMLCh));
char *name = UTF8(buf);
cout << "name = " << name << endl;
}
if(strcmp(UTF8(name), "role") == 0)
{
memset(buf + buflen, 0, sizeof(XMLCh));
char *role = UTF8(buf);
cout << "role = " << role << endl;
}
}
int main()
{
try
{
XMLPlatformUtils::Initialize();
// process file with MySaxParser as event handler
SAXParser *parser = new SAXParser();
parser->setDocumentHandler(new MySaxParser());
parser->parse(XML_FILE);
//
XMLPlatformUtils::Terminate();
}
catch (const XMLException &ex)
{
cout << UTF8(ex.getMessage()) << endl;
exit(1);
}
catch (const SAXParseException &ex)
{
cout << UTF8(ex.getMessage()) << endl;
exit(1);
}
catch (...)
{
cout << "Ooops" << endl;
exit(1);
}
return 0;
}
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <expat.h>
#define XML_FILE "employees.xml"
struct state
{
int len;
char *buf;
};
/* accumulate text (note: can be called multiple times for a text node) */
void onCharacters(struct state *state, const XML_Char *ch, int len)
{
memcpy(state->buf + state->len, ch, len);
state->len += len;
}
/* process start element */
void onStartElement(struct state *state, const XML_Char *name, const XML_Char **attrs)
{
/* if employee element find no and print */
if(strcmp(name, "employee") == 0)
{
printf("no=%s\n", attrs[1]); // [0] is first attribute name, [1] is first attribute value
}
/* if sub elements name or role start accumulating */
if(strcmp(name, "name") == 0 || strcmp(name, "role") == 0)
{
state->len = 0;
}
}
/* process end element */
void onEndElement(struct state *state, const XML_Char *name)
{
/* if sub elements name or role stop accumulating and print */
if(strcmp(name, "name") == 0)
{
state->buf[state->len] = 0;
printf("name=%s\n", state->buf);
}
if(strcmp(name, "role") == 0)
{
state->buf[state->len] = 0;
printf("role=%s\n", state->buf);
}
}
int main()
{
XML_Parser parser;
struct state mystate;
FILE *fp;
char buf[100000];
int buflen;
int done;
/* process file with specified event handlers */
parser = XML_ParserCreate(NULL);
XML_SetUserData(parser, &mystate);
XML_SetElementHandler(parser, (XML_StartElementHandler)onStartElement, (XML_EndElementHandler)onEndElement);
XML_SetCharacterDataHandler(parser, (XML_CharacterDataHandler)onCharacters);
mystate.len = 0;
mystate.buf = malloc(10000);
fp = fopen(XML_FILE, "r");
do
{
buflen = fread(buf, 1, sizeof(buf), fp);
done = buflen < sizeof(buf);
XML_Parse(parser, buf, buflen, done);
}
while(!done);
fclose(fp);
XML_ParserFree(parser);
/* */
return 0;
}
#include <iostream>
using namespace std;
#include <QtCore/QFile>
#include <QtXML/QXmlSimpleReader>
class MySaxParser : public QXmlDefaultHandler
{
private:
QString buf;
public:
virtual bool characters(const QString &ch);
virtual bool startElement(const QString &namespaceURI, const QString &localName, const QString &qName, const QXmlAttributes &atts);
virtual bool endElement(const QString &namespaceURI, const QString &localName, const QString &qName);
};
// accumulate text (note: can be called multiple times for a text node)
bool MySaxParser::characters(const QString &ch)
{
buf += ch;
return true;
}
// process start element
bool MySaxParser::startElement(const QString &namespaceURI, const QString &localName, const QString &qName, const QXmlAttributes &atts)
{
// if employee element find no and print
if(localName == "employee")
{
int no = atts.value("no").toInt();
cout << "no = " << no << endl;
}
// if sub elements name or role start accumulating
if(localName == "name" || localName == "role")
{
buf = "";
}
return true;
}
// process end element
bool MySaxParser::endElement(const QString &namespaceURI, const QString &localName, const QString &qName)
{
// if sub elements name or role stop accumulating and print
if(localName == "name")
{
string name = buf.toStdString();
cout << "name = " << name << endl;
}
if(localName == "role")
{
string role = buf.toStdString();
cout << "role = " << role << endl;
}
return true;
}
const char *XML_FILE = "employees.xml";
void realmain()
{
// process file with MySaxParser as event handler
QXmlSimpleReader parser;
parser.setContentHandler(new MySaxParser());
parser.parse(new QXmlInputSource(new QFile(XML_FILE)));
}
from xml.sax import make_parser
from xml.sax.handler import ContentHandler
class MySaxParser(ContentHandler):
    """SAX handler that prints each employee's number, name and role."""

    def __init__(self):
        # text collected since the most recent name or role start tag
        self.sb = ''

    def characters(self, content):
        """Collect text; one text node may arrive in several pieces."""
        self.sb += content

    def startElement(self, rawname, attrs):
        """Print the employee number; restart collection for name and role."""
        if rawname == 'employee':
            print('no = %s' % attrs['no'])
        if rawname in ('name', 'role'):
            self.sb = ''

    def endElement(self, rawname):
        """Print the collected text when a name or role element ends."""
        if rawname in ('name', 'role'):
            # the label printed is the tag name itself
            print('%s = %s' % (rawname, self.sb))
XML_FILE = '/work/employees.xml'
# create the reader, attach a MySaxParser instance as its content handler
# and let the reader drive the callbacks while it processes the file
parser = make_parser()
handler = MySaxParser()
parser.setContentHandler(handler)
parser.parse(XML_FILE)
A streaming pull parser reads XML using a traditional get/read API.
Pro's:
Con's:
package xmlproc;
import java.io.FileInputStream;
import java.io.InputStream;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamReader;
/**
 * StAX (pull parser) example: prints the number, name and role of every
 * employee found in the XML file.
 */
public class ParseStAX {
    private final static String XML_FILE = "/work/employees.xml";

    public static void main(String[] args) throws Exception {
        // open file
        XMLInputFactory xif = XMLInputFactory.newInstance();
        InputStream is = new FileInputStream(XML_FILE);
        XMLStreamReader xsr = xif.createXMLStreamReader(is);
        // text collected since the most recent name or role start tag
        StringBuilder sb = new StringBuilder();
        while (xsr.hasNext()) {
            int event = xsr.next();
            if (event == XMLStreamReader.CHARACTERS) {
                // may be reported several times for a single text node
                sb.append(xsr.getText());
            } else if (event == XMLStreamReader.START_ELEMENT) {
                String tag = xsr.getLocalName();
                if (tag.equals("employee")) {
                    System.out.println("no=" + xsr.getAttributeValue(null, "no"));
                }
                if (tag.equals("name") || tag.equals("role")) {
                    // restart text collection
                    sb.setLength(0);
                }
            } else if (event == XMLStreamReader.END_ELEMENT) {
                String tag = xsr.getLocalName();
                if (tag.equals("name") || tag.equals("role")) {
                    // the label printed is the tag name itself
                    System.out.println(tag + "=" + sb.toString());
                }
            }
        }
        // close file
        xsr.close();
        is.close();
    }
}
using System;
using System.IO;
using System.Text;
using System.Xml;
namespace XmlProc.Pull
{
    /// <summary>
    /// Pull parser example: prints the number, name and role of every
    /// employee found in the XML file.
    /// </summary>
    public class Program
    {
        private const String XML_FILE = @"C:\work\employees.xml";

        public static void Main(string[] args)
        {
            // open file
            using (StreamReader sr = new StreamReader(XML_FILE))
            {
                XmlReader xr = new XmlTextReader(sr);
                // text collected since the most recent name or role start tag
                StringBuilder sb = new StringBuilder();
                while (xr.Read())
                {
                    XmlNodeType kind = xr.NodeType;
                    if (kind == XmlNodeType.Text)
                    {
                        sb.Append(xr.Value);
                    }
                    else if (kind == XmlNodeType.Element)
                    {
                        string tag = xr.Name;
                        if (tag == "employee")
                        {
                            Console.WriteLine("no=" + xr.GetAttribute("no"));
                        }
                        if (tag == "name" || tag == "role")
                        {
                            // restart text collection
                            sb.Length = 0;
                        }
                    }
                    else if (kind == XmlNodeType.EndElement)
                    {
                        string tag = xr.Name;
                        if (tag == "name" || tag == "role")
                        {
                            // the label printed is the tag name itself
                            Console.WriteLine(tag + "=" + sb.ToString());
                        }
                    }
                }
                xr.Close();
            }
            Console.ReadKey();
        }
    }
}
Imports System
Imports System.IO
Imports System.Text
Imports System.Xml
Namespace XmlProc.Pull
    ''' <summary>
    ''' Pull parser example: prints the number, name and role of every
    ''' employee found in the XML file.
    ''' </summary>
    Public Class Program
        Private Const XML_FILE As String = "C:\work\employees.xml"

        Public Shared Sub Main(args As String())
            ' open file
            Using sr As New StreamReader(XML_FILE)
                Dim xr As XmlReader = New XmlTextReader(sr)
                ' text collected since the most recent name or role start tag
                Dim sb As New StringBuilder()
                While xr.Read()
                    Dim tag As String = xr.Name
                    Select Case xr.NodeType
                        Case XmlNodeType.Text
                            sb.Append(xr.Value)
                        Case XmlNodeType.Element
                            If tag = "employee" Then
                                Console.WriteLine("no=" & xr.GetAttribute("no"))
                            End If
                            If tag = "name" OrElse tag = "role" Then
                                ' restart text collection
                                sb.Length = 0
                            End If
                        Case XmlNodeType.EndElement
                            If tag = "name" OrElse tag = "role" Then
                                ' the label printed is the tag name itself
                                Console.WriteLine(tag & "=" & sb.ToString())
                            End If
                    End Select
                End While
                xr.Close()
            End Using
            Console.ReadKey()
        End Sub
    End Class
End Namespace
<html>
<head>
<title>Pull parser</title>
</head>
<body>
<h1>Pull parser</h1>
<table>
<tr>
<th>No</th>
<th>Name</th>
<th>Role</th>
</tr>
<?php
define('XML_FILE','/work/employees.xml');
// open file
$rdr = new XMLReader();
$rdr->XML(file_get_contents(XML_FILE));
// process file
$accdata = ''; // text collected since the most recent name or role start tag
while($rdr->read()){
    switch ($rdr->nodeType) {
        // accumulate text
        case XMLReader::TEXT:
            $accdata .= $rdr->value;
            break;
        case XMLReader::ELEMENT:
            // if employee element find no and print (escaped for HTML output)
            if($rdr->localName == 'employee') {
                $no = htmlspecialchars((string)$rdr->getAttribute('no'));
                echo "<tr>\r\n";
                echo "<td>$no</td>\r\n";
            }
            // if sub elements name or role start accumulating
            if($rdr->localName == 'name' || $rdr->localName == 'role') {
                $accdata = '';
            }
            break;
        case XMLReader::END_ELEMENT:
            // if sub elements name or role stop accumulating and print;
            // the text is escaped because it may contain characters that are special in HTML
            if($rdr->localName == 'name' || $rdr->localName == 'role') {
                $text = htmlspecialchars($accdata);
                echo "<td>$text</td>\r\n";
            }
            if($rdr->localName == 'employee') {
                echo "</tr>\r\n";
            }
            break;
    }
}
// release the reader
$rdr->close();
?>
</table>
</body>
</html>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/xmlreader.h>
#define XML_FILE "employees.xml"
int main()
{
xmlTextReader *xr;
char *buf;
buf = malloc(10000);
buf[0] = 0;
xr = xmlNewTextReaderFilename(XML_FILE);
if(xr != NULL)
{
while(xmlTextReaderRead(xr))
{
switch(xmlTextReaderNodeType(xr))
{
case XML_READER_TYPE_TEXT:
/* accumulate text */
strcat(buf, xmlTextReaderValue(xr));
break;
case XML_READER_TYPE_ELEMENT:
/* if employee element find no and print */
if(strcmp(xmlTextReaderLocalName(xr), "employee") == 0)
{
printf("no=%s\n",xmlTextReaderGetAttribute(xr, "no"));
}
/* if sub elements name or role start accumulating */
if(strcmp(xmlTextReaderLocalName(xr), "name") == 0 || strcmp(xmlTextReaderLocalName(xr), "role") == 0)
{
buf[0] = 0;
}
break;
case XML_READER_TYPE_END_ELEMENT:
/* if sub elements name or role stop accumulating and print */
if(strcmp(xmlTextReaderLocalName(xr), "name") == 0)
{
printf("name=%s\n", buf);
}
if(strcmp(xmlTextReaderLocalName(xr), "role") == 0)
{
printf("role=%s\n", buf);
}
break;
}
}
xmlFreeTextReader(xr);
}
else
{
fprintf(stderr, "Unable to open XML file: %s\n", XML_FILE);
}
return EXIT_SUCCESS;
}
Windows GCC build:
gcc -m32 -Wall -Wno-pointer-sign -I%ICONVPATH%\include -I%LIBXML2PATH%\include\libxml2 -I%LIBXSLTPATH%\include %1.c -L%LIBXML2PATH%\lib -lxml2 -L%ICONVPATH%\lib -liconv -L%ZLIBPATH%\lib -lz -L%LIBXSLTPATH%\lib -lxslt -o %1.exe
#include <iostream>
using namespace std;
#include <QtCore/QFile>
#include <QtCore/QXmlStreamReader>
const char *XML_FILE = "employees.xml";
void realmain()
{
// open file
QFile file(XML_FILE);
file.open(QIODevice::ReadOnly);
QXmlStreamReader xsr(&file);
// process file
QString buf = "";
while(!xsr.atEnd()) {
xsr.readNext();
switch(xsr.tokenType())
{
// accumulate text (note: can be entered multiple times for a text node)
case QXmlStreamReader::Characters:
buf += xsr.text();
break;
case QXmlStreamReader::StartElement:
// if employee element find no and print
if(xsr.name() == "employee")
{
string no = xsr.attributes().value("no").toString().toStdString();
cout << "no = " << no << endl;
}
if(xsr.name() == "name" || xsr.name() == "role")
{
buf = "";
}
break;
case QXmlStreamReader::EndElement:
// if sub elements name or role stop accumulating and print
if(xsr.name() == "name")
{
string name = buf.toStdString();
cout << "name = " << name << endl;
}
if(xsr.name() == "role")
{
string role = buf.toStdString();
cout << "role = " << role << endl;
}
break;
}
}
//
file.close();
}
program Reader;

(* Pull parser example: prints the number, name and role of every employee
   found in the XML file. *)

uses
  Classes, XmlReader, XmlTextReader, XmlUtils;

const
  XML_FILE = 'employees.xml';

var
  xis : TXMLInputSource;
  xr : TXMLReader;
  buf, no, name, role : WideString;

begin
  (* open file *)
  xis := TXMLInputSource.Create(TFileStream.Create(XML_FILE, fmOpenRead));
  xr := TXMLTextReader.Create(xis, TXMLReaderSettings.Create);
  (* buf holds the text collected since the most recent name or role start tag *)
  buf := '';
  while xr.Read do
    case xr.nodeType of
      ntText:
        (* may be entered multiple times for a single text node *)
        buf := buf + xr.Value;
      ntElement:
        begin
          (* employee element: print its number *)
          if xr.Name = 'employee' then
          begin
            no := xr.GetAttribute('no');
            writeln('no=', no);
          end;
          (* name or role element: restart text collection *)
          if (xr.Name = 'name') or (xr.Name = 'role') then
            buf := '';
        end;
      ntEndElement:
        begin
          (* name or role element ended: print the collected text *)
          if xr.Name = 'name' then
          begin
            name := buf;
            writeln('name=', name);
          end;
          if xr.Name = 'role' then
          begin
            role := buf;
            writeln('role=', role);
          end;
        end;
    end;
  (* close file *)
  xr.Free;
  xis.Free;
end.
XML binding takes a completely different approach to reading and writing XML. Instead of doing it manually, it maps classes in the programming language to an XML format, so that reading and writing each become a single operation.
Pro's:
Con's:
package xmlproc;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlElement;
/**
 * One employee. JAXB maps {@code no} to an XML attribute and {@code name}
 * and {@code role} to child elements.
 */
public class Employee {
    private int no;
    private String name;
    private String role;

    /** Creates an employee with number 0 and no name or role. */
    public Employee() {
        // the fields keep their defaults: 0, null and null
    }

    /** Creates an employee with the given number, name and role. */
    public Employee(int no, String name, String role) {
        this.no = no;
        this.name = name;
        this.role = role;
    }

    @XmlAttribute
    public int getNo() {
        return this.no;
    }

    public void setNo(int no) {
        this.no = no;
    }

    @XmlElement
    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    @XmlElement
    public String getRole() {
        return this.role;
    }

    public void setRole(String role) {
        this.role = role;
    }

    /** Formats the employee as {@code [no,name,role]}. */
    @Override
    public String toString() {
        final String pattern = "[%d,%s,%s]";
        return String.format(pattern, no, name, role);
    }
}
The annotations @XmlAttribute and @XmlElement simply tell whether data is an attribute or an element.
package xmlproc;
import java.util.ArrayList;
import java.util.List;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElements;
import javax.xml.bind.annotation.XmlRootElement;
/**
 * Root object of the document: a list of employees, each one mapped to an
 * {@code employee} element.
 */
@XmlRootElement
public class Employees {
    private List<Employee> list = new ArrayList<>();

    @XmlElements(@XmlElement(name="employee",type=Employee.class))
    public List<Employee> getList() {
        return this.list;
    }

    public void setList(List<Employee> list) {
        this.list = list;
    }

    /** Returns the string form of the underlying list. */
    @Override
    public String toString() {
        return this.list.toString();
    }
}
Annotation @XmlRootElement tells that this is the top class. Annotation @XmlElements describes what the list is.
package xmlproc;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.Marshaller;
import javax.xml.bind.Unmarshaller;
/**
 * JAXB example: reads the employees from the XML file, adds one employee
 * and writes the result to a second file.
 */
public class JAXB {
    private final static String XML_FILE = "/work/employees.xml";

    public static void main(String[] args) throws Exception {
        // setup context
        JAXBContext jxbctx = JAXBContext.newInstance(Employees.class);
        // read from file; try-with-resources closes the stream even if unmarshalling fails
        Unmarshaller um = jxbctx.createUnmarshaller();
        Employees employees;
        try (FileInputStream is = new FileInputStream(XML_FILE)) {
            employees = (Employees)um.unmarshal(is);
        }
        // add employee
        employees.getList().add(new Employee(4, "Dave D", "Intern"));
        // write out; the stream is closed even if marshalling fails
        Marshaller m = jxbctx.createMarshaller();
        m.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);
        try (OutputStream os = new FileOutputStream("/work/employees2.xml")) {
            m.marshal(employees, os);
        }
    }
}
If the XML is documented in an XML schema (XSD file), then one does not need to write the data classes manually. Instead they can be generated from the schema.
Command line:
xjc -d dirwheregeneratedsourceswillbeplaced -p packageforgeneratedclasses nameofschemafile.xsd
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Xml.Serialization;
namespace XmlProc.Binding
{
/// <summary>
/// One employee. The number is serialized as the "no" attribute, name and
/// role as child elements.
/// </summary>
public class Employee
{
    [XmlAttribute(AttributeName="no")]
    public int No { get; set; }

    [XmlElement(ElementName="name")]
    public string Name { get; set; }

    [XmlElement(ElementName="role")]
    public string Role { get; set; }

    /// <summary>Creates an employee with number 0 and empty name and role.</summary>
    public Employee() : this(0, "", "")
    {
    }

    /// <summary>Creates an employee with the given number, name and role.</summary>
    public Employee(int no, string name, string role)
    {
        No = no;
        Name = name;
        Role = role;
    }

    /// <summary>Formats the employee as [no,name,role].</summary>
    public override string ToString()
    {
        return "[" + No + "," + Name + "," + Role + "]";
    }
}
/// <summary>
/// Root object of the document: serialized as the "employees" element with
/// one "employee" child element per list entry.
/// </summary>
[XmlRoot(ElementName="employees")]
public class Employees
{
    [XmlElement(ElementName="employee")]
    public List<Employee> EmployeeList { get; set; }

    /// <summary>Creates an instance with an empty employee list.</summary>
    public Employees()
    {
        EmployeeList = new List<Employee>();
    }

    /// <summary>Formats the list as {e1,e2,...} using each employee's ToString.</summary>
    public override string ToString()
    {
        string[] parts = EmployeeList.Select(e => e.ToString()).ToArray();
        return "{" + string.Join(",", parts) + "}";
    }
}
/// <summary>
/// XML binding example: deserializes the employees, adds one employee and
/// serializes the result to a second file.
/// </summary>
public class Program
{
    private const String XML_FILE = @"C:\work\employees.xml";

    public static void Main(string[] args)
    {
        // setup context
        XmlSerializer ser = new XmlSerializer(typeof(Employees));
        // read from file; the using block disposes the reader and releases the file
        Employees employees;
        using (StreamReader sr = new StreamReader(XML_FILE))
        {
            employees = (Employees)ser.Deserialize(sr);
        }
        // add employee
        employees.EmployeeList.Add(new Employee(4, "Dave D", "Intern"));
        // write out
        using (StreamWriter sw = new StreamWriter(@"C:\work\employees2.xml"))
        {
            ser.Serialize(sw, employees);
        }
        Console.ReadKey();
    }
}
}
The attributes XmlAttribute and XmlElement simply tell whether data is an attribute or an element. Attribute XmlRoot tells that this is the top class.
If the XML is documented in an XML schema (XSD file), then one does not need to write the data classes manually. Instead they can be generated from the schema.
Command line:
xsd /classes /language:csorvb /namespace:namespaceforgeneratedclasses nameofschemafile.xsd
Imports System
Imports System.Collections.Generic
Imports System.IO
Imports System.Linq
Imports System.Xml.Serialization
Namespace XmlProc.Binding
''' <summary>
''' One employee. The number is serialized as the "no" attribute, name and
''' role as child elements.
''' </summary>
Public Class Employee
    <XmlAttribute(AttributeName := "no")> _
    Public Property No() As Integer

    <XmlElement(ElementName := "name")> _
    Public Property Name() As String

    <XmlElement(ElementName := "role")> _
    Public Property Role() As String

    ''' <summary>Creates an employee with number 0 and empty name and role.</summary>
    Public Sub New()
        Me.New(0, "", "")
    End Sub

    ''' <summary>Creates an employee with the given number, name and role.</summary>
    Public Sub New(no As Integer, name As String, role As String)
        Me.No = no
        Me.Name = name
        Me.Role = role
    End Sub

    ''' <summary>Formats the employee as [no,name,role].</summary>
    Public Overrides Function ToString() As String
        Dim pattern As String = "[{0},{1},{2}]"
        Return String.Format(pattern, Me.No, Me.Name, Me.Role)
    End Function
End Class
''' <summary>
''' Root object of the document: serialized as the "employees" element with
''' one "employee" child element per list entry.
''' </summary>
<XmlRoot(ElementName := "employees")> _
Public Class Employees
    <XmlElement(ElementName := "employee")> _
    Public Property EmployeeList() As List(Of Employee)

    ''' <summary>Creates an instance with an empty employee list.</summary>
    Public Sub New()
        EmployeeList = New List(Of Employee)()
    End Sub

    ''' <summary>Formats the list as {e1,e2,...} using each employee's ToString.</summary>
    Public Overrides Function ToString() As String
        Dim parts As String() = EmployeeList.Select(Function(e) e.ToString()).ToArray()
        Return "{" & String.Join(",", parts) & "}"
    End Function
End Class
''' <summary>
''' XML binding example: deserializes the employees, adds one employee and
''' serializes the result to a second file.
''' </summary>
Public Class Program
    Private Const XML_FILE As String = "C:\work\employees.xml"

    Public Shared Sub Main(args As String())
        ' setup context
        Dim ser As New XmlSerializer(GetType(Employees))
        ' read from file; the Using block disposes the reader and releases the file
        Dim employees As Employees
        Using sr As New StreamReader(XML_FILE)
            employees = DirectCast(ser.Deserialize(sr), Employees)
        End Using
        ' add employee
        employees.EmployeeList.Add(New Employee(4, "Dave D", "Intern"))
        ' write out
        Using sw As New StreamWriter("C:\work\employees2.xml")
            ser.Serialize(sw, employees)
        End Using
        Console.ReadKey()
    End Sub
End Class
End Namespace
The attributes XmlAttribute and XmlElement simply tell whether data is an attribute or an element. Attribute XmlRoot tells that this is the top class.
If the XML is documented in an XML schema (XSD file), then one does not need to write the data classes manually. Instead they can be generated from the schema.
Command line:
xsd /classes /language:csorvb /namespace:namespaceforgeneratedclasses nameofschemafile.xsd
XML can obviously be written using the standard IO library. Streaming write is a simple wrapper on top of standard IO that makes it slightly easier to write well-formed XML.
It is closely related to streaming pull parser - one is get/read and the other is put/write.
package xmlproc;
/** Plain data holder for one employee: number, name and role. */
public class Employee {
    private int no;
    private String name;
    private String role;

    /** Creates an employee with number 0 and no name or role. */
    public Employee() {
        // the fields keep their defaults: 0, null and null
    }

    /** Creates an employee with the given number, name and role. */
    public Employee(int no, String name, String role) {
        this.no = no;
        this.name = name;
        this.role = role;
    }

    public int getNo() {
        return this.no;
    }

    public void setNo(int no) {
        this.no = no;
    }

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getRole() {
        return this.role;
    }

    public void setRole(String role) {
        this.role = role;
    }

    /** Formats the employee as {@code [no,name,role]}. */
    @Override
    public String toString() {
        final String pattern = "[%d,%s,%s]";
        return String.format(pattern, no, name, role);
    }
}
package xmlproc;
import java.util.ArrayList;
import java.util.List;
/** Container holding the list of employees. */
public class Employees {
    private List<Employee> list = new ArrayList<>();

    public List<Employee> getList() {
        return this.list;
    }

    public void setList(List<Employee> list) {
        this.list = list;
    }

    /** Returns the string form of the underlying list. */
    @Override
    public String toString() {
        return this.list.toString();
    }
}
package xmlproc;
import java.io.FileOutputStream;
import java.io.OutputStream;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamWriter;
/**
 * StAX writer example: builds four employees in memory and streams them to
 * an XML file.
 */
public class WriteStAX {
    public static void main(String[] args) throws Exception {
        // create employees
        Employees employees = new Employees();
        employees.getList().add(new Employee(1, "Alan A", "Manager"));
        employees.getList().add(new Employee(2, "Brian B", "Engineer"));
        employees.getList().add(new Employee(3, "Chris C", "Sales rep"));
        employees.getList().add(new Employee(4, "Dave D", "Intern"));
        // write out
        XMLOutputFactory xof = XMLOutputFactory.newInstance();
        OutputStream os = new FileOutputStream("/work/employees2.xml");
        XMLStreamWriter xsw = xof.createXMLStreamWriter(os);
        xsw.writeStartDocument();
        xsw.writeStartElement("employees");
        for (Employee employee : employees.getList()) {
            xsw.writeStartElement("employee");
            xsw.writeAttribute("no", Integer.toString(employee.getNo()));
            writeTextElement(xsw, "name", employee.getName());
            writeTextElement(xsw, "role", employee.getRole());
            xsw.writeEndElement();
        }
        xsw.writeEndElement();
        xsw.writeEndDocument();
        xsw.close();
        os.close();
    }

    /** Writes a start tag, the given text and the matching end tag. */
    private static void writeTextElement(XMLStreamWriter xsw, String tag, String text) throws Exception {
        xsw.writeStartElement(tag);
        xsw.writeCharacters(text);
        xsw.writeEndElement();
    }
}
Note that it cannot output nicely formatted XML.
This is a major flaw in my opinion.
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Xml;
namespace XmlProc.Write
{
/// <summary>Plain data holder for one employee: number, name and role.</summary>
public class Employee
{
    public int No { get; set; }

    public string Name { get; set; }

    public string Role { get; set; }

    /// <summary>Creates an employee with number 0 and empty name and role.</summary>
    public Employee() : this(0, "", "")
    {
    }

    /// <summary>Creates an employee with the given number, name and role.</summary>
    public Employee(int no, string name, string role)
    {
        No = no;
        Name = name;
        Role = role;
    }

    /// <summary>Formats the employee as [no,name,role].</summary>
    public override string ToString()
    {
        return "[" + No + "," + Name + "," + Role + "]";
    }
}
/// <summary>Container holding the list of employees.</summary>
public class Employees
{
    public List<Employee> EmployeeList { get; set; }

    /// <summary>Creates an instance with an empty employee list.</summary>
    public Employees()
    {
        EmployeeList = new List<Employee>();
    }

    /// <summary>Formats the list as {e1,e2,...} using each employee's ToString.</summary>
    public override string ToString()
    {
        string[] parts = EmployeeList.Select(e => e.ToString()).ToArray();
        return "{" + string.Join(",", parts) + "}";
    }
}
/// <summary>
/// Streaming write example: builds four employees in memory and writes them
/// to an indented XML file with XmlWriter.
/// </summary>
public class Program
{
    public static void Main(string[] args)
    {
        // create employees
        Employees employees = new Employees();
        employees.EmployeeList.Add(new Employee(1, "Alan A", "Manager"));
        employees.EmployeeList.Add(new Employee(2, "Brian B", "Engineer"));
        employees.EmployeeList.Add(new Employee(3, "Chris C", "Sales rep"));
        employees.EmployeeList.Add(new Employee(4, "Dave D", "Intern"));
        // write out
        using (StreamWriter sw = new StreamWriter(@"C:\work\employees2.xml"))
        {
            XmlWriterSettings xws = new XmlWriterSettings();
            xws.Indent = true;
            XmlWriter xw = XmlWriter.Create(sw, xws);
            xw.WriteStartDocument();
            xw.WriteStartElement("employees");
            foreach (Employee employee in employees.EmployeeList)
            {
                xw.WriteStartElement("employee");
                xw.WriteAttributeString("no", employee.No.ToString());
                WriteTextElement(xw, "name", employee.Name);
                WriteTextElement(xw, "role", employee.Role);
                xw.WriteEndElement();
            }
            xw.WriteEndElement();
            xw.WriteEndDocument();
            xw.Close();
        }
        Console.ReadKey();
    }

    /// <summary>Writes a start tag, the given text and the matching end tag.</summary>
    private static void WriteTextElement(XmlWriter xw, string tag, string text)
    {
        xw.WriteStartElement(tag);
        xw.WriteString(text);
        xw.WriteEndElement();
    }
}
}
Imports System
Imports System.Collections.Generic
Imports System.IO
Imports System.Linq
Imports System.Xml
Namespace XmlProc.Write
''' <summary>
''' Simple data holder for one employee: number, name and role.
''' </summary>
Public Class Employee
    ''' <summary>Employee number.</summary>
    Public Property No() As Integer
    ''' <summary>Employee name.</summary>
    Public Property Name() As String
    ''' <summary>Employee role.</summary>
    Public Property Role() As String
    ''' <summary>Creates an employee with number 0 and empty name and role.</summary>
    Public Sub New()
        Me.New(0, "", "")
    End Sub
    ''' <summary>Creates an employee from the supplied number, name and role.</summary>
    Public Sub New(no As Integer, name As String, role As String)
        Me.No = no
        Me.Name = name
        Me.Role = role
    End Sub
    ''' <summary>Returns the employee formatted as "[No,Name,Role]".</summary>
    Public Overrides Function ToString() As String
        Return "[" & Me.No & "," & Me.Name & "," & Me.Role & "]"
    End Function
End Class
''' <summary>
''' Container holding a list of Employee objects.
''' </summary>
Public Class Employees
    ''' <summary>Creates a container with an empty employee list.</summary>
    Public Sub New()
        Me.EmployeeList = New List(Of Employee)()
    End Sub
    ''' <summary>The employees in this container.</summary>
    Public Property EmployeeList() As List(Of Employee)
    ''' <summary>Returns the employees' string forms joined by commas and wrapped in braces.</summary>
    Public Overrides Function ToString() As String
        Dim parts As String() = EmployeeList.Select(Function(emp) emp.ToString()).ToArray()
        Return "{" & String.Join(",", parts) & "}"
    End Function
End Class
''' <summary>
''' Demo program: builds a small list of employees and writes it as indented XML
''' using XmlWriter.
''' </summary>
Public Class Program
    ''' <summary>
    ''' Creates four employees, writes them to C:\work\employees2.xml and waits for a key press.
    ''' </summary>
    ''' <param name="args">Command line arguments (not used).</param>
    Public Shared Sub Main(args As String())
        ' create employees
        Dim employees As New Employees()
        employees.EmployeeList.Add(New Employee(1, "Alan A", "Manager"))
        employees.EmployeeList.Add(New Employee(2, "Brian B", "Engineer"))
        employees.EmployeeList.Add(New Employee(3, "Chris C", "Sales rep"))
        employees.EmployeeList.Add(New Employee(4, "Dave D", "Intern"))
        ' write out
        Dim xws As New XmlWriterSettings()
        xws.Indent = True
        Using sw As New StreamWriter("C:\work\employees2.xml")
            ' the nested Using disposes (flushes and closes) the XML writer
            ' even if one of the write calls throws
            Using xw As XmlWriter = XmlWriter.Create(sw, xws)
                xw.WriteStartDocument()
                xw.WriteStartElement("employees")
                For Each employee As Employee In employees.EmployeeList
                    xw.WriteStartElement("employee")
                    xw.WriteAttributeString("no", employee.No.ToString())
                    xw.WriteStartElement("name")
                    xw.WriteString(employee.Name)
                    xw.WriteEndElement()
                    xw.WriteStartElement("role")
                    xw.WriteString(employee.Role)
                    xw.WriteEndElement()
                    xw.WriteEndElement()
                Next
                xw.WriteEndElement()
                xw.WriteEndDocument()
            End Using
        End Using
        Console.ReadKey()
    End Sub
End Class
End Namespace
<?php
/**
 * Simple data holder for one employee: number, name and role.
 */
class Employee {
    /** Employee number. */
    public $no;
    /** Employee name. */
    public $name;
    /** Employee role. */
    public $role;
    /**
     * Creates an employee; all arguments are optional and default to 0 / empty strings.
     */
    public function __construct($no = 0, $name = '', $role = '') {
        $this->no = $no;
        $this->name = $name;
        $this->role = $role;
    }
    /**
     * Returns the employee formatted as "[no,name,role]".
     */
    function __toString() {
        $fields = array($this->no, $this->name, $this->role);
        return vsprintf('[%d,%s,%s]', $fields);
    }
}
/**
 * Container holding an array of Employee objects in $list.
 */
class Employees {
    /** Array of employees. */
    public $list;
    /**
     * Creates a container with an empty list.
     */
    public function __construct() {
        $this->list = array();
    }
    /**
     * Returns the list entries converted to strings, joined by commas and wrapped in braces.
     */
    function __toString() {
        $parts = array_map('strval', $this->list);
        return '{' . implode(',', $parts) . '}';
    }
}
// build the employee collection
$employees = new Employees();
array_push(
    $employees->list,
    new Employee(1, 'Alan A', 'Manager'),
    new Employee(2, 'Brian B', 'Engineer'),
    new Employee(3, 'Chris C', 'Sales rep'),
    new Employee(4, 'Dave D', 'Intern')
);
// stream the collection as indented XML to the script output
$out = new XMLWriter();
$out->openURI('php://output');
$out->setIndent(true);
$out->startDocument();
$out->startElement('employees');
foreach($employees->list as $emp) {
    $out->startElement('employee');
    // attribute and simple child elements written with the one-call helpers
    $out->writeAttribute('no', $emp->no);
    $out->writeElement('name', $emp->name);
    $out->writeElement('role', $emp->role);
    $out->endElement();
}
$out->endElement();
$out->endDocument();
?>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/xmlwriter.h>
struct employee
{
int no;
char *name;
char *role;
};
int main()
{
struct employee employees[] = { { 1, "Alan A", "Manager" },
{ 2, "Brian B", "Engineer" },
{ 3, "Chris C", "Sales rep"},
{ 4, "Dave D", "Intern"} };
xmlTextWriter *xw;
char temp[20];
int i;
xw = xmlNewTextWriterFilename("employees2.xml", 0);
if(xw != NULL)
{
xmlTextWriterSetIndent(xw, 1);
xmlTextWriterStartDocument(xw, "1.0", "UTF-8", "Yes");
xmlTextWriterStartElement(xw, "employees");
for(i = 0; i < 4; i++)
{
xmlTextWriterStartElement(xw, "employee");
xmlTextWriterWriteAttribute(xw, "no", itoa(employees[i].no, temp, 10));
xmlTextWriterStartElement(xw, "name");
xmlTextWriterWriteString(xw, employees[i].name);
xmlTextWriterEndElement(xw);
xmlTextWriterStartElement(xw, "role");
xmlTextWriterWriteString(xw, employees[i].role);
xmlTextWriterEndElement(xw);
xmlTextWriterEndElement(xw);
}
xmlTextWriterEndElement(xw);
xmlTextWriterEndDocument(xw);
xmlFreeTextWriter(xw);
}
else
{
fprintf(stderr, "Unable to create XML file\n");
}
return EXIT_SUCCESS;
}
Windows GCC build:
gcc -m32 -Wall -Wno-pointer-sign -I%ICONVPATH%\include -I%LIBXML2PATH%\include\libxml2 -I%LIBXSLTPATH%\include %1.c -L%LIBXML2PATH%\lib -lxml2 -L%ICONVPATH%\lib -liconv -L%ZLIBPATH%\lib -lz -L%LIBXSLTPATH%\lib -lxslt -o %1.exe
#include <iostream>
using namespace std;
#include <QtCore/QList>
#include <QtCore/QFile>
#include <QtCore/QXmlStreamWriter>
class Employee
{
private:
int no;
QString name;
QString role;
public:
Employee(int no, QString name, QString role);
int GetNo();
QString GetName();
QString GetRole();
};
Employee::Employee(int no, QString name, QString role)
{
this->no = no;
this->name = name;
this->role = role;
}
int Employee::GetNo()
{
return no;
}
QString Employee::GetName()
{
return name;
}
QString Employee::GetRole()
{
return role;
}
void realmain()
{
// create employees
QList<Employee *> employees;
employees.append(new Employee(1, "Alan A", "Manager"));
employees.append(new Employee(2, "Brian B", "Engineer"));
employees.append(new Employee(3, "Chris C", "Sales rep"));
employees.append(new Employee(4, "Dave D", "Intern"));
// write out
QFile file("employees2.xml");
file.open(QIODevice::WriteOnly);
QXmlStreamWriter xsw(&file);
xsw.setAutoFormatting(true);
xsw.writeStartDocument();
xsw.writeStartElement("employees");
for(int i = 0; i < employees.size(); i++) {
Employee *employee = employees[i];
xsw.writeStartElement("employee");
xsw.writeAttribute("no", QString::number(employee->GetNo()));
xsw.writeStartElement("name");
xsw.writeCharacters(employee->GetName());
xsw.writeEndElement();
xsw.writeStartElement("role");
xsw.writeCharacters(employee->GetRole());
xsw.writeEndElement();
xsw.writeEndElement();
}
xsw.writeEndElement();
xsw.writeEndDocument();
file.close();
}
XML serialization is the text equivalent of binary serialization. Objects can be serialized to XML and that XML can be deserialized to objects again.
Pro's:
Con's:
package xmlproc;
/**
 * Simple bean for one employee: number, name and role.
 */
public class Employee {
    private int no;
    private String name;
    private String role;

    /**
     * Creates an employee with number 0 and null name and role
     * (the field defaults).
     */
    public Employee() {
    }

    /**
     * Creates an employee from the supplied number, name and role.
     */
    public Employee(int no, String name, String role) {
        setNo(no);
        setName(name);
        setRole(role);
    }

    /** Returns the employee number. */
    public final int getNo() {
        return this.no;
    }

    /** Sets the employee number. */
    public final void setNo(int no) {
        this.no = no;
    }

    /** Returns the employee name. */
    public final String getName() {
        return this.name;
    }

    /** Sets the employee name. */
    public final void setName(String name) {
        this.name = name;
    }

    /** Returns the employee role. */
    public final String getRole() {
        return this.role;
    }

    /** Sets the employee role. */
    public final void setRole(String role) {
        this.role = role;
    }

    /** Returns the employee formatted as "[no,name,role]". */
    @Override
    public String toString() {
        Object[] fields = { no, name, role };
        return String.format("[%d,%s,%s]", fields);
    }
}
package xmlproc;
import java.util.ArrayList;
import java.util.List;
/**
 * Bean holding a list of {@link Employee} objects.
 */
public class Employees {
    private List<Employee> list;

    /** Creates a container with an empty list. */
    public Employees() {
        this.list = new ArrayList<>();
    }

    /** Returns the list of employees. */
    public List<Employee> getList(){
        return this.list;
    }

    /** Replaces the list of employees. */
    public void setList(List<Employee> list) {
        this.list = list;
    }

    /** Returns the string form of the underlying list. */
    @Override
    public String toString() {
        return this.list.toString();
    }
}
package xmlproc;
import java.beans.XMLDecoder;
import java.beans.XMLEncoder;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
/**
 * Demonstrates XML serialization with java.beans.XMLEncoder / XMLDecoder:
 * writes an Employees object to a file, reads it back and prints the copy.
 */
public class JavaBeansSer {
    /**
     * Serializes four employees to /work/employees2.xml, deserializes them
     * again and prints the result.
     *
     * @param args command line arguments (not used)
     * @throws Exception if writing or reading the file fails
     */
    public static void main(String[] args) throws Exception {
        // create employees
        Employees employees = new Employees();
        employees.getList().add(new Employee(1, "Alan A", "Manager"));
        employees.getList().add(new Employee(2, "Brian B", "Engineer"));
        employees.getList().add(new Employee(3, "Chris C", "Sales rep"));
        employees.getList().add(new Employee(4, "Dave D", "Intern"));
        // write out (try-with-resources closes encoder and stream even on failure)
        try (OutputStream os = new FileOutputStream("/work/employees2.xml");
             XMLEncoder enc = new XMLEncoder(os)) {
            enc.writeObject(employees);
        }
        // read in
        Employees copy;
        try (InputStream is = new FileInputStream("/work/employees2.xml");
             XMLDecoder dec = new XMLDecoder(is)) {
            copy = (Employees)dec.readObject();
        }
        System.out.println(copy);
    }
}
Generated XML looks like:
<?xml version="1.0" encoding="UTF-8"?>
<java version="1.8.0_91" class="java.beans.XMLDecoder">
<object class="xmlproc.Employees" id="Employees0">
<void property="list">
<void method="add">
<object class="xmlproc.Employee">
<void property="name">
<string>Alan A</string>
</void>
<void property="no">
<int>1</int>
</void>
<void property="role">
<string>Manager</string>
</void>
</object>
</void>
<void method="add">
<object class="xmlproc.Employee">
<void property="name">
<string>Brian B</string>
</void>
<void property="no">
<int>2</int>
</void>
<void property="role">
<string>Engineer</string>
</void>
</object>
</void>
<void method="add">
<object class="xmlproc.Employee">
<void property="name">
<string>Chris C</string>
</void>
<void property="no">
<int>3</int>
</void>
<void property="role">
<string>Sales rep</string>
</void>
</object>
</void>
<void method="add">
<object class="xmlproc.Employee">
<void property="name">
<string>Dave D</string>
</void>
<void property="no">
<int>4</int>
</void>
<void property="role">
<string>Intern</string>
</void>
</object>
</void>
</void>
</object>
</java>
The format sure makes sense for Java, but not for any other programming language.
package xmlproc;
/**
 * Simple bean for one employee: number, name and role.
 */
public class Employee {
    private int no;
    private String name;
    private String role;

    /**
     * Creates an employee with number 0 and null name and role
     * (the field defaults).
     */
    public Employee() {
    }

    /**
     * Creates an employee from the supplied number, name and role.
     */
    public Employee(int no, String name, String role) {
        this.no = no;
        this.name = name;
        this.role = role;
    }

    /** Returns the employee number. */
    public int getNo() {
        return this.no;
    }

    /** Sets the employee number. */
    public void setNo(int no) {
        this.no = no;
    }

    /** Returns the employee name. */
    public String getName() {
        return this.name;
    }

    /** Sets the employee name. */
    public void setName(String name) {
        this.name = name;
    }

    /** Returns the employee role. */
    public String getRole() {
        return this.role;
    }

    /** Sets the employee role. */
    public void setRole(String role) {
        this.role = role;
    }

    /** Returns the employee formatted as "[no,name,role]". */
    @Override
    public String toString() {
        final String pattern = "[%d,%s,%s]";
        return String.format(pattern, this.no, this.name, this.role);
    }
}
package xmlproc;
import java.util.ArrayList;
import java.util.List;
/**
 * Holder for a list of {@link Employee} objects.
 */
public class Employees {
    private List<Employee> list;

    /** Creates a holder with an empty list. */
    public Employees() {
        list = new ArrayList<>();
    }

    /** Returns the list of employees. */
    public List<Employee> getList(){
        return list;
    }

    /** Replaces the list of employees. */
    public void setList(List<Employee> list) {
        this.list = list;
    }

    /** Returns the string form of the underlying list. */
    @Override
    public String toString() {
        final List<Employee> current = list;
        return current.toString();
    }
}
package xmlproc;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import com.thoughtworks.xstream.XStream;
import com.thoughtworks.xstream.io.xml.DomDriver;
/**
 * Demonstrates XML serialization with XStream: writes an Employees object
 * to a file, reads it back and prints the copy.
 */
public class XStreamSer {
    /**
     * Serializes four employees to /work/employees2.xml, deserializes them
     * again and prints the result.
     *
     * @param args command line arguments (not used)
     * @throws Exception if writing or reading the file fails
     */
    public static void main(String[] args) throws Exception {
        // create employees
        Employees employees = new Employees();
        employees.getList().add(new Employee(1, "Alan A", "Manager"));
        employees.getList().add(new Employee(2, "Brian B", "Engineer"));
        employees.getList().add(new Employee(3, "Chris C", "Sales rep"));
        employees.getList().add(new Employee(4, "Dave D", "Intern"));
        // setup XStream
        XStream xs = new XStream(new DomDriver());
        // aliases give short element names instead of fully qualified class names
        xs.alias("employees", Employees.class);
        xs.alias("employee", Employee.class);
        // write out (try-with-resources closes the stream even on failure)
        try (OutputStream os = new FileOutputStream("/work/employees2.xml")) {
            xs.toXML(employees, os);
        }
        // read in
        Employees copy;
        try (InputStream is = new FileInputStream("/work/employees2.xml")) {
            copy = (Employees)xs.fromXML(is);
        }
        System.out.println(copy);
    }
}
Generated XML looks like:
<employees>
<list>
<employee>
<no>1</no>
<name>Alan A</name>
<role>Manager</role>
</employee>
<employee>
<no>2</no>
<name>Brian B</name>
<role>Engineer</role>
</employee>
<employee>
<no>3</no>
<name>Chris C</name>
<role>Sales rep</role>
</employee>
<employee>
<no>4</no>
<name>Dave D</name>
<role>Intern</role>
</employee>
</list>
</employees>
Which is actually a pretty nice format that could make sense in other programming languages as well.
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Xml.Serialization;
namespace XmlProc.Serialize
{
/// <summary>
/// Serializable data holder for one employee: number, name and role.
/// </summary>
public class Employee
{
    /// <summary>Creates an employee with number 0 and empty name and role.</summary>
    public Employee() : this(0, "", "")
    {
    }
    /// <summary>Creates an employee from the supplied number, name and role.</summary>
    public Employee(int no, string name, string role)
    {
        No = no;
        Name = name;
        Role = role;
    }
    /// <summary>Employee number.</summary>
    public int No { get; set; }
    /// <summary>Employee name.</summary>
    public string Name { get; set; }
    /// <summary>Employee role.</summary>
    public string Role { get; set; }
    /// <summary>Returns the employee formatted as "[No,Name,Role]".</summary>
    public override string ToString()
    {
        object[] fields = new object[] { No, Name, Role };
        return string.Format("[{0},{1},{2}]", fields);
    }
}
/// <summary>
/// Serializable container holding a list of <see cref="Employee"/> objects.
/// </summary>
public class Employees
{
    /// <summary>The employees in this container.</summary>
    public List<Employee> EmployeeList { get; set; }
    /// <summary>Creates a container with an empty employee list.</summary>
    public Employees()
    {
        EmployeeList = new List<Employee>();
    }
    /// <summary>Returns the employees' string forms joined by commas and wrapped in braces.</summary>
    public override string ToString()
    {
        List<string> texts = EmployeeList.ConvertAll(emp => emp.ToString());
        string joined = string.Join(",", texts.ToArray());
        return "{" + joined + "}";
    }
}
/// <summary>
/// Demo program: serializes an <see cref="Employees"/> object to XML with
/// <see cref="XmlSerializer"/>, reads it back and prints the copy.
/// </summary>
public class Program
{
    /// <summary>
    /// Round-trips four employees through C:\work\employees2.xml and waits for a key press.
    /// </summary>
    /// <param name="args">Command line arguments (not used).</param>
    public static void Main(string[] args)
    {
        const string path = @"C:\work\employees2.xml";
        // build the object graph to serialize
        Employees staff = new Employees();
        staff.EmployeeList.Add(new Employee(1, "Alan A", "Manager"));
        staff.EmployeeList.Add(new Employee(2, "Brian B", "Engineer"));
        staff.EmployeeList.Add(new Employee(3, "Chris C", "Sales rep"));
        staff.EmployeeList.Add(new Employee(4, "Dave D", "Intern"));
        // one serializer instance handles both directions
        XmlSerializer xmlSer = new XmlSerializer(typeof(Employees));
        // object -> XML file
        using(StreamWriter output = new StreamWriter(path))
        {
            xmlSer.Serialize(output, staff);
        }
        // XML file -> object
        Employees copy;
        using(StreamReader input = new StreamReader(path))
        {
            object restored = xmlSer.Deserialize(input);
            copy = (Employees)restored;
        }
        Console.WriteLine(copy);
        Console.ReadKey();
    }
}
}
Generated XML looks like:
<?xml version="1.0" encoding="utf-8"?>
<Employees xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<EmployeeList>
<Employee>
<No>1</No>
<Name>Alan A</Name>
<Role>Manager</Role>
</Employee>
<Employee>
<No>2</No>
<Name>Brian B</Name>
<Role>Engineer</Role>
</Employee>
<Employee>
<No>3</No>
<Name>Chris C</Name>
<Role>Sales rep</Role>
</Employee>
<Employee>
<No>4</No>
<Name>Dave D</Name>
<Role>Intern</Role>
</Employee>
</EmployeeList>
</Employees>
Which is actually a pretty nice format that could make sense in other programming languages as well.
Imports System
Imports System.Collections.Generic
Imports System.IO
Imports System.Linq
Imports System.Xml.Serialization
Namespace XmlProc.Serialize
''' <summary>
''' Serializable data holder for one employee: number, name and role.
''' </summary>
Public Class Employee
    ''' <summary>Creates an employee with number 0 and empty name and role.</summary>
    Public Sub New()
        Me.New(0, "", "")
    End Sub
    ''' <summary>Creates an employee from the supplied number, name and role.</summary>
    Public Sub New(no As Integer, name As String, role As String)
        Me.No = no
        Me.Name = name
        Me.Role = role
    End Sub
    ''' <summary>Employee number.</summary>
    Public Property No() As Integer
    ''' <summary>Employee name.</summary>
    Public Property Name() As String
    ''' <summary>Employee role.</summary>
    Public Property Role() As String
    ''' <summary>Returns the employee formatted as "[No,Name,Role]".</summary>
    Public Overrides Function ToString() As String
        Dim fields As Object() = New Object() {Me.No, Me.Name, Me.Role}
        Return String.Format("[{0},{1},{2}]", fields)
    End Function
End Class
''' <summary>
''' Serializable container holding a list of Employee objects.
''' </summary>
Public Class Employees
    ''' <summary>The employees in this container.</summary>
    Public Property EmployeeList() As List(Of Employee)
    ''' <summary>Creates a container with an empty employee list.</summary>
    Public Sub New()
        Me.EmployeeList = New List(Of Employee)()
    End Sub
    ''' <summary>Returns the employees' string forms joined by commas and wrapped in braces.</summary>
    Public Overrides Function ToString() As String
        Dim texts As List(Of String) = EmployeeList.ConvertAll(Function(emp) emp.ToString())
        Dim joined As String = String.Join(",", texts.ToArray())
        Return "{" & joined & "}"
    End Function
End Class
''' <summary>
''' Demo program: serializes an Employees object to XML with XmlSerializer,
''' reads it back and prints the copy.
''' </summary>
Public Class Program
    ''' <summary>
    ''' Round-trips four employees through C:\work\employees2.xml and waits for a key press.
    ''' </summary>
    ''' <param name="args">Command line arguments (not used).</param>
    Public Shared Sub Main(args As String())
        Const path As String = "C:\work\employees2.xml"
        ' build the object graph to serialize
        Dim staff As New Employees()
        staff.EmployeeList.Add(New Employee(1, "Alan A", "Manager"))
        staff.EmployeeList.Add(New Employee(2, "Brian B", "Engineer"))
        staff.EmployeeList.Add(New Employee(3, "Chris C", "Sales rep"))
        staff.EmployeeList.Add(New Employee(4, "Dave D", "Intern"))
        ' one serializer instance handles both directions
        Dim xmlSer As New XmlSerializer(GetType(Employees))
        ' object -> XML file
        Using output As New StreamWriter(path)
            xmlSer.Serialize(output, staff)
        End Using
        ' XML file -> object
        Dim copy As Employees
        Using input As New StreamReader(path)
            Dim restored As Object = xmlSer.Deserialize(input)
            copy = DirectCast(restored, Employees)
        End Using
        Console.WriteLine(copy)
        Console.ReadKey()
    End Sub
End Class
End Namespace
Generated XML looks like:
<?xml version="1.0" encoding="utf-8"?>
<Employees xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<EmployeeList>
<Employee>
<No>1</No>
<Name>Alan A</Name>
<Role>Manager</Role>
</Employee>
<Employee>
<No>2</No>
<Name>Brian B</Name>
<Role>Engineer</Role>
</Employee>
<Employee>
<No>3</No>
<Name>Chris C</Name>
<Role>Sales rep</Role>
</Employee>
<Employee>
<No>4</No>
<Name>Dave D</Name>
<Role>Intern</Role>
</Employee>
</EmployeeList>
</Employees>
Which is actually a pretty nice format that could make sense in other programming languages as well.
<?php
require_once 'XML/Serializer.php';
require_once 'XML/Unserializer.php';
/**
 * Simple data holder for one employee: number, name and role.
 */
class Employee {
    /** Employee number. */
    public $no;
    /** Employee name. */
    public $name;
    /** Employee role. */
    public $role;
    /**
     * Creates an employee; all arguments are optional and default to 0 / empty strings.
     */
    public function __construct($no = 0, $name = '', $role = '') {
        $this->no = $no;
        $this->name = $name;
        $this->role = $role;
    }
    /**
     * Returns the employee formatted as "[no,name,role]".
     */
    function __toString() {
        $text = sprintf('[%d,%s,%s]', $this->no, $this->name, $this->role);
        return $text;
    }
}
/**
 * Container holding an array of Employee objects in $list.
 */
class Employees {
    /** Array of employees. */
    public $list;
    /**
     * Creates a container with an empty list.
     */
    public function __construct() {
        $this->list = array();
    }
    /**
     * Returns the list entries joined by commas and wrapped in braces.
     */
    function __toString() {
        $joined = implode(',', $this->list);
        return '{' . $joined . '}';
    }
}
// build the employee collection
$employees = new Employees();
array_push(
    $employees->list,
    new Employee(1, 'Alan A', 'Manager'),
    new Employee(2, 'Brian B', 'Engineer'),
    new Employee(3, 'Chris C', 'Sales rep'),
    new Employee(4, 'Dave D', 'Intern')
);
// serialize to an XML string and print it
$serializerOptions = array('indent' => ' ', 'typeHints' => true);
$serializer = new XML_Serializer($serializerOptions);
$serializer->serialize($employees);
$xml = $serializer->getSerializedData();
echo $xml;
// turn the XML string back into data and print that
$unserializerOptions = array();
$unserializer = new XML_Unserializer($unserializerOptions);
$unserializer->unserialize($xml);
$o = $unserializer->getUnserializedData();
echo $o;
?>
Generated XML looks like:
<Employees _class="Employees" _type="object">
<list _type="array">
<XML_Serializer_Tag _class="Employee" _originalKey="0" _type="object">
<no _type="integer">1</no>
<name _type="string">Alan A</name>
<role _type="string">Manager</role>
</XML_Serializer_Tag>
<XML_Serializer_Tag _class="Employee" _originalKey="1" _type="object">
<no _type="integer">2</no>
<name _type="string">Brian B</name>
<role _type="string">Engineer</role>
</XML_Serializer_Tag>
<XML_Serializer_Tag _class="Employee" _originalKey="2" _type="object">
<no _type="integer">3</no>
<name _type="string">Chris C</name>
<role _type="string">Sales rep</role>
</XML_Serializer_Tag>
<XML_Serializer_Tag _class="Employee" _originalKey="3" _type="object">
<no _type="integer">4</no>
<name _type="string">Dave D</name>
<role _type="string">Intern</role>
</XML_Serializer_Tag>
</list>
</Employees>
The format sure makes sense for PHP, but not for any other programming language.
XPath is an expression language for selecting nodes from XML. Or to put it another way: XPath is to XML what SQL is to relational databases.
Any serious developer should know XPath.
XPath syntax:
package xmlproc;
import java.io.File;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
/**
 * Demonstrates XPath selection on a W3C DOM tree using the JAXP XPath API.
 */
public class W3CDOM_XPath {
    private final static String XML_FILE = "/work/employees.xml";

    /**
     * Parses the XML file, selects employees via XPath and prints number,
     * name and role of each match.
     *
     * @param args command line arguments (not used)
     * @throws Exception if parsing or XPath evaluation fails
     */
    public static void main(String[] args) throws Exception {
        // parse the file into a DOM tree
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        Document doc = builder.parse(new File(XML_FILE));
        // one XPath evaluator is reused for all expressions
        XPath xp = XPathFactory.newInstance().newXPath();
        // select the matching employee elements
        Object selected = xp.evaluate("//employee[@no>1 and role!='Intern']", doc.getDocumentElement(), XPathConstants.NODESET);
        NodeList matches = (NodeList)selected;
        int idx = 0;
        while (idx < matches.getLength()) {
            Element emp = (Element)matches.item(idx);
            // the two-argument evaluate returns the result as a String
            String no = xp.evaluate("@no", emp);
            String name = xp.evaluate("name/text()", emp);
            String role = xp.evaluate("role/text()", emp);
            // print
            System.out.println("no=" + no);
            System.out.println("name=" + name);
            System.out.println("role=" + role);
            idx++;
        }
    }
}
XPath API in JDOM is not very well implemented, and I don't think anyone will want to use that, so I will not show that.
package xmlproc;
import java.util.List;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
/**
 * Demonstrates XPath selection with the dom4j API.
 */
public class DOM4J_XPath {
    private final static String XML_FILE = "/work/employees.xml";

    /**
     * Reads the XML file, selects employees via XPath and prints number,
     * name and role of each match.
     *
     * @param args command line arguments (not used)
     * @throws Exception if reading the file fails
     */
    @SuppressWarnings("unchecked")
    public static void main(String[] args) throws Exception {
        // read from file to dom4j tree
        Document doc = new SAXReader().read(XML_FILE);
        // select the matching employee elements
        List<Element> matches = (List<Element>)doc.getRootElement().selectNodes("//employee[@no>1 and role!='Intern']");
        for(Element emp : matches) {
            // attribute and sub element values are fetched with relative XPath expressions
            String no = emp.selectSingleNode("@no").getText();
            String name = emp.selectSingleNode("name/text()").getText();
            String role = emp.selectSingleNode("role/text()").getText();
            // print
            System.out.println("no=" + no);
            System.out.println("name=" + name);
            System.out.println("role=" + role);
        }
    }
}
using System;
using System.IO;
using System.Xml;
namespace XmlProc.XPath
{
/// <summary>
/// Demo program: selects employees from an XML file with XPath via
/// <see cref="XmlDocument"/> and prints them.
/// </summary>
public class Program
{
    private const String XML_FILE = @"C:\work\employees.xml";
    /// <summary>
    /// Loads the XML file, prints number, name and role of each selected
    /// employee and waits for a key press.
    /// </summary>
    /// <param name="args">Command line arguments (not used).</param>
    public static void Main(string[] args)
    {
        // load the file into a DOM tree
        XmlDocument doc = new XmlDocument();
        doc.Load(XML_FILE);
        // select the matching employee elements
        XmlNodeList matches = doc.SelectNodes("//employee[@no>1 and role!='Intern']");
        foreach(XmlElement emp in matches)
        {
            // look up attribute and sub element text nodes relative to the employee
            XmlNode noNode = emp.SelectSingleNode("@no");
            string no = noNode.Value;
            XmlNode nameNode = emp.SelectSingleNode("name/text()");
            string name = nameNode.Value;
            XmlNode roleNode = emp.SelectSingleNode("role/text()");
            string role = roleNode.Value;
            // print
            Console.WriteLine("no = " + no);
            Console.WriteLine("name = " + name);
            Console.WriteLine("role = " + role);
        }
        Console.ReadKey();
    }
}
}
Imports System
Imports System.IO
Imports System.Xml
Namespace XmlProc.XPath
''' <summary>
''' Demo program: selects employees from an XML file with XPath via
''' XmlDocument and prints them.
''' </summary>
Public Class Program
    Private Const XML_FILE As String = "C:\work\employees.xml"
    ''' <summary>
    ''' Loads the XML file, prints number, name and role of each selected
    ''' employee and waits for a key press.
    ''' </summary>
    ''' <param name="args">Command line arguments (not used).</param>
    Public Shared Sub Main(args As String())
        ' load the file into a DOM tree
        Dim doc As New XmlDocument()
        doc.Load(XML_FILE)
        ' select the matching employee elements
        Dim matches As XmlNodeList = doc.SelectNodes("//employee[@no>1 and role!='Intern']")
        For Each emp As XmlElement In matches
            ' look up attribute and sub element text nodes relative to the employee
            Dim noNode As XmlNode = emp.SelectSingleNode("@no")
            Dim no As String = noNode.Value
            Dim nameNode As XmlNode = emp.SelectSingleNode("name/text()")
            Dim name As String = nameNode.Value
            Dim roleNode As XmlNode = emp.SelectSingleNode("role/text()")
            Dim role As String = roleNode.Value
            ' print
            Console.WriteLine("no = " & no)
            Console.WriteLine("name = " & name)
            Console.WriteLine("role = " & role)
        Next
        Console.ReadKey()
    End Sub
End Class
End Namespace
<html>
<head>
<title>W3C DOM XPath</title>
</head>
<body>
<h1>W3C DOM XPath</h1>
<table>
<tr>
<th>No</th>
<th>Name</th>
<th>Role</th>
</tr>
<?php
define('XML_FILE','/work/employees.xml');
// read from file to DOM tree
$doc = new DOMDocument();
$doc->load(XML_FILE);
// setup XPath
$xpath = new DOMXPath($doc);
// iterate over all employee elements
$employees = $xpath->query("//employee[@no>1 and role!='Intern']", $doc);
foreach($employees as $employee) {
    // find attribute no
    $no = $xpath->query("@no", $employee)->item(0)->nodeValue;
    // find sub elements name and role
    $name = $xpath->query("name/text()", $employee)->item(0)->nodeValue;
    $role = $xpath->query("role/text()", $employee)->item(0)->nodeValue;
    // nodeValue holds decoded text, so characters like < and & must be
    // escaped again before being embedded in the HTML page
    $no = htmlspecialchars($no, ENT_QUOTES, 'UTF-8');
    $name = htmlspecialchars($name, ENT_QUOTES, 'UTF-8');
    $role = htmlspecialchars($role, ENT_QUOTES, 'UTF-8');
    // print
    echo "<tr>\r\n";
    echo "<td>$no</td>\r\n";
    echo "<td>$name</td>\r\n";
    echo "<td>$role</td>\r\n";
    echo "</tr>\r\n";
}
?>
</table>
</body>
</html>
<html>
<head>
<title>SimpleXML XPath</title>
</head>
<body>
<h1>SimpleXML XPath</h1>
<table>
<tr>
<th>No</th>
<th>Name</th>
<th>Role</th>
</tr>
<?php
define('XML_FILE','/work/employees.xml');
// read from file to pseudo DOM tree
$xml = new SimpleXMLElement(file_get_contents(XML_FILE));
// iterate over all employee elements
foreach($xml->xpath("//employee[@no>1 and role!='Intern']") as $employee) {
    // find attribute no
    $no = $employee->xpath("@no")[0];
    // find sub elements name and role
    $name = $employee->xpath("name/text()")[0];
    $role = $employee->xpath("role/text()")[0];
    // the string values are decoded text, so characters like < and & must be
    // escaped again before being embedded in the HTML page
    $no = htmlspecialchars((string)$no, ENT_QUOTES, 'UTF-8');
    $name = htmlspecialchars((string)$name, ENT_QUOTES, 'UTF-8');
    $role = htmlspecialchars((string)$role, ENT_QUOTES, 'UTF-8');
    // print
    echo "<tr>\r\n";
    echo "<td>$no</td>\r\n";
    echo "<td>$name</td>\r\n";
    echo "<td>$role</td>\r\n";
    echo "</tr>\r\n";
}
?>
</table>
</body>
</html>
<html>
<head>
<title>W3C DOM XPath</title>
</head>
<body>
<h1>W3C DOM XPath</h1>
<table>
<tr>
<th>No</th>
<th>Name</th>
<th>Role</th>
</tr>
<%
' read from file to DOM tree
Set doc = CreateObject("MSXML.DOMDocument")
doc.Async = False
doc.Load("C:\work\employees.xml")
' iterate over all employee elements
Set employees = doc.SelectNodes("//employee[@no>1 and role!='Intern']")
For Each employee in employees
    ' find attribute no
    no = employee.SelectSingleNode("@no").NodeValue
    ' find sub elements name and role
    name = employee.SelectSingleNode("name/text()").NodeValue
    role = employee.SelectSingleNode("role/text()").NodeValue
    ' print (values are HTML encoded so characters like < and & in the data cannot break the page)
    Response.Write "<tr>" & vbCrLf
    Response.Write "<td>" & Server.HTMLEncode(no) & "</td>" & vbCrLf
    Response.Write "<td>" & Server.HTMLEncode(name) & "</td>" & vbCrLf
    Response.Write "<td>" & Server.HTMLEncode(role) & "</td>" & vbCrLf
    Response.Write "</tr>" & vbCrLf
Next
' the closing table tag is emitted here; the static HTML after this script does not contain one
Response.Write "</table>" & vbCrLf
' cleanup
Set employees = Nothing
Set doc = Nothing
%>
</body>
</html>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xpath.h>
#define XML_FILE "employees.xml"
int main()
{
xmlDoc *doc;
xmlXPathContext *ctx;
xmlXPathObject *res;
xmlXPathObject *res2;
xmlNodeSet *employees;
xmlNode *employee;
xmlChar *no;
xmlChar *name;
xmlChar *role;
int i;
/* read from file to DOM tree */
doc = xmlReadFile(XML_FILE, NULL, 0);
if (doc == NULL)
{
fprintf(stderr, "Failed to parse %s\n", XML_FILE);
return EXIT_FAILURE;
}
/* setup XPath */
ctx = xmlXPathNewContext(doc);
/* iterate over all employee elements */
res = xmlXPathEval("//employee[@no>1 and role!='Intern']", ctx);
employees = res->nodesetval;
if(!xmlXPathNodeSetIsEmpty(employees))
{
for(i = 0; i < xmlXPathNodeSetGetLength(employees); i++)
{
employee = xmlXPathNodeSetItem(employees, i);
// find attribute no
res2 = xmlXPathNodeEval(employee, "@no", ctx);
no = xmlXPathCastNodeSetToString(res2->nodesetval);
// find sub elements name and role
res2 = xmlXPathNodeEval(employee, "name/text()", ctx);
name = xmlXPathCastNodeSetToString(res2->nodesetval);
res2 = xmlXPathNodeEval(employee, "role/text()", ctx);
role = xmlXPathCastNodeSetToString(res2->nodesetval);
/* print */
printf("no=%s\n", no);
printf("name=%s\n", name);
printf("role=%s\n", role);
}
}
/* clean up */
xmlXPathFreeObject(res);
xmlXPathFreeContext(ctx);
xmlFreeDoc(doc);
xmlCleanupParser();
/* */
return EXIT_SUCCESS;
}
Windows GCC build:
gcc -m32 -Wall -Wno-pointer-sign -I%ICONVPATH%\include -I%LIBXML2PATH%\include\libxml2 -I%LIBXSLTPATH%\include %1.c -L%LIBXML2PATH%\lib -lxml2 -L%ICONVPATH%\lib -liconv -L%ZLIBPATH%\lib -lz -L%LIBXSLTPATH%\lib -lxslt -o %1.exe
Note that xmlXPathNodeEval requires a relatively new version of LIBXML2.
// standard C++ headers
#include <iostream>
#include <cstdlib>
using namespace std;
// Windows headers
#include <windows.h>
#include <tchar.h>
#include <comutil.h>
#include <atlconv.h>
#include <msxml.h>
const wchar_t *XML_FILE = L"employees.xml";
// Checks the HRESULT of a COM call. If it is anything other than S_OK,
// prints the function name and the system message text for the code,
// then terminates the program with exit code 1.
//   func - name of the call that produced res (used in the message)
//   res  - the HRESULT to check
void ReturnCheck(LPTSTR func, HRESULT res)
{
    if(res != S_OK)
    {
        TCHAR buffer[1000];
        // keep the buffer a valid empty string in case no message text is found
        buffer[0] = 0;
        // the size argument is a count of TCHARs, not bytes
        FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM, 0, res, 0, buffer, sizeof(buffer) / sizeof(buffer[0]), 0);
        cout << func << ": " << buffer << endl;
        exit(1);
    }
}
// Loads employees.xml with MSXML, selects employees via XPath and prints
// number, name and role of each match. Any failing COM call ends the
// program through ReturnCheck.
int main()
{
    USES_CONVERSION;
    HRESULT res;
    VARIANT_BOOL res2;
    //
    CoInitialize(NULL);
    // read from file to DOM tree
    IXMLDOMDocument *doc;
    res = CoCreateInstance(CLSID_DOMDocument, NULL, CLSCTX_INPROC_SERVER, IID_IXMLDOMDocument, (void**)&doc);
    ReturnCheck(_T("CoCreateInstance"), res);
    doc->put_async(false);
    res = doc->load(_variant_t(XML_FILE), &res2);
    ReturnCheck(_T("load"), res);
    // iterate over all employee elements
    IXMLDOMNodeList *employees;
    res = doc->selectNodes(L"//employee[@no>1 and role!='Intern']", &employees);
    ReturnCheck(_T("selectNodes"), res);
    long len;
    res = employees->get_length(&len);
    ReturnCheck(_T("get_length"), res);
    IXMLDOMNode *xemployee;
    for(int i = 0; i < len; i++)
    {
        res = employees->get_item(i, &xemployee);
        ReturnCheck(_T("get_item"), res);
        IXMLDOMElement *employee;
        res = xemployee->QueryInterface(__uuidof(IXMLDOMElement), (void **)&employee);
        ReturnCheck(_T("QueryInterface"), res);
        // find attribute no
        IXMLDOMNode *nonode;
        res = xemployee->selectSingleNode(L"@no", &nonode);
        ReturnCheck(_T("selectSingleNode"), res);
        _variant_t xno;
        res = nonode->get_nodeValue(&xno);
        ReturnCheck(_T("get_nodeValue"), res);
        int no = atoi(W2A((wchar_t *)(_bstr_t)xno));
        // find sub elements name and role
        IXMLDOMNode *namenode;
        res = xemployee->selectSingleNode(L"name/text()", &namenode);
        ReturnCheck(_T("selectSingleNode"), res);
        _variant_t xname;
        res = namenode->get_nodeValue(&xname);
        ReturnCheck(_T("get_nodeValue"), res);
        char *name = W2A((wchar_t *)(_bstr_t)xname);
        IXMLDOMNode *rolenode;
        res = xemployee->selectSingleNode(L"role/text()", &rolenode);
        ReturnCheck(_T("selectSingleNode"), res);
        _variant_t xrole;
        res = rolenode->get_nodeValue(&xrole);
        ReturnCheck(_T("get_nodeValue"), res);
        char *role = W2A((wchar_t *)(_bstr_t)xrole);
        // print
        cout << "no = " << no << endl;
        cout << "name = " << name << endl;
        cout << "role = " << role << endl;
        //
        nonode->Release();
        nonode = NULL;
        namenode->Release();
        namenode = NULL;
        rolenode->Release();
        rolenode = NULL;
        employee->Release();
        employee = NULL;
        // get_item handed out its own reference, which must be released as well
        xemployee->Release();
        xemployee = NULL;
    }
    // cleanup
    employees->Release();
    employees = NULL;
    doc->Release();
    doc = NULL;
    //
    CoUninitialize();
    return 0;
}
(* Reads employees.xml into a DOM tree, selects employees with XPath and
   prints number, name and role of each match. *)
program XPathEx;
uses
  Classes, DOM, XMLRead, XMLWrite, XPath;
const
  XML_FILE = 'employees.xml';
var
  doc : TXMLDocument;
  selection, res : TXPathVariable;
  employees : TNodeSet;
  employee : TDOMNode;
  no, name, role : WideString;
  i : integer;
begin
  (* read from file to DOM tree *)
  ReadXMLfile(doc, XML_FILE);
  (* iterate over all employee elements *)
  selection := EvaluateXPathExpression('//employee[@no>1 and role!="Intern"]', doc.DocumentElement);
  employees := selection.AsNodeSet;
  for i := 0 to employees.Count - 1 do begin
    employee := TDOMNode(employees.Items[i]);
    (* find attribute no *)
    no := employee.Attributes.GetNamedItem('no').NodeValue;
    (* find sub elements name and role; each result object is freed after its text is copied *)
    res := EvaluateXPathExpression('name/text()', employee);
    name := res.AsText;
    res.Free;
    res := EvaluateXPathExpression('role/text()', employee);
    role := res.AsText;
    res.Free;
    (* print *)
    writeln('no=', no);
    writeln('name=', name);
    writeln('role=', role);
  end;
  (* release the selection result only after the loop, since employees is obtained from it *)
  selection.Free;
  doc.Free;
end.
XSL is a transformation language for XML (and is an XML format itself). XSLT is transformation of XML using XSL.
XSL can be a little tricky to learn for developers as it is a declarative language and not a sequential language.
XSL is a big topic, so I will not describe it in detail but just provide a small example.
employees.xsl:
<?xml version='1.0'?>
<!-- Transforms the employees document into an allemployees document with one
     oneemployee element (no, name, job) per employee element. -->
<xsl:stylesheet xmlns:xsl='http://www.w3.org/1999/XSL/Transform' version='1.0'>
<!-- root: emit the wrapper element and process every employee in the document -->
<xsl:template match='/'>
<allemployees>
<xsl:apply-templates select='//employee'/>
</allemployees>
</xsl:template>
<!-- one employee: the no attribute becomes an element, role is renamed to job -->
<xsl:template match='employee'>
<oneemployee>
<no><xsl:value-of select='@no'/></no>
<name><xsl:value-of select='name'/></name>
<job><xsl:value-of select='role'/></job>
</oneemployee>
</xsl:template>
</xsl:stylesheet>
It does a simple transformation from XML to XML:
Note that XSL uses XPath!
employeeshtml.xsl:
<?xml version='1.0'?>
<!-- Transforms the employees document into an HTML page with one table row
     (no, name, role) per employee element. -->
<xsl:stylesheet xmlns:xsl='http://www.w3.org/1999/XSL/Transform' version='1.0'>
<xsl:output method='html'/>
<!-- root: emit the page skeleton and table header, then process every employee -->
<xsl:template match='/'>
<html>
<head>
<title>XSLT demo</title>
</head>
<body>
<h2>XSLT demo</h2>
<table>
<tr>
<th>No</th>
<th>Name</th>
<th>Role</th>
</tr>
<xsl:apply-templates select='//employee'/>
</table>
</body>
</html>
</xsl:template>
<!-- one employee: one table row -->
<xsl:template match='employee'>
<tr>
<td><xsl:value-of select='@no'/></td>
<td><xsl:value-of select='name'/></td>
<td><xsl:value-of select='role'/></td>
</tr>
</xsl:template>
</xsl:stylesheet>
It does a simple transformation from XML to HTML (formats the result as an HTML table).
package xmlproc;
import java.io.File;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
public class XSLT {
    private final static String XML_FILE = "/work/employees.xml";
    private final static String XSL_FILE = "/work/employees.xsl";

    /**
     * Applies the stylesheet in XSL_FILE to the document in XML_FILE and
     * writes the indented result to /work/employees2.xml.
     *
     * @param args not used
     * @throws Exception if the stylesheet cannot be compiled or the transformation fails
     */
    public static void main(String[] args) throws Exception {
        // describe the three files taking part in the transformation
        StreamSource stylesheet = new StreamSource(new File(XSL_FILE));
        StreamSource input = new StreamSource(new File(XML_FILE));
        StreamResult target = new StreamResult(new File("/work/employees2.xml"));
        // compile the stylesheet and ask for indented output
        Transformer transformer = TransformerFactory.newInstance().newTransformer(stylesheet);
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        // file to file transformation
        transformer.transform(input, target);
    }
}
package xmlproc;
import java.io.File;
import java.io.FileWriter;
import java.io.Writer;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamSource;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.bootstrap.DOMImplementationRegistry;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSOutput;
import org.w3c.dom.ls.LSSerializer;
public class W3CDOM_XSLT {
    private final static String XML_FILE = "/work/employees.xml";
    private final static String XSL_FILE = "/work/employees.xsl";

    /**
     * Parses XML_FILE into a W3C DOM tree, transforms it with the stylesheet
     * in XSL_FILE into a second DOM tree and serializes that tree, pretty
     * printed, to /work/employees2.xml.
     *
     * @param args not used
     * @throws Exception if parsing, transformation or writing fails
     */
    public static void main(String[] args) throws Exception {
        // read from file to DOM tree
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = dbf.newDocumentBuilder();
        Document doc = db.parse(new File(XML_FILE));
        // transform from one dom to another dom
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer t = tf.newTransformer(new StreamSource(new File(XSL_FILE)));
        Document doc2 = db.newDocument();
        t.transform(new DOMSource(doc), new DOMResult(doc2));
        // set up a DOM Load/Save serializer with pretty printing
        DOMImplementation impl = DOMImplementationRegistry.newInstance().getDOMImplementation("XML 3.0");
        DOMImplementationLS feature = (DOMImplementationLS)impl.getFeature("LS","3.0");
        LSSerializer ser = feature.createLSSerializer();
        ser.getDomConfig().setParameter("format-pretty-print", true);
        LSOutput output = feature.createLSOutput();
        // write out; try-with-resources closes the file even when serialization throws
        // NOTE(review): FileWriter encodes with the platform default charset - confirm
        // that this matches the encoding the serializer declares in the output
        try (Writer w = new FileWriter("/work/employees2.xml")) {
            output.setCharacterStream(w);
            ser.write(doc2, output);
        }
    }
}
package xmlproc;
import java.io.FileWriter;
import java.io.Writer;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import org.jdom2.transform.JDOMResult;
import org.jdom2.transform.JDOMSource;
import org.jdom2.Document;
import org.jdom2.input.SAXBuilder;
import org.jdom2.output.Format;
import org.jdom2.output.XMLOutputter;
public class JDOM2_XSLT {
    private final static String XML_FILE = "/work/employees.xml";
    private final static String XSL_FILE = "/work/employees.xsl";

    /**
     * Builds JDOM documents from XML_FILE and XSL_FILE, runs the stylesheet
     * over the data through the standard Transformer API and writes the
     * pretty printed result document to /work/employees2.xml.
     *
     * @param args not used
     * @throws Exception if parsing, transformation or writing fails
     */
    public static void main(String[] args) throws Exception {
        // read data and stylesheet from file to JDOM trees
        SAXBuilder b = new SAXBuilder();
        Document doc = b.build(XML_FILE);
        Document xsl = b.build(XSL_FILE);
        // transform from one JDOM tree to another JDOM tree
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer t = tf.newTransformer(new JDOMSource(xsl));
        JDOMResult res = new JDOMResult();
        t.transform(new JDOMSource(doc), res);
        Document doc2 = res.getDocument();
        // write out; try-with-resources closes the file even when formatting or writing throws
        XMLOutputter fmt = new XMLOutputter(Format.getPrettyFormat());
        try (Writer w = new FileWriter("/work/employees2.xml")) {
            w.write(fmt.outputString(doc2));
        }
    }
}
package xmlproc;
import java.io.File;
import java.io.FileOutputStream;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamSource;
import org.dom4j.Document;
import org.dom4j.io.DocumentResult;
import org.dom4j.io.DocumentSource;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;
public class DOM4J_XSLT {
    private final static String XML_FILE = "/work/employees.xml";
    private final static String XSL_FILE = "/work/employees.xsl";

    /**
     * Reads XML_FILE into a dom4j document, transforms it with the stylesheet
     * in XSL_FILE through the standard Transformer API and writes the pretty
     * printed result document to /work/employees2.xml.
     *
     * @param args not used
     * @throws Exception if parsing, transformation or writing fails
     */
    public static void main(String[] args) throws Exception {
        // read from file to dom4j tree
        SAXReader xmlReader = new SAXReader();
        Document doc = xmlReader.read(XML_FILE);
        // transform from one dom4j tree to another dom4j tree
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer t = tf.newTransformer(new StreamSource(new File(XSL_FILE)));
        DocumentResult res = new DocumentResult();
        t.transform(new DocumentSource(doc), res);
        Document doc2 = res.getDocument();
        // write out; try-with-resources releases the file even when the writer
        // cannot be created or the write throws
        OutputFormat of = OutputFormat.createPrettyPrint();
        try (FileOutputStream fos = new FileOutputStream("/work/employees2.xml")) {
            XMLWriter xw = new XMLWriter(fos, of);
            xw.write(doc2);
            xw.close();
        }
    }
}
using System;
using System.IO;
using System.Xml;
using System.Xml.Xsl;
namespace XmlProc.XSLT
{
    /// <summary>
    /// Transforms the XML file XML_FILE with the stylesheet XSL_FILE and
    /// writes the indented result to C:\work\employees2.xml.
    /// </summary>
    public class Program
    {
        private const string XML_FILE = @"C:\work\employees.xml";
        private const string XSL_FILE = @"C:\work\employees.xsl";

        /// <summary>
        /// Runs the file to file transformation and then waits for a key press.
        /// </summary>
        /// <param name="args">Not used.</param>
        public static void Main(string[] args)
        {
            // compile the stylesheet
            XslCompiledTransform xslt = new XslCompiledTransform();
            xslt.Load(XSL_FILE);
            // ask for indented output
            XmlWriterSettings xws = new XmlWriterSettings();
            xws.Indent = true;
            // transform from one file to another file; the XmlWriter is disposed
            // before the StreamWriter so the writer is always released
            using (StreamWriter sw = new StreamWriter(@"C:\work\employees2.xml"))
            using (XmlWriter xw = XmlWriter.Create(sw, xws))
            {
                xslt.Transform(XML_FILE, xw);
            }
            Console.ReadKey();
        }
    }
}
using System;
using System.IO;
using System.Xml;
using System.Xml.Xsl;
namespace XmlProc.XSLT
{
    /// <summary>
    /// Loads XML_FILE into an XmlDocument, transforms that DOM tree with the
    /// stylesheet XSL_FILE and writes the indented result to
    /// C:\work\employees2.xml.
    /// </summary>
    public class Program
    {
        private const string XML_FILE = @"C:\work\employees.xml";
        private const string XSL_FILE = @"C:\work\employees.xsl";

        /// <summary>
        /// Runs the DOM to file transformation and then waits for a key press.
        /// </summary>
        /// <param name="args">Not used.</param>
        public static void Main(string[] args)
        {
            // read from file to DOM tree
            XmlDocument doc = new XmlDocument();
            doc.Load(XML_FILE);
            // compile the stylesheet
            XslCompiledTransform xslt = new XslCompiledTransform();
            xslt.Load(XSL_FILE);
            // ask for indented output
            XmlWriterSettings xws = new XmlWriterSettings();
            xws.Indent = true;
            // transform from DOM to another file; the XmlWriter is disposed
            // before the StreamWriter so the writer is always released
            using (StreamWriter sw = new StreamWriter(@"C:\work\employees2.xml"))
            using (XmlWriter xw = XmlWriter.Create(sw, xws))
            {
                xslt.Transform(doc, xw);
            }
            Console.ReadKey();
        }
    }
}
Imports System
Imports System.IO
Imports System.Xml
Imports System.Xml.Xsl
Namespace XmlProc.XSLT
    ''' <summary>
    ''' Transforms the XML file XML_FILE with the stylesheet XSL_FILE and
    ''' writes the indented result to C:\work\employees2.xml.
    ''' </summary>
    Public Class Program
        Private Const XML_FILE As String = "C:\work\employees.xml"
        Private Const XSL_FILE As String = "C:\work\employees.xsl"

        ''' <summary>
        ''' Runs the file to file transformation and then waits for a key press.
        ''' </summary>
        ''' <param name="args">Not used.</param>
        Public Shared Sub Main(args As String())
            ' compile the stylesheet
            Dim xslt As New XslCompiledTransform()
            xslt.Load(XSL_FILE)
            ' ask for indented output
            Dim xws As New XmlWriterSettings()
            xws.Indent = True
            ' transform from one file to another file; the XmlWriter is disposed
            ' before the StreamWriter so the writer is always released
            Using sw As New StreamWriter("C:\work\employees2.xml")
                Using xw As XmlWriter = XmlWriter.Create(sw, xws)
                    xslt.Transform(XML_FILE, xw)
                End Using
            End Using
            Console.ReadKey()
        End Sub
    End Class
End Namespace
Imports System
Imports System.IO
Imports System.Xml
Imports System.Xml.Xsl
Namespace XmlProc.XSLT
    ''' <summary>
    ''' Loads XML_FILE into an XmlDocument, transforms that DOM tree with the
    ''' stylesheet XSL_FILE and writes the indented result to
    ''' C:\work\employees2.xml.
    ''' </summary>
    Public Class Program
        Private Const XML_FILE As String = "C:\work\employees.xml"
        Private Const XSL_FILE As String = "C:\work\employees.xsl"

        ''' <summary>
        ''' Runs the DOM to file transformation and then waits for a key press.
        ''' </summary>
        ''' <param name="args">Not used.</param>
        Public Shared Sub Main(args As String())
            ' read from file to DOM tree
            Dim doc As New XmlDocument()
            doc.Load(XML_FILE)
            ' compile the stylesheet
            Dim xslt As New XslCompiledTransform()
            xslt.Load(XSL_FILE)
            ' ask for indented output
            Dim xws As New XmlWriterSettings()
            xws.Indent = True
            ' transform from DOM to another file; the XmlWriter is disposed
            ' before the StreamWriter so the writer is always released
            Using sw As New StreamWriter("C:\work\employees2.xml")
                Using xw As XmlWriter = XmlWriter.Create(sw, xws)
                    xslt.Transform(doc, xw)
                End Using
            End Using
            Console.ReadKey()
        End Sub
    End Class
End Namespace
<?php
define('XML_FILE','/work/employees.xml');
define('XSL_FILE','/work/employeeshtml.xsl');

// Parse the employee data into a DOM tree.
$employees = new DOMDocument();
$employees->load(XML_FILE);

// Parse the stylesheet into its own DOM tree and hand it to an XSLT processor.
$stylesheet = new DOMDocument();
$stylesheet->load(XSL_FILE);
$processor = new XSLTProcessor();
$processor->importStylesheet($stylesheet);

// Run the transformation and send the resulting markup to the output.
$markup = $processor->transformToXML($employees);
echo $markup;
?>
<?php
define('XML_FILE','/work/employees.xml');
define('XSL_FILE','/work/employeeshtml.xsl');

// Read the employee data and wrap it in a SimpleXML element.
$employeeText = file_get_contents(XML_FILE);
$employees = new SimpleXMLElement($employeeText);

// Do the same for the stylesheet and hand it to an XSLT processor.
$stylesheetText = file_get_contents(XSL_FILE);
$stylesheet = new SimpleXMLElement($stylesheetText);
$processor = new XSLTProcessor();
$processor->importStylesheet($stylesheet);

// Run the transformation and send the resulting markup to the output.
$markup = $processor->transformToXML($employees);
echo $markup;
?>
<%
' Load the employee data synchronously into an MSXML DOM document.
Set xmlDoc = CreateObject("MSXML.DOMDocument")
xmlDoc.Async = False
xmlDoc.Load("C:\work\employees.xml")
' Load the stylesheet synchronously into a second DOM document.
Set xslDoc = CreateObject("MSXML.DOMDocument")
xslDoc.Async = False
xslDoc.Load("C:\work\employeeshtml.xsl")
' Apply the stylesheet to the data and send the resulting text to the client.
markup = xmlDoc.TransformNode(xslDoc)
Response.Write markup
' Release both COM objects.
Set xmlDoc = Nothing
Set xslDoc = Nothing
%>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/parser.h>
#include <libxslt/transform.h>
#define XML_FILE "employees.xml"
#define XSL_FILE "employees.xsl"
int main()
{
xmlDoc *doc;
xsltStylesheet *tf;
xmlDoc *res;
FILE *fp;
/* read from file to DOM tree */
doc = xmlReadFile(XML_FILE, NULL, 0);
if (doc == NULL)
{
fprintf(stderr, "Failed to parse %s\n", XML_FILE);
return EXIT_FAILURE;
}
/* transform from one dom to another dom */
tf = xsltParseStylesheetFile(XSL_FILE);
res = xsltApplyStylesheet(tf, doc, NULL);
/* write out */
fp = fopen("employees2.xml", "w");
xmlDocDump(fp, res);
fclose(fp);
/* clean up */
xmlFreeDoc(res);
xsltFreeStylesheet(tf);
xmlFreeDoc(doc);
xmlCleanupParser();
/* */
return EXIT_SUCCESS;
}
Windows GCC build:
gcc -m32 -Wall -Wno-pointer-sign -I%ICONVPATH%\include -I%LIBXML2PATH%\include\libxml2 -I%LIBXSLTPATH%\include %1.c -L%LIBXML2PATH%\lib -lxml2 -L%ICONVPATH%\lib -liconv -L%ZLIBPATH%\lib -lz -L%LIBXSLTPATH%\lib -lxslt -o %1.exe
// standard C++ headers
#include <iostream>
#include <cstdlib>
using namespace std;
// Windows headers
#include <windows.h>
#include <tchar.h>
#include <comutil.h>
#include <atlconv.h>
#include <msxml.h>
const wchar_t *XML_FILE = L"employees.xml";
const wchar_t *XSL_FILE = L"employees.xsl";
void ReturnCheck(LPTSTR func, HRESULT res)
{
if(res != S_OK)
{
TCHAR buffer[1000];
FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM, 0, res, 0, buffer, sizeof(buffer), 0);
cout << func << ": " << buffer << endl;
exit(1);
}
}
int main()
{
USES_CONVERSION;
HRESULT res;
VARIANT_BOOL res2;
//
CoInitialize(NULL);
// read from file to DOM tree
IXMLDOMDocument *doc;
res = CoCreateInstance(CLSID_DOMDocument, NULL, CLSCTX_INPROC_SERVER, IID_IXMLDOMDocument, (void**)&doc);
ReturnCheck(_T("CoCreateInstance"), res);
doc->put_async(false);
res = doc->load(_variant_t(XML_FILE), &res2);
ReturnCheck(_T("load"), res);
IXMLDOMDocument *xsl;
res = CoCreateInstance(CLSID_DOMDocument, NULL, CLSCTX_INPROC_SERVER, IID_IXMLDOMDocument, (void**)&xsl);
ReturnCheck(_T("CoCreateInstance"), res);
xsl->put_async(false);
res = xsl->load(_variant_t(XSL_FILE), &res2);
ReturnCheck(_T("load"), res);
BSTR xml;
res = doc->transformNode(xsl, &xml);
ReturnCheck(_T("transformNode"), res);
cout << W2A((wchar_t *)xml) << endl;
// cleanup
xsl->Release();
xsl = NULL;
doc->Release();
doc = NULL;
//
CoUninitialize();
return 0;
}
For similar examples in JSON see JSON Processing.
Version | Date | Description |
---|---|---|
1.0 | September 5th 2016 | Initial version based on multiple old articles on Eksperten.dk |
1.1 | October 7th 2016 | Add content overview |
1.2 | January 14th 2018 | Add C examples |
1.3 | July 29th 2018 | Add Delphi/Lazarus examples |
1.4 | December 26th 2018 | Add Python builtin xml examples |
1.5 | June 1st 2019 | Add C++ Xerces examples |
1.5 | November 15th 2019 | Add C Expat example |
1.6 | November 27th 2019 | Add C++ Qt examples |
1.7 | December 27th 2021 | Add C++ DOMDocument examples |
See list of all articles here
Please send comments to Arne Vajhøj
Pros:
Cons: