// Collection of code snippets by Arne Vajhøj
// posted to eksperten.dk, usenet and other places (2002-now)
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.w3c.dom.Document;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.ext.Locator2;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/**
 * Demonstrates four ways of finding out the character encoding of an XML file:
 * a SAX parser, a W3C DOM parser, a StAX parser and a plain regular-expression
 * scan of the start of the file.
 *
 * <p>{@link #main(String[])} generates three sample files and prints the result
 * of every detector for each of them.
 */
public class XmlEncodingDectect {
    private static final String FNM1 = "/work/foobar1.xml";
    private static final String FNM2 = "/work/foobar2.xml";
    private static final String FNM3 = "/work/foobar3.xml";

    /** Value returned by every detector when no encoding could be determined. */
    private static final String UNKNOWN = "Unknown";

    /** Matches an {@code encoding='...'} / {@code encoding="..."} pseudo-attribute. */
    private static final Pattern ENCODING_PATTERN =
            Pattern.compile("encoding\\s*=\\s*['\"]([^'\"]+)['\"]");

    /**
     * Writes the given lines to a file, each terminated by the platform line separator.
     * The writer is closed even if writing fails.
     *
     * @param fnm   name of the file to (over)write
     * @param lines lines to write, in order
     * @throws IOException if the file cannot be opened for writing
     */
    private static void writeLines(String fnm, String... lines) throws IOException {
        try (PrintWriter pw = new PrintWriter(new FileWriter(fnm))) {
            for (String line : lines) {
                pw.println(line);
            }
        }
    }

    // NOTE(review): all three generators write two EMPTY lines, so the generated
    // files are not well-formed XML and the parser-based detectors are expected to
    // reject them. The intended XML declaration / root element text appears to be
    // missing from these string literals - confirm the intended content.

    /** Generates sample file 1 ({@link #FNM1}). */
    private static void gen1() throws IOException {
        writeLines(FNM1, "", "");
    }

    /** Generates sample file 2 ({@link #FNM2}). */
    private static void gen2() throws IOException {
        writeLines(FNM2, "", "");
    }

    /** Generates sample file 3 ({@link #FNM3}). */
    private static void gen3() throws IOException {
        writeLines(FNM3, "", "");
    }

    /**
     * Detects the encoding with a SAX parser by asking the document locator
     * (when it implements {@link Locator2}) once the first element is reached.
     *
     * @param fnm system identifier of the XML document, passed straight to the parser
     * @return the encoding reported by the locator, or {@code "Unknown"} when the
     *         parser supplied no {@link Locator2} or the locator reported nothing
     * @throws SAXException if the document cannot be parsed
     * @throws IOException  if the document cannot be read
     */
    private static String detectSAX(String fnm) throws SAXException, IOException {
        // Per-call holder: a shared static field would leak the result of a
        // previous call into this one when the parser supplies no locator.
        final String[] detected = new String[1];
        XMLReader parser = XMLReaderFactory.createXMLReader();
        parser.setContentHandler(new DefaultHandler() {
            private Locator2 locator;

            @Override
            public void setDocumentLocator(Locator locator) {
                if (locator instanceof Locator2) {
                    this.locator = (Locator2) locator;
                }
            }

            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes)
                    throws SAXException {
                // Keep the first value seen so that elements coming from other
                // entities cannot overwrite the document's own encoding.
                if (locator != null && detected[0] == null) {
                    detected[0] = locator.getEncoding();
                }
            }
        });
        parser.parse(fnm);
        return detected[0] != null ? detected[0] : UNKNOWN;
    }

    /**
     * Detects the encoding with a W3C DOM parser via {@link Document#getXmlEncoding()}.
     *
     * <p>The file is handed to the parser as a byte stream so that the parser, not
     * the platform default charset, decides how the bytes are decoded.
     *
     * @param fnm name of the XML file
     * @return the encoding reported by the document, or {@code "Unknown"} if none
     * @throws ParserConfigurationException if no DOM parser can be created
     * @throws FileNotFoundException        if the file cannot be opened
     * @throws SAXException                 if the document cannot be parsed
     * @throws IOException                  if the file cannot be read or closed
     */
    private static String detectW3CDOM(String fnm)
            throws ParserConfigurationException, FileNotFoundException, SAXException, IOException {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = dbf.newDocumentBuilder();
        try (FileInputStream in = new FileInputStream(fnm)) {
            Document doc = db.parse(in);
            String encoding = doc.getXmlEncoding();
            return encoding != null ? encoding : UNKNOWN;
        }
    }

    /**
     * Detects the encoding with a StAX parser.
     *
     * <p>The encoding is queried before the reader is advanced: a new reader is
     * positioned on the start-of-document event, and that event is gone after the
     * first {@code next()} call. Only the beginning of the document is read.
     *
     * @param fnm name of the XML file
     * @return the input encoding if the parser knows it, otherwise the encoding
     *         declared in the XML declaration, otherwise {@code "Unknown"}
     * @throws IOException        if the file cannot be opened or closed
     * @throws XMLStreamException if the start of the document cannot be parsed
     */
    private static String detectStAX(String fnm) throws IOException, XMLStreamException {
        XMLInputFactory xif = XMLInputFactory.newInstance();
        try (FileInputStream in = new FileInputStream(fnm)) {
            XMLStreamReader xsr = xif.createXMLStreamReader(in);
            try {
                String encoding = xsr.getEncoding();
                if (encoding == null) {
                    encoding = xsr.getCharacterEncodingScheme();
                }
                return encoding != null ? encoding : UNKNOWN;
            } finally {
                // XMLStreamReader.close() does not close the underlying stream;
                // the enclosing try-with-resources takes care of that.
                xsr.close();
            }
        }
    }

    /**
     * Detects the encoding without an XML parser: reads lines until one contains
     * {@code '>'} (or the file ends) and searches the text read so far for an
     * {@code encoding=...} pseudo-attribute.
     *
     * @param fnm name of the XML file
     * @return the quoted value following {@code encoding=}, or {@code "Unknown"}
     *         if no such text is found
     * @throws IOException if the file cannot be opened or read
     */
    private static String detectSimple(String fnm) throws IOException {
        StringBuilder firstPart = new StringBuilder();
        try (BufferedReader br = new BufferedReader(new FileReader(fnm))) {
            String line;
            // Stop at end of file as well: readLine() returns null there, and a
            // file without any '>' must not keep the loop running forever.
            while (firstPart.indexOf(">") < 0 && (line = br.readLine()) != null) {
                firstPart.append(line);
            }
        }
        Matcher m = ENCODING_PATTERN.matcher(firstPart);
        return m.find() ? m.group(1) : UNKNOWN;
    }

    /**
     * Prints the result of each detector for one file, one result per line,
     * in the order SAX, W3C DOM, StAX, simple.
     */
    private static void report(String fnm)
            throws IOException, SAXException, ParserConfigurationException, XMLStreamException {
        System.out.println(detectSAX(fnm));
        System.out.println(detectW3CDOM(fnm));
        System.out.println(detectStAX(fnm));
        System.out.println(detectSimple(fnm));
    }

    /**
     * Generates each sample file in turn and prints what every detector reports for it.
     *
     * @param args ignored
     */
    public static void main(String[] args)
            throws IOException, SAXException, ParserConfigurationException, XMLStreamException {
        gen1();
        report(FNM1);
        gen2();
        report(FNM2);
        gen3();
        report(FNM3);
    }
}