¿Cómo formatear XML con sangría (pretty-print) desde Java?

Tengo una cadena de Java que contiene XML, sin saltos de línea ni sangrías. Me gustaría convertirla en una cadena con XML bien formateado. ¿Cómo lo hago?

String unformattedXml = "hello"; String formattedXml = new [UnknownClass]().format(unformattedXml); 

Nota: Mi entrada es una cadena . Mi salida es una cadena .

 Transformer transformer = TransformerFactory.newInstance().newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); //initialize StreamResult with File object to save to file StreamResult result = new StreamResult(new StringWriter()); DOMSource source = new DOMSource(doc); transformer.transform(source, result); String xmlString = result.getWriter().toString(); System.out.println(xmlString); 

Nota: Los resultados pueden variar según la versión de Java. Busque soluciones alternativas específicas para su plataforma.

Aquí hay una respuesta a mi propia pregunta. Combiné las respuestas de los diversos resultados para escribir una clase que formatea XML con sangría (pretty-print).

No hay garantías sobre cómo responde con XML no válido o documentos grandes.

 package ecb.sdw.pretty; import org.apache.xml.serialize.OutputFormat; import org.apache.xml.serialize.XMLSerializer; import org.w3c.dom.Document; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import java.io.IOException; import java.io.StringReader; import java.io.StringWriter; import java.io.Writer; /** * Pretty-prints xml, supplied as a string. * 

* eg. *
* String formattedXml = new XmlFormatter().format("hello"); * */ public class XmlFormatter { public XmlFormatter() { } public String format(String unformattedXml) { try { final Document document = parseXmlFile(unformattedXml); OutputFormat format = new OutputFormat(document); format.setLineWidth(65); format.setIndenting(true); format.setIndent(2); Writer out = new StringWriter(); XMLSerializer serializer = new XMLSerializer(out, format); serializer.serialize(document); return out.toString(); } catch (IOException e) { throw new RuntimeException(e); } } private Document parseXmlFile(String in) { try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder db = dbf.newDocumentBuilder(); InputSource is = new InputSource(new StringReader(in)); return db.parse(is); } catch (ParserConfigurationException e) { throw new RuntimeException(e); } catch (SAXException e) { throw new RuntimeException(e); } catch (IOException e) { throw new RuntimeException(e); } } public static void main(String[] args) { String unformattedXml = "< ?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + " \n" + " \n" + " \t\t\t\t\t ECB\n\n\n\n\n" + " \n" + " \n\n\n\n\n" + ""; System.out.println(new XmlFormatter().format(unformattedXml)); } }

una solución más simple basada en esta respuesta :

 public static String prettyFormat(String input, int indent) { try { Source xmlInput = new StreamSource(new StringReader(input)); StringWriter stringWriter = new StringWriter(); StreamResult xmlOutput = new StreamResult(stringWriter); TransformerFactory transformerFactory = TransformerFactory.newInstance(); transformerFactory.setAttribute("indent-number", indent); Transformer transformer = transformerFactory.newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.transform(xmlInput, xmlOutput); return xmlOutput.getWriter().toString(); } catch (Exception e) { throw new RuntimeException(e); // simple exception handling, please review it } } public static String prettyFormat(String input) { return prettyFormat(input, 2); } 

caso de prueba:

 prettyFormat("aaa"); 

devoluciones:

 < ?xml version="1.0" encoding="UTF-8"?>  aaa   

Ahora es 2012 y Java puede hacer más de lo que solía hacerlo con XML, me gustaría agregar una alternativa a mi respuesta aceptada. Esto no tiene dependencias fuera de Java 6.

 import org.w3c.dom.Node;
 import org.w3c.dom.bootstrap.DOMImplementationRegistry;
 import org.w3c.dom.ls.DOMImplementationLS;
 import org.w3c.dom.ls.LSSerializer;
 import org.xml.sax.InputSource;

 import javax.xml.parsers.DocumentBuilderFactory;
 import java.io.StringReader;

 /**
  * Pretty-prints xml, supplied as a string.
  *
  * <p>eg.
  * {@code String formattedXml = new XmlFormatter().format("<tag><nested>hello</nested></tag>");}
  */
 public class XmlFormatter {

     /**
      * Parses the given XML text and serializes its root element with the
      * DOM Load/Save serializer, with {@code format-pretty-print} enabled.
      *
      * @param xml the XML text to format
      * @return the serialized, pretty-printed XML; an XML declaration is
      *         requested only when the input itself starts with one
      * @throws RuntimeException wrapping any parsing or serialization failure
      */
     public String format(String xml) {
         try {
             final InputSource src = new InputSource(new StringReader(xml));
             final Node document = DocumentBuilderFactory.newInstance()
                     .newDocumentBuilder()
                     .parse(src)
                     .getDocumentElement();
             // An XML declaration may only appear at the very start of a document,
             // so a plain prefix check is sufficient here.
             final Boolean keepDeclaration = Boolean.valueOf(xml.startsWith("<?xml"));

             // May need this:
             // System.setProperty(DOMImplementationRegistry.PROPERTY,
             //         "com.sun.org.apache.xerces.internal.dom.DOMImplementationSourceImpl");

             final DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance();
             final DOMImplementationLS impl = (DOMImplementationLS) registry.getDOMImplementation("LS");
             final LSSerializer writer = impl.createLSSerializer();

             // Set this to true if the output needs to be beautified.
             writer.getDomConfig().setParameter("format-pretty-print", Boolean.TRUE);
             // Set this to true if the declaration is needed to be outputted.
             writer.getDomConfig().setParameter("xml-declaration", keepDeclaration);

             return writer.writeToString(document);
         } catch (Exception e) {
             throw new RuntimeException(e);
         }
     }

     /** Small demo: formats a deliberately badly laid-out document and prints it. */
     public static void main(String[] args) {
         // A well-formed sample is required here; format() throws on unparseable input.
         String unformattedXml =
                 "<?xml version=\"1.0\" encoding=\"UTF-8\"?><QueryMessage>\n"
                 + "    <Query>\n"
                 + "        <CategorySchemeWhere>\n"
                 + "   \t\t\t\t\t         <AgencyID>ECB\n\n\n\n</AgencyID>\n"
                 + "        </CategorySchemeWhere>\n"
                 + "    </Query>\n\n\n\n\n"
                 + "</QueryMessage>";

         System.out.println(new XmlFormatter().format(unformattedXml));
     }
 }

Solo para tener en cuenta que la respuesta mejor clasificada requiere el uso de xerces.

Si no desea agregar esta dependencia externa, simplemente puede usar las bibliotecas estándar del JDK (que en realidad están compiladas usando xerces internamente).

NB Hubo un error con jdk versión 1.5 ver http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6296446 pero ahora está resuelto.,

(Tenga en cuenta que si se produce un error, esto devolverá el texto original)

 package com.test; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import javax.xml.transform.OutputKeys; import javax.xml.transform.Source; import javax.xml.transform.Transformer; import javax.xml.transform.sax.SAXSource; import javax.xml.transform.sax.SAXTransformerFactory; import javax.xml.transform.stream.StreamResult; import org.xml.sax.InputSource; public class XmlTest { public static void main(String[] args) { XmlTest t = new XmlTest(); System.out.println(t.formatXml("text D")); } public String formatXml(String xml){ try{ Transformer serializer= SAXTransformerFactory.newInstance().newTransformer(); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); //serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); serializer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); //serializer.setOutputProperty("{http://xml.customer.org/xslt}indent-amount", "2"); Source xmlSource=new SAXSource(new InputSource(new ByteArrayInputStream(xml.getBytes()))); StreamResult res = new StreamResult(new ByteArrayOutputStream()); serializer.transform(xmlSource, res); return new String(((ByteArrayOutputStream)res.getOutputStream()).toByteArray()); }catch(Exception e){ //TODO log error return xml; } } } 

En el pasado he formateado XML con sangría usando el método org.dom4j.io.OutputFormat.createPrettyPrint()

 public String prettyPrint(final String xml){ if (StringUtils.isBlank(xml)) { throw new RuntimeException("xml was null or blank in prettyPrint()"); } final StringWriter sw; try { final OutputFormat format = OutputFormat.createPrettyPrint(); final org.dom4j.Document document = DocumentHelper.parseText(xml); sw = new StringWriter(); final XMLWriter writer = new XMLWriter(sw, format); writer.write(document); } catch (Exception e) { throw new RuntimeException("Error pretty printing xml:\n" + xml, e); } return sw.toString(); } 

Aquí hay una forma de hacerlo usando dom4j :

Importaciones:

 import org.dom4j.Document; import org.dom4j.DocumentHelper; import org.dom4j.io.OutputFormat; import org.dom4j.io.XMLWriter; 

Código:

 String xml = ""; Document doc = DocumentHelper.parseText(xml); StringWriter sw = new StringWriter(); OutputFormat format = OutputFormat.createPrettyPrint(); XMLWriter xw = new XMLWriter(sw, format); xw.write(doc); String result = sw.toString(); 

Como está empezando con una String , necesita convertirla en un objeto DOM (por ejemplo, un Node ) antes de poder usar el Transformer . Sin embargo, si sabe que su cadena XML es válida, y no desea incurrir en la sobrecarga de memoria de analizar la cadena en un DOM y luego ejecutar una transformación sobre el DOM para recuperar una cadena, podría simplemente hacer un análisis a la antigua, carácter por carácter. Inserte una nueva línea y espacios después de cada secuencia </...>, y mantenga un contador de sangría (para determinar el número de espacios) que incremente por cada <...> y disminuya por cada </...> que vea.

Descargo de responsabilidad: Hice una edición de corte / pegado / texto de las funciones a continuación, por lo que es posible que no se compile como está.

 public static final Element createDOM(String strXML) throws ParserConfigurationException, SAXException, IOException { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setValidating(true); DocumentBuilder db = dbf.newDocumentBuilder(); InputSource sourceXML = new InputSource(new StringReader(strXML)) Document xmlDoc = db.parse(sourceXML); Element e = xmlDoc.getDocumentElement(); e.normalize(); return e; } public static final void prettyPrint(Node xml, OutputStream out) throws TransformerConfigurationException, TransformerFactoryConfigurationError, TransformerException { Transformer tf = TransformerFactory.newInstance().newTransformer(); tf.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); tf.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); tf.setOutputProperty(OutputKeys.INDENT, "yes"); tf.transform(new DOMSource(xml), new StreamResult(out)); } 

Si está bien utilizar una biblioteca XML de un tercero, puede salirse con la suya con algo significativamente más simple que lo que sugieren las respuestas más votados actualmente.

Se indicó que tanto la entrada como la salida deberían ser cadenas, así que aquí hay un método de utilidad que hace exactamente eso, implementado con la biblioteca XOM :

 import nu.xom.*; import java.io.*; [...] public static String format(String xml) throws ParsingException, IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); Serializer serializer = new Serializer(out); serializer.setIndent(4); // or whatever you like serializer.write(new Builder().build(xml, "")); return out.toString("UTF-8"); } 

Probé que funciona, y los resultados no dependen de la versión de tu JRE ni nada de eso. Para ver cómo personalizar el formato de salida a su gusto, eche un vistazo a la API de Serializer .

En realidad, esto salió más largo de lo que pensaba: se necesitaron algunas líneas adicionales porque Serializer quiere un OutputStream en el que escribir. Pero tenga en cuenta que aquí hay muy poco código dedicado a la manipulación real del XML.

(Esta respuesta es parte de mi evaluación de XOM, que se sugirió como una opción en mi pregunta sobre la mejor biblioteca Java XML para reemplazar dom4j. Para el registro, con dom4j podría lograr esto con facilidad similar utilizando XMLWriter y OutputFormat . … como se demostró en la respuesta de mlo55 .)

Kevin Hakanson dijo: “Sin embargo, si sabes que tu cadena XML es válida, y no quieres incurrir en la sobrecarga de memoria de analizar la cadena en un DOM y luego ejecutar una transformación sobre el DOM para recuperar una cadena, podrías simplemente hacer un análisis a la antigua, carácter por carácter. Inserta una nueva línea y espacios después de cada secuencia </...>, y mantén un contador de sangría (para determinar el número de espacios) que incrementes por cada <...> y disminuyas por cada </...> que veas”.

Convenido. Tal enfoque es mucho más rápido y tiene muchas menos dependencias.

Ejemplo de solución:

 /**
  * XML utils, including formatting.
  */
 public class XmlUtils {

     private static XmlFormatter formatter = new XmlFormatter(2, 80);

     /**
      * Formats the given XML text starting at indent 0.
      *
      * @param s the XML text; assumed to be well-formed, it is not parsed
      * @return the re-indented text
      */
     public static String formatXml(String s) {
         return formatter.format(s, 0);
     }

     /**
      * Formats the given XML text starting at the given indent.
      *
      * @param s             the XML text; assumed to be well-formed, it is not parsed
      * @param initialIndent number of spaces to put before top-level tags
      * @return the re-indented text
      */
     public static String formatXml(String s, int initialIndent) {
         return formatter.format(s, initialIndent);
     }

     /** Character-by-character formatter that tracks nesting depth from {@code <} and {@code >}. */
     private static class XmlFormatter {
         private final int indentNumChars;
         private final int lineLength;

         public XmlFormatter(int indentNumChars, int lineLength) {
             this.indentNumChars = indentNumChars;
             this.lineLength = lineLength;
         }

         /**
          * Walks the text once, emitting indentation before each tag and a
          * newline after it. Short text is kept on the same line as its tags;
          * longer text is wrapped to {@code lineLength}.
          */
         public String format(String s, int initialIndent) {
             int indent = initialIndent;
             // True while an opening tag and its short text have just been written,
             // so the closing tag must follow on the same line. Kept local so that
             // calls do not share state.
             boolean singleLine = false;
             StringBuilder sb = new StringBuilder();
             for (int i = 0; i < s.length(); i++) {
                 char currentChar = s.charAt(i);
                 if (currentChar == '<') {
                     // A '<' at the very end has no successor; '\0' avoids reading past the string.
                     char nextChar = i + 1 < s.length() ? s.charAt(i + 1) : '\0';
                     if (nextChar == '/') {
                         indent -= indentNumChars;
                     }
                     // Don't indent before closing element if we're creating opening
                     // and closing elements on a single line.
                     if (!singleLine) {
                         sb.append(buildWhitespace(indent));
                     }
                     if (nextChar != '?' && nextChar != '!' && nextChar != '/') {
                         indent += indentNumChars;
                     }
                     singleLine = false; // Reset flag.
                 }
                 sb.append(currentChar);
                 if (currentChar == '>') {
                     if (i > 0 && s.charAt(i - 1) == '/') {
                         // Self-closing tag: undo the indent added when it opened.
                         indent -= indentNumChars;
                         sb.append("\n");
                     } else {
                         int nextStartElementPos = s.indexOf('<', i);
                         if (nextStartElementPos > i + 1) {
                             String textBetweenElements = s.substring(i + 1, nextStartElementPos);
                             if (textBetweenElements.replaceAll("\n", "").length() == 0) {
                                 // If the space between elements is solely newlines, let them
                                 // through to preserve additional newlines in source document.
                                 sb.append(textBetweenElements + "\n");
                             } else if (textBetweenElements.length() <= lineLength * 0.5) {
                                 // Put tags and text on a single line if the text is short.
                                 sb.append(textBetweenElements);
                                 singleLine = true;
                             } else {
                                 // For larger amounts of text, wrap lines to a maximum line length.
                                 sb.append("\n" + lineWrap(textBetweenElements, lineLength, indent, null) + "\n");
                             }
                             // Skip the text just consumed; the loop increment lands on the next '<'.
                             i = nextStartElementPos - 1;
                         } else {
                             sb.append("\n");
                         }
                     }
                 }
             }
             return sb.toString();
         }
     }

     /** Returns a string of {@code numChars} spaces (empty when {@code numChars} is not positive). */
     private static String buildWhitespace(int numChars) {
         StringBuilder sb = new StringBuilder();
         for (int i = 0; i < numChars; i++) {
             sb.append(" ");
         }
         return sb.toString();
     }

     /**
      * Wraps the supplied text to the specified line length.
      *
      * @param s          the text to wrap
      * @param lineLength the maximum length of each line in the returned string
      *                   (not including indent if specified)
      * @param indent     optional number of whitespace characters to prepend to each
      *                   line before the text; {@code null} means none
      * @param linePrefix optional string to append to the indent (before the text)
      * @return the supplied text wrapped so that no line exceeds the specified line
      *         length + indent, optionally with indent and prefix applied to each
      *         line; {@code null} when {@code s} is {@code null}
      */
     private static String lineWrap(String s, int lineLength, Integer indent, String linePrefix) {
         if (s == null) {
             return null;
         }
         int indentWidth = indent == null ? 0 : indent;
         // A non-positive width would never advance through the text.
         int width = Math.max(1, lineLength);

         StringBuilder sb = new StringBuilder();
         int lineStartPos = 0;
         boolean firstLine = true;
         while (lineStartPos < s.length()) {
             if (!firstLine) {
                 sb.append("\n");
             } else {
                 firstLine = false;
             }

             int lineEndPos;
             if (lineStartPos + width > s.length()) {
                 lineEndPos = s.length() - 1;
             } else {
                 int hardBreak = lineStartPos + width - 1;
                 lineEndPos = hardBreak;
                 // Prefer to break on the last space or tab inside the window.
                 while (lineEndPos > lineStartPos && !isBreakChar(s.charAt(lineEndPos))) {
                     lineEndPos--;
                 }
                 // No whitespace in the window: break at the full width instead of
                 // emitting a single character per line.
                 if (lineEndPos == lineStartPos && !isBreakChar(s.charAt(lineEndPos))) {
                     lineEndPos = hardBreak;
                 }
             }

             sb.append(buildWhitespace(indentWidth));
             if (linePrefix != null) {
                 sb.append(linePrefix);
             }
             sb.append(s.substring(lineStartPos, lineEndPos + 1));
             lineStartPos = lineEndPos + 1;
         }
         return sb.toString();
     }

     /** Tells whether a line may be broken after the given character. */
     private static boolean isBreakChar(char c) {
         return c == ' ' || c == '\t';
     }

     // other utils removed for brevity
 }

Hmmm … me enfrenté a algo así y es un error conocido … simplemente agrega esta OutputProperty …

 transformer.setOutputProperty(OutputPropertiesFactory.S_KEY_INDENT_AMOUNT, "8"); 

Espero que esto ayude …

Usando scala:

 import xml._ val xml = XML.loadString("hello") val formatted = new PrettyPrinter(150, 2).format(xml) println(formatted) 

También puede hacer esto en Java, si depende de scala-library.jar. Se parece a esto:

 import scala.xml.*; public class FormatXML { public static void main(String[] args) { String unformattedXml = "hello"; PrettyPrinter pp = new PrettyPrinter(150, 3); String formatted = pp.format(XML.loadString(unformattedXml), TopScope$.MODULE$); System.out.println(formatted); } } 

El objeto PrettyPrinter se construye con dos entradas, la primera es la longitud máxima de la línea y la segunda es el paso de sangría.

En cuanto al comentario de que “primero debe construir un árbol DOM”: No, no necesita y no debe hacer eso.

En su lugar, cree un StreamSource ( new StreamSource(new StringReader(str)) ) y páselo al transformador de identidad mencionado. Este utilizará el analizador SAX, y el resultado será mucho más rápido. Construir un árbol intermedio es pura sobrecarga en este caso. Por lo demás, la respuesta mejor clasificada es buena.

Solo para futuras referencias, aquí hay una solución que funcionó para mí (gracias a un comentario que @George Hawkins publicó en una de las respuestas):

 DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance(); DOMImplementationLS impl = (DOMImplementationLS) registry.getDOMImplementation("LS"); LSSerializer writer = impl.createLSSerializer(); writer.getDomConfig().setParameter("format-pretty-print", Boolean.TRUE); LSOutput output = impl.createLSOutput(); ByteArrayOutputStream out = new ByteArrayOutputStream(); output.setByteStream(out); writer.write(document, output); String xmlStr = new String(out.toByteArray()); 

versión ligeramente mejorada de milosmns …

 /**
  * Indents XML text without parsing it: the text is split so that every tag
  * and every run of text sits on its own row, and each row is prefixed with
  * one indent unit per open element.
  *
  * <p>The input is assumed to be well-formed. Because the split is purely
  * textual, {@code <} or {@code >} characters inside comments or CDATA
  * sections are treated as tag boundaries too.
  *
  * @param xml the XML text, may be {@code null}
  * @return the indented text without leading or trailing whitespace, or an
  *         empty string when {@code xml} is {@code null} or blank
  */
 public static String getPrettyXml(String xml) {
     if (xml == null || xml.trim().length() == 0) {
         return "";
     }

     int stack = 0;
     StringBuilder pretty = new StringBuilder();
     // Put a line break after every '>' and before every '<'.
     String[] rows = xml.trim().replace(">", ">\n").replace("<", "\n<").split("\n");

     for (String rawRow : rows) {
         String row = rawRow.trim();
         if (row.length() == 0) {
             continue;
         }

         if (row.startsWith("<?") || row.startsWith("<!")) {
             // Declarations, processing instructions, comments, DOCTYPE and CDATA
             // do not open an element, so the depth is left alone.
             pretty.append(repeatString(stack)).append(row).append("\n");
         } else if (row.startsWith("</")) {
             // A closing tag lines up with its opening tag; never go below zero
             // on unbalanced input.
             stack = Math.max(0, stack - 1);
             pretty.append(repeatString(stack)).append(row).append("\n");
         } else if (row.startsWith("<") && !row.endsWith("/>")) {
             // Opening tag: print at the current depth, then go one level deeper.
             pretty.append(repeatString(stack)).append(row).append("\n");
             stack++;
         } else {
             // Self-closing tag or text content.
             pretty.append(repeatString(stack)).append(row).append("\n");
         }
     }
     return pretty.toString().trim();
 }

 /**
  * Builds the indentation for the given depth.
  *
  * @param stack the nesting depth
  * @return one space per level, empty when {@code stack} is not positive
  */
 private static String repeatString(int stack) {
     StringBuilder indent = new StringBuilder();
     for (int i = 0; i < stack; i++) {
         indent.append(" ");
     }
     return indent.toString();
 }

Si está seguro de tener un XML válido, este es simple y evita los árboles XML DOM. Tal vez tiene algunos errores, haz un comentario si ves algo

 public String prettyPrint(String xml) { if (xml == null || xml.trim().length() == 0) return ""; int stack = 0; StringBuilder pretty = new StringBuilder(); String[] rows = xml.trim().replaceAll(">", ">\n").replaceAll("< ", "\n<").split("\n"); for (int i = 0; i < rows.length; i++) { if (rows[i] == null || rows[i].trim().length() == 0) continue; String row = rows[i].trim(); if (row.startsWith(" 

Todas las soluciones anteriores no funcionaron para mí, entonces encontré esto http://myshittycode.com/2014/02/10/java-properly-indenting-xml-string/

La clave es eliminar espacios en blanco con XPath

  String xml = "" + "\n " + "\nCoco Puff" + "\n 10 "; try { Document document = DocumentBuilderFactory.newInstance() .newDocumentBuilder() .parse(new InputSource(new ByteArrayInputStream(xml.getBytes("utf-8")))); XPath xPath = XPathFactory.newInstance().newXPath(); NodeList nodeList = (NodeList) xPath.evaluate("//text()[normalize-space()='']", document, XPathConstants.NODESET); for (int i = 0; i < nodeList.getLength(); ++i) { Node node = nodeList.item(i); node.getParentNode().removeChild(node); } Transformer transformer = TransformerFactory.newInstance().newTransformer(); transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4"); StringWriter stringWriter = new StringWriter(); StreamResult streamResult = new StreamResult(stringWriter); transformer.transform(new DOMSource(document), streamResult); System.out.println(stringWriter.toString()); } catch (Exception e) { e.printStackTrace(); } 

Solo otra solución que funciona para nosotros

 import java.io.StringWriter; import org.dom4j.DocumentHelper; import org.dom4j.io.OutputFormat; import org.dom4j.io.XMLWriter; ** * Pretty Print XML String * * @param inputXmlString * @return */ public static String prettyPrintXml(String xml) { final StringWriter sw; try { final OutputFormat format = OutputFormat.createPrettyPrint(); final org.dom4j.Document document = DocumentHelper.parseText(xml); sw = new StringWriter(); final XMLWriter writer = new XMLWriter(sw, format); writer.write(document); } catch (Exception e) { throw new RuntimeException("Error pretty printing xml:\n" + xml, e); } return sw.toString(); } 

Este código a continuación funciona a la perfección

 import javax.xml.transform.OutputKeys; import javax.xml.transform.Source; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; String formattedXml1 = prettyFormat("aaa"); public static String prettyFormat(String input) { return prettyFormat(input, "2"); } public static String prettyFormat(String input, String indent) { Source xmlInput = new StreamSource(new StringReader(input)); StringWriter stringWriter = new StringWriter(); try { TransformerFactory transformerFactory = TransformerFactory.newInstance(); Transformer transformer = transformerFactory.newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", indent); transformer.transform(xmlInput, new StreamResult(stringWriter)); String pretty = stringWriter.toString(); pretty = pretty.replace("\r\n", "\n"); return pretty; } catch (Exception e) { throw new RuntimeException(e); } } 

As an alternative to the answers from max , codeskraps , David Easley and milosmns , have a look at my lightweight, high-performance pretty-printer library: xml-formatter

 // construct lightweight, threadsafe, instance PrettyPrinter prettyPrinter = PrettyPrinterBuilder.newPrettyPrinter().build(); StringBuilder buffer = new StringBuilder(); String xml = ..; // also works with char[] or Reader if(prettyPrinter.process(xml, buffer)) { // valid XML, print buffer } else { // invalid XML, print xml } 

Sometimes, like when running mocked SOAP services directly from file, it is good to have a pretty-printer which also handles already pretty-printed XML:

 PrettyPrinter prettyPrinter = PrettyPrinterBuilder.newPrettyPrinter().ignoreWhitespace().build(); 

As some have commented, pretty-printing is just a way of presenting XML in a more human-readable form – whitespace strictly does not belong in your XML data.

The library is intended for pretty-printing for logging purposes, and also includes functions for filtering (subtree removal / anonymization) and pretty-printing of XML in CDATA and Text nodes.

I had the same problem and I’m having great success with JTidy ( http://jtidy.sourceforge.net/index.html )

Ejemplo:

 Tidy t = new Tidy(); t.setIndentContent(true); Document d = t.parseDOM( new ByteArrayInputStream("HTML goes here", null); OutputStream out = new ByteArrayOutputStream(); t.pprint(d, out); String html = out.toString(); 

Using jdom2 : http://www.jdom.org/

 import java.io.StringReader; import org.jdom2.input.SAXBuilder; import org.jdom2.output.Format; import org.jdom2.output.XMLOutputter; String prettyXml = new XMLOutputter(Format.getPrettyFormat()). outputString(new SAXBuilder().build(new StringReader(uglyXml))); 

there is a very nice command line xml utility called xmlstarlet( http://xmlstar.sourceforge.net/ ) that can do a lot of things which a lot of people use.

You could execute this program programmatically using Runtime.exec and then read in the formatted output file. It has more options and better error reporting than a few lines of Java code can provide.

download xmlstarlet : http://sourceforge.net/project/showfiles.php?group_id=66612&package_id=64589

I have found that in Java 1.6.0_32 the normal method to pretty print an XML string (using a Transformer with a null or identity xslt) does not behave as I would like if tags are merely separated by whitespace, as opposed to having no separating text. I tried using <xsl:strip-space elements="*"/> in my template to no avail. The simplest solution I found was to strip the space the way I wanted using a SAXSource and XML filter. Since my solution was for logging I also extended this to work with incomplete XML fragments. Note the normal method seems to work fine if you use a DOMSource but I did not want to use this because of the incompleteness and memory overhead.

 public static class WhitespaceIgnoreFilter extends XMLFilterImpl { @Override public void ignorableWhitespace(char[] arg0, int arg1, int arg2) throws SAXException { //Ignore it then... } @Override public void characters( char[] ch, int start, int length) throws SAXException { if (!new String(ch, start, length).trim().equals("")) super.characters(ch, start, length); } } public static String prettyXML(String logMsg, boolean allowBadlyFormedFragments) throws SAXException, IOException, TransformerException { TransformerFactory transFactory = TransformerFactory.newInstance(); transFactory.setAttribute("indent-number", new Integer(2)); Transformer transformer = transFactory.newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4"); StringWriter out = new StringWriter(); XMLReader masterParser = SAXHelper.getSAXParser(true); XMLFilter parser = new WhitespaceIgnoreFilter(); parser.setParent(masterParser); if(allowBadlyFormedFragments) { transformer.setErrorListener(new ErrorListener() { @Override public void warning(TransformerException exception) throws TransformerException { } @Override public void fatalError(TransformerException exception) throws TransformerException { } @Override public void error(TransformerException exception) throws TransformerException { } }); } try { transformer.transform(new SAXSource(parser, new InputSource(new StringReader(logMsg))), new StreamResult(out)); } catch (TransformerException e) { if(e.getCause() != null && e.getCause() instanceof SAXParseException) { if(!allowBadlyFormedFragments || !"XML document structures must start and end within the same entity.".equals(e.getCause().getMessage())) { throw e; } } else { throw e; } } out.flush(); return out.toString(); } 

The solutions I have found here for Java 1.6+ do not reformat the code if it is already formatted. The one that worked for me (and re-formatted already formatted code) was the following.

 import org.apache.xml.security.c14n.CanonicalizationException; import org.apache.xml.security.c14n.Canonicalizer; import org.apache.xml.security.c14n.InvalidCanonicalizerException; import org.w3c.dom.Element; import org.w3c.dom.bootstrap.DOMImplementationRegistry; import org.w3c.dom.ls.DOMImplementationLS; import org.w3c.dom.ls.LSSerializer; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; import java.io.IOException; import java.io.StringReader; public class XmlUtils { public static String toCanonicalXml(String xml) throws InvalidCanonicalizerException, ParserConfigurationException, SAXException, CanonicalizationException, IOException { Canonicalizer canon = Canonicalizer.getInstance(Canonicalizer.ALGO_ID_C14N_OMIT_COMMENTS); byte canonXmlBytes[] = canon.canonicalize(xml.getBytes()); return new String(canonXmlBytes); } public static String prettyFormat(String input) throws TransformerException, ParserConfigurationException, IOException, SAXException, InstantiationException, IllegalAccessException, ClassNotFoundException { InputSource src = new InputSource(new StringReader(input)); Element document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(src).getDocumentElement(); Boolean keepDeclaration = input.startsWith("< ?xml"); DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance(); DOMImplementationLS impl = (DOMImplementationLS) registry.getDOMImplementation("LS"); LSSerializer writer = impl.createLSSerializer(); writer.getDomConfig().setParameter("format-pretty-print", Boolean.TRUE); writer.getDomConfig().setParameter("xml-declaration", keepDeclaration); return writer.writeToString(document); } } 

It is a good tool to use in your unit tests for full-string xml comparison.

 private void assertXMLEqual(String expected, String actual) throws ParserConfigurationException, IOException, SAXException, CanonicalizationException, InvalidCanonicalizerException, TransformerException, IllegalAccessException, ClassNotFoundException, InstantiationException { String canonicalExpected = prettyFormat(toCanonicalXml(expected)); String canonicalActual = prettyFormat(toCanonicalXml(actual)); assertEquals(canonicalExpected, canonicalActual); } 

For those searching for a quick and dirty solution – which doesn’t need the XML to be 100% valid. eg in case of REST / SOAP logging (you never know what the others send ;-))

I found and extended a code snippet online which I think is still missing here as a valid possible approach:

 /**
  * One chunk of markup: an optional opening tag, the text that follows it, an
  * optional closing tag, and an optional self-closing tag.
  */
 private static final Pattern XML_CHUNK =
         Pattern.compile("(<[^/][^>]+>)?([^<]*)(</[^>]+>)?(<[^/][^>]+/>)?");

 /**
  * Quick-and-dirty XML indenter based on a regular expression; the input is
  * not parsed and does not have to be valid XML.
  *
  * <p>Existing line breaks are removed first. Each chunk matched by
  * {@link #XML_CHUNK} is then written on its own line, indented with one tab
  * per open element. A chunk with only an opening tag increases the depth; a
  * chunk with only a closing tag decreases it.
  *
  * @param xmlString the markup to indent
  * @return the indented markup, each line terminated by {@code "\n"}
  */
 public static String prettyPrintXMLAsString(String xmlString) {
     /* Remove new lines */
     final String LINE_BREAK = "\n";
     xmlString = xmlString.replaceAll(LINE_BREAK, "");
     StringBuffer prettyPrintXml = new StringBuffer();

     /* Group the xml tags */
     Matcher matcher = XML_CHUNK.matcher(xmlString);
     int tabCount = 0;
     while (matcher.find()) {
         // Every group is optional, so the pattern also produces empty matches.
         if (matcher.group().trim().equals("")) {
             continue;
         }
         String str1 = matcher.group(1) == null ? "" : matcher.group(1);
         String str2 = matcher.group(2) == null ? "" : matcher.group(2);
         String str3 = matcher.group(3) == null ? "" : matcher.group(3);
         String str4 = matcher.group(4) == null ? "" : matcher.group(4);

         boolean opensOnly = !str1.equals("") && str3.equals("");
         boolean closesOnly = str1.equals("") && !str3.equals("");

         // A lone closing tag lines up with its opening tag, so step back before
         // indenting; the depth never drops below zero on unbalanced input.
         if (closesOnly && tabCount > 0) {
             --tabCount;
         }
         printTabs(tabCount, prettyPrintXml);
         if (opensOnly) {
             ++tabCount;
         }

         prettyPrintXml.append(str1);
         prettyPrintXml.append(str2);
         prettyPrintXml.append(str3);
         if (!str4.equals("")) {
             // A trailing self-closing tag goes on its own line at the current depth.
             prettyPrintXml.append(LINE_BREAK);
             printTabs(tabCount, prettyPrintXml);
             prettyPrintXml.append(str4);
         }
         prettyPrintXml.append(LINE_BREAK);
     }
     return prettyPrintXml.toString();
 }

 /**
  * Appends {@code count} tab characters to the buffer.
  *
  * @param count        number of tabs; nothing is appended when not positive
  * @param stringBuffer the buffer to append to
  */
 private static void printTabs(int count, StringBuffer stringBuffer) {
     for (int i = 0; i < count; i++) {
         stringBuffer.append("\t");
     }
 }

 /** Small demo that prints the indented form of a sample message. */
 public static void main(String[] args) {
     String x = "soap:ClientINVALID_MESSAGE20007INVALID_MESSAGEProblems creating SAAJ object model";
     System.out.println(prettyPrintXMLAsString(x));
 }

here is the output:

    soap:Client INVALID_MESSAGE   20007 INVALID_MESSAGE Problems creating SAAJ object model      

I saw one answer using Scala , so here is another one in Groovy , just in case someone finds it interesting. The default indentation is 2 steps, XmlNodePrinter constructor can be passed another value as well.

 def xml = "hello" def stringWriter = new StringWriter() def node = new XmlParser().parseText(xml); new XmlNodePrinter(new PrintWriter(stringWriter)).print(node) println stringWriter.toString() 

Usage from Java if groovy jar is in classpath

  String xml = "hello"; StringWriter stringWriter = new StringWriter(); Node node = new XmlParser().parseText(xml); new XmlNodePrinter(new PrintWriter(stringWriter)).print(node); System.out.println(stringWriter.toString()); 

In case you do not need indentation that much but a few line breaks, it could be sufficient to simply regex…

 String leastPrettifiedXml = uglyXml.replaceAll(">< ", ">\n< "); 

The code is nice, not the result because of missing indentation.


(For solutions with indentation, see other answers.)

Prueba esto:

  try { TransformerFactory transFactory = TransformerFactory.newInstance(); Transformer transformer = null; transformer = transFactory.newTransformer(); StringWriter buffer = new StringWriter(); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); transformer.transform(new DOMSource(element), new StreamResult(buffer)); String str = buffer.toString(); System.out.println("XML INSIDE IS #########################################"+str); return element; } catch (TransformerConfigurationException e) { e.printStackTrace(); } catch (TransformerException e) { e.printStackTrace(); } 

I should have looked for this page first before coming up with my own solution! Anyway, mine uses Java recursion to parse the xml page. This code is totally self-contained and does not rely on third party libraries. Also .. it uses recursion!

 // you call this method passing in the xml text public static void prettyPrint(String text){ prettyPrint(text, 0); } // "index" corresponds to the number of levels of nesting and/or the number of tabs to print before printing the tag public static void prettyPrint(String xmlText, int index){ boolean foundTagStart = false; StringBuilder tagChars = new StringBuilder(); String startTag = ""; String endTag = ""; String[] chars = xmlText.split(""); // find the next start tag for(String ch : chars){ if(ch.equalsIgnoreCase("< ")){ tagChars.append(ch); foundTagStart = true; } else if(ch.equalsIgnoreCase(">") && foundTagStart){ startTag = tagChars.append(ch).toString(); String tempTag = startTag; endTag = (tempTag.contains("\"") ? (tempTag.split(" ")[0] + ">") : tempTag).replace("< ", " =>  break; } else if(foundTagStart){ tagChars.append(ch); } } // once start and end tag are calculated, print start tag, then content, then end tag if(foundTagStart){ int startIndex = xmlText.indexOf(startTag); int endIndex = xmlText.indexOf(endTag); // handle if matching tags NOT found if((startIndex < 0) || (endIndex < 0)){ if(startIndex < 0) { // no start tag found return; } else { // start tag found, no end tag found (handles single tags aka "" or "< ?xml ...>") printTabs(index); System.out.println(startTag); // move on to the next tag // NOTE: "index" (not index+1) because next tag is on same level as this one prettyPrint(xmlText.substring(startIndex+startTag.length(), xmlText.length()), index); return; } // handle when matching tags found } else { String content = xmlText.substring(startIndex+startTag.length(), endIndex); boolean isTagContainsTags = content.contains("< "); // content contains tags printTabs(index); if(isTagContainsTags){ // ie: stuff System.out.println(startTag); prettyPrint(content, index+1); // "index+1" because "content" is nested printTabs(index); } else { System.out.print(startTag); // ie: stuff or  System.out.print(content); } System.out.println(endTag); int 
nextIndex = endIndex + endTag.length(); if(xmlText.length() > nextIndex){ // if there are more tags on this level, continue prettyPrint(xmlText.substring(nextIndex, xmlText.length()), index); } } } else { System.out.print(xmlText); } } private static void printTabs(int counter){ while(counter-- > 0){ System.out.print("\t"); } }