serialize.js
1 "use strict"; 2 3 const xnv = require("xml-name-validator"); 4 5 const attributeUtils = require("./attributes"); 6 const { NAMESPACES, VOID_ELEMENTS, NODE_TYPES } = require("./constants"); 7 8 const XML_CHAR = /^(\x09|\x0A|\x0D|[\x20-\uD7FF]|[\uE000-\uFFFD]|(?:[\uD800-\uDBFF][\uDC00-\uDFFF]))*$/; 9 const PUBID_CHAR = /^(\x20|\x0D|\x0A|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%])*$/; 10 11 function asciiCaseInsensitiveMatch(a, b) { 12 if (a.length !== b.length) { 13 return false; 14 } 15 16 for (let i = 0; i < a.length; ++i) { 17 if ((a.charCodeAt(i) | 32) !== (b.charCodeAt(i) | 32)) { 18 return false; 19 } 20 } 21 22 return true; 23 } 24 25 function recordNamespaceInformation(element, map, prefixMap) { 26 let defaultNamespaceAttrValue = null; 27 for (let i = 0; i < element.attributes.length; ++i) { 28 const attr = element.attributes[i]; 29 if (attr.namespaceURI === NAMESPACES.XMLNS) { 30 if (attr.prefix === null) { 31 defaultNamespaceAttrValue = attr.value; 32 continue; 33 } 34 let namespaceDefinition = attr.value; 35 if (namespaceDefinition === NAMESPACES.XML) { 36 continue; 37 } 38 // This is exactly the other way than the spec says, but that's intended. 39 // All the maps coalesce null to the empty string (explained in the 40 // spec), so instead of doing that every time, just do it once here. 41 if (namespaceDefinition === null) { 42 namespaceDefinition = ""; 43 } 44 45 if ( 46 namespaceDefinition in map && 47 map[namespaceDefinition].includes(attr.localName) 48 ) { 49 continue; 50 } 51 if (!(namespaceDefinition in map)) { 52 map[namespaceDefinition] = []; 53 } 54 map[namespaceDefinition].push(attr.localName); 55 prefixMap[attr.localName] = namespaceDefinition; 56 } 57 } 58 return defaultNamespaceAttrValue; 59 } 60 61 function serializeDocumentType(node, namespace, prefixMap, requireWellFormed) { 62 if (requireWellFormed && !PUBID_CHAR.test(node.publicId)) { 63 throw new Error("Failed to serialize XML: document type node publicId is not well-formed."); 64 } 65 66 if ( 67 requireWellFormed && 68 (!XML_CHAR.test(node.systemId) || 69 (node.systemId.includes('"') && node.systemId.includes("'"))) 70 ) { 71 throw new Error("Failed to serialize XML: document type node systemId is not well-formed."); 72 } 73 74 let markup = `<!DOCTYPE ${node.name}`; 75 if (node.publicId !== "") { 76 markup += ` PUBLIC "${node.publicId}"`; 77 } else if (node.systemId !== "") { 78 markup += " SYSTEM"; 79 } 80 if (node.systemId !== "") { 81 markup += ` "${node.systemId}"`; 82 } 83 return markup + ">"; 84 } 85 86 function serializeProcessingInstruction( 87 node, 88 namespace, 89 prefixMap, 90 requireWellFormed 91 ) { 92 if ( 93 requireWellFormed && 94 (node.target.includes(":") || asciiCaseInsensitiveMatch(node.target, "xml")) 95 ) { 96 throw new Error("Failed to serialize XML: processing instruction node target is not well-formed."); 97 } 98 if ( 99 requireWellFormed && 100 (!XML_CHAR.test(node.data) || node.data.includes("?>")) 101 ) { 102 throw new Error("Failed to serialize XML: processing instruction node data is not well-formed."); 103 } 104 return `<?${node.target} ${node.data}?>`; 105 } 106 107 function serializeDocument( 108 node, 109 namespace, 110 prefixMap, 111 requireWellFormed, 112 refs 113 ) { 114 if (requireWellFormed && node.documentElement === null) { 115 throw new Error("Failed to serialize XML: document does not have a document element."); 116 } 117 let serializedDocument = ""; 118 for (const child of node.childNodes) { 119 serializedDocument += xmlSerialization( 120 child, 121 namespace, 122 prefixMap, 123 requireWellFormed, 124 refs 125 ); 126 } 127 return serializedDocument; 128 } 129 130 function serializeDocumentFragment( 131 node, 132 namespace, 133 prefixMap, 134 requireWellFormed, 135 refs 136 ) { 137 let markup = ""; 138 for (const child of node.childNodes) { 139 markup += xmlSerialization( 140 child, 141 namespace, 142 prefixMap, 143 requireWellFormed, 144 refs 145 ); 146 } 147 return markup; 148 } 149 150 function serializeText(node, namespace, prefixMap, requireWellFormed) { 151 if (requireWellFormed && !XML_CHAR.test(node.data)) { 152 throw new Error("Failed to serialize XML: text node data is not well-formed."); 153 } 154 155 return node.data 156 .replace(/&/g, "&") 157 .replace(/</g, "<") 158 .replace(/>/g, ">"); 159 } 160 161 function serializeComment(node, namespace, prefixMap, requireWellFormed) { 162 if (requireWellFormed && !XML_CHAR.test(node.data)) { 163 throw new Error("Failed to serialize XML: comment node data is not well-formed."); 164 } 165 166 if ( 167 requireWellFormed && 168 (node.data.includes("--") || node.data.endsWith("-")) 169 ) { 170 throw new Error("Failed to serialize XML: found hyphens in illegal places in comment node data."); 171 } 172 return `<!--${node.data}-->`; 173 } 174 175 function serializeElement(node, namespace, prefixMap, requireWellFormed, refs) { 176 if ( 177 requireWellFormed && 178 (node.localName.includes(":") || !xnv.name(node.localName)) 179 ) { 180 throw new Error("Failed to serialize XML: element node localName is not a valid XML name."); 181 } 182 let markup = "<"; 183 let qualifiedName = ""; 184 let skipEndTag = false; 185 let ignoreNamespaceDefinitionAttr = false; 186 const map = Object.assign({}, prefixMap); 187 const localPrefixesMap = Object.create(null); 188 const localDefaultNamespace = recordNamespaceInformation( 189 node, 190 map, 191 localPrefixesMap 192 ); 193 let inheritedNs = namespace; 194 const ns = node.namespaceURI; 195 if (inheritedNs === ns) { 196 if (localDefaultNamespace !== null) { 197 ignoreNamespaceDefinitionAttr = true; 198 } 199 if (ns === NAMESPACES.XML) { 200 qualifiedName = "xml:" + node.localName; 201 } else { 202 qualifiedName = node.localName; 203 } 204 markup += qualifiedName; 205 } else { 206 let { prefix } = node; 207 let candidatePrefix = attributeUtils.preferredPrefixString(map, ns, prefix); 208 if (prefix === "xmlns") { 209 if (requireWellFormed) { 210 throw new Error("Failed to serialize XML: element nodes can't have a prefix of \"xmlns\"."); 211 } 212 candidatePrefix = "xmlns"; 213 } 214 if (candidatePrefix !== null) { 215 qualifiedName = candidatePrefix + ":" + node.localName; 216 if ( 217 localDefaultNamespace !== null && 218 localDefaultNamespace !== NAMESPACES.XML 219 ) { 220 inheritedNs = 221 localDefaultNamespace === "" ? null : localDefaultNamespace; 222 } 223 markup += qualifiedName; 224 } else if (prefix !== null) { 225 if (prefix in localPrefixesMap) { 226 prefix = attributeUtils.generatePrefix(map, ns, refs.prefixIndex++); 227 } 228 if (map[ns]) { 229 map[ns].push(prefix); 230 } else { 231 map[ns] = [prefix]; 232 } 233 qualifiedName = prefix + ":" + node.localName; 234 markup += `${qualifiedName} xmlns:${prefix}="${attributeUtils.serializeAttributeValue( 235 ns, 236 requireWellFormed 237 )}"`; 238 if (localDefaultNamespace !== null) { 239 inheritedNs = 240 localDefaultNamespace === "" ? null : localDefaultNamespace; 241 } 242 } else if (localDefaultNamespace === null || localDefaultNamespace !== ns) { 243 ignoreNamespaceDefinitionAttr = true; 244 qualifiedName = node.localName; 245 inheritedNs = ns; 246 markup += `${qualifiedName} xmlns="${attributeUtils.serializeAttributeValue( 247 ns, 248 requireWellFormed 249 )}"`; 250 } else { 251 qualifiedName = node.localName; 252 inheritedNs = ns; 253 markup += qualifiedName; 254 } 255 } 256 257 markup += attributeUtils.serializeAttributes( 258 node, 259 map, 260 localPrefixesMap, 261 ignoreNamespaceDefinitionAttr, 262 requireWellFormed, 263 refs 264 ); 265 266 if ( 267 ns === NAMESPACES.HTML && 268 node.childNodes.length === 0 && 269 VOID_ELEMENTS.has(node.localName) 270 ) { 271 markup += " /"; 272 skipEndTag = true; 273 } else if (ns !== NAMESPACES.HTML && node.childNodes.length === 0) { 274 markup += "/"; 275 skipEndTag = true; 276 } 277 markup += ">"; 278 if (skipEndTag) { 279 return markup; 280 } 281 282 if (ns === NAMESPACES.HTML && node.localName === "template") { 283 markup += xmlSerialization( 284 node.content, 285 inheritedNs, 286 map, 287 requireWellFormed, 288 refs 289 ); 290 } else { 291 for (const child of node.childNodes) { 292 markup += xmlSerialization( 293 child, 294 inheritedNs, 295 map, 296 requireWellFormed, 297 refs 298 ); 299 } 300 } 301 markup += `</${qualifiedName}>`; 302 return markup; 303 } 304 305 function serializeCDATASection(node) { 306 return "<![CDATA[" + node.data + "]]>"; 307 } 308 309 /** 310 * @param {{prefixIndex: number}} refs 311 */ 312 function xmlSerialization(node, namespace, prefixMap, requireWellFormed, refs) { 313 switch (node.nodeType) { 314 case NODE_TYPES.ELEMENT_NODE: 315 return serializeElement( 316 node, 317 namespace, 318 prefixMap, 319 requireWellFormed, 320 refs 321 ); 322 case NODE_TYPES.DOCUMENT_NODE: 323 return serializeDocument( 324 node, 325 namespace, 326 prefixMap, 327 requireWellFormed, 328 refs 329 ); 330 case NODE_TYPES.COMMENT_NODE: 331 return serializeComment(node, namespace, prefixMap, requireWellFormed); 332 case NODE_TYPES.TEXT_NODE: 333 return serializeText(node, namespace, prefixMap, requireWellFormed); 334 case NODE_TYPES.DOCUMENT_FRAGMENT_NODE: 335 return serializeDocumentFragment( 336 node, 337 namespace, 338 prefixMap, 339 requireWellFormed, 340 refs 341 ); 342 case NODE_TYPES.DOCUMENT_TYPE_NODE: 343 return serializeDocumentType( 344 node, 345 namespace, 346 prefixMap, 347 requireWellFormed 348 ); 349 case NODE_TYPES.PROCESSING_INSTRUCTION_NODE: 350 return serializeProcessingInstruction( 351 node, 352 namespace, 353 prefixMap, 354 requireWellFormed 355 ); 356 case NODE_TYPES.ATTRIBUTE_NODE: 357 return ""; 358 case NODE_TYPES.CDATA_SECTION_NODE: 359 return serializeCDATASection(node); 360 default: 361 throw new TypeError("Failed to serialize XML: only Nodes can be serialized."); 362 } 363 } 364 365 module.exports = (root, { requireWellFormed = false } = {}) => { 366 const namespacePrefixMap = Object.create(null); 367 namespacePrefixMap["http://www.w3.org/XML/1998/namespace"] = ["xml"]; 368 return xmlSerialization(root, null, namespacePrefixMap, requireWellFormed, { 369 prefixIndex: 1 370 }); 371 };