catch_xmlwriter.cpp
1 2 // Copyright Catch2 Authors 3 // Distributed under the Boost Software License, Version 1.0. 4 // (See accompanying file LICENSE.txt or copy at 5 // https://www.boost.org/LICENSE_1_0.txt) 6 7 // SPDX-License-Identifier: BSL-1.0 8 // Note: swapping these two includes around causes MSVC to error out 9 // while in /permissive- mode. No, I don't know why. 10 // Tested on VS 2019, 18.{3, 4}.x 11 #include <catch2/internal/catch_enforce.hpp> 12 #include <catch2/internal/catch_xmlwriter.hpp> 13 14 #include <cstdint> 15 #include <iomanip> 16 #include <type_traits> 17 18 namespace Catch { 19 20 namespace { 21 22 size_t trailingBytes(unsigned char c) { 23 if ((c & 0xE0) == 0xC0) { 24 return 2; 25 } 26 if ((c & 0xF0) == 0xE0) { 27 return 3; 28 } 29 if ((c & 0xF8) == 0xF0) { 30 return 4; 31 } 32 CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered"); 33 } 34 35 uint32_t headerValue(unsigned char c) { 36 if ((c & 0xE0) == 0xC0) { 37 return c & 0x1F; 38 } 39 if ((c & 0xF0) == 0xE0) { 40 return c & 0x0F; 41 } 42 if ((c & 0xF8) == 0xF0) { 43 return c & 0x07; 44 } 45 CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered"); 46 } 47 48 void hexEscapeChar(std::ostream& os, unsigned char c) { 49 std::ios_base::fmtflags f(os.flags()); 50 os << "\\x" 51 << std::uppercase << std::hex << std::setfill('0') << std::setw(2) 52 << static_cast<int>(c); 53 os.flags(f); 54 } 55 56 bool shouldNewline(XmlFormatting fmt) { 57 return !!(static_cast<std::underlying_type_t<XmlFormatting>>(fmt & XmlFormatting::Newline)); 58 } 59 60 bool shouldIndent(XmlFormatting fmt) { 61 return !!(static_cast<std::underlying_type_t<XmlFormatting>>(fmt & XmlFormatting::Indent)); 62 } 63 64 } // anonymous namespace 65 66 XmlFormatting operator | (XmlFormatting lhs, XmlFormatting rhs) { 67 return static_cast<XmlFormatting>( 68 static_cast<std::underlying_type_t<XmlFormatting>>(lhs) | 69 static_cast<std::underlying_type_t<XmlFormatting>>(rhs) 70 ); 71 } 72 73 XmlFormatting operator & (XmlFormatting lhs, XmlFormatting rhs) { 74 return static_cast<XmlFormatting>( 75 static_cast<std::underlying_type_t<XmlFormatting>>(lhs) & 76 static_cast<std::underlying_type_t<XmlFormatting>>(rhs) 77 ); 78 } 79 80 81 XmlEncode::XmlEncode( StringRef str, ForWhat forWhat ) 82 : m_str( str ), 83 m_forWhat( forWhat ) 84 {} 85 86 void XmlEncode::encodeTo( std::ostream& os ) const { 87 // Apostrophe escaping not necessary if we always use " to write attributes 88 // (see: http://www.w3.org/TR/xml/#syntax) 89 90 for( std::size_t idx = 0; idx < m_str.size(); ++ idx ) { 91 unsigned char c = static_cast<unsigned char>(m_str[idx]); 92 switch (c) { 93 case '<': os << "<"; break; 94 case '&': os << "&"; break; 95 96 case '>': 97 // See: http://www.w3.org/TR/xml/#syntax 98 if (idx > 2 && m_str[idx - 1] == ']' && m_str[idx - 2] == ']') 99 os << ">"; 100 else 101 os << c; 102 break; 103 104 case '\"': 105 if (m_forWhat == ForAttributes) 106 os << """; 107 else 108 os << c; 109 break; 110 111 default: 112 // Check for control characters and invalid utf-8 113 114 // Escape control characters in standard ascii 115 // see http://stackoverflow.com/questions/404107/why-are-control-characters-illegal-in-xml-1-0 116 if (c < 0x09 || (c > 0x0D && c < 0x20) || c == 0x7F) { 117 hexEscapeChar(os, c); 118 break; 119 } 120 121 // Plain ASCII: Write it to stream 122 if (c < 0x7F) { 123 os << c; 124 break; 125 } 126 127 // UTF-8 territory 128 // Check if the encoding is valid and if it is not, hex escape bytes. 129 // Important: We do not check the exact decoded values for validity, only the encoding format 130 // First check that this bytes is a valid lead byte: 131 // This means that it is not encoded as 1111 1XXX 132 // Or as 10XX XXXX 133 if (c < 0xC0 || 134 c >= 0xF8) { 135 hexEscapeChar(os, c); 136 break; 137 } 138 139 auto encBytes = trailingBytes(c); 140 // Are there enough bytes left to avoid accessing out-of-bounds memory? 141 if (idx + encBytes - 1 >= m_str.size()) { 142 hexEscapeChar(os, c); 143 break; 144 } 145 // The header is valid, check data 146 // The next encBytes bytes must together be a valid utf-8 147 // This means: bitpattern 10XX XXXX and the extracted value is sane (ish) 148 bool valid = true; 149 uint32_t value = headerValue(c); 150 for (std::size_t n = 1; n < encBytes; ++n) { 151 unsigned char nc = static_cast<unsigned char>(m_str[idx + n]); 152 valid &= ((nc & 0xC0) == 0x80); 153 value = (value << 6) | (nc & 0x3F); 154 } 155 156 if ( 157 // Wrong bit pattern of following bytes 158 (!valid) || 159 // Overlong encodings 160 (value < 0x80) || 161 (0x80 <= value && value < 0x800 && encBytes > 2) || 162 (0x800 < value && value < 0x10000 && encBytes > 3) || 163 // Encoded value out of range 164 (value >= 0x110000) 165 ) { 166 hexEscapeChar(os, c); 167 break; 168 } 169 170 // If we got here, this is in fact a valid(ish) utf-8 sequence 171 for (std::size_t n = 0; n < encBytes; ++n) { 172 os << m_str[idx + n]; 173 } 174 idx += encBytes - 1; 175 break; 176 } 177 } 178 } 179 180 std::ostream& operator << ( std::ostream& os, XmlEncode const& xmlEncode ) { 181 xmlEncode.encodeTo( os ); 182 return os; 183 } 184 185 XmlWriter::ScopedElement::ScopedElement( XmlWriter* writer, XmlFormatting fmt ) 186 : m_writer( writer ), 187 m_fmt(fmt) 188 {} 189 190 XmlWriter::ScopedElement::ScopedElement( ScopedElement&& other ) noexcept 191 : m_writer( other.m_writer ), 192 m_fmt(other.m_fmt) 193 { 194 other.m_writer = nullptr; 195 other.m_fmt = XmlFormatting::None; 196 } 197 XmlWriter::ScopedElement& XmlWriter::ScopedElement::operator=( ScopedElement&& other ) noexcept { 198 if ( m_writer ) { 199 m_writer->endElement(); 200 } 201 m_writer = other.m_writer; 202 other.m_writer = nullptr; 203 m_fmt = other.m_fmt; 204 other.m_fmt = XmlFormatting::None; 205 return *this; 206 } 207 208 209 XmlWriter::ScopedElement::~ScopedElement() { 210 if (m_writer) { 211 m_writer->endElement(m_fmt); 212 } 213 } 214 215 XmlWriter::ScopedElement& 216 XmlWriter::ScopedElement::writeText( StringRef text, XmlFormatting fmt ) { 217 m_writer->writeText( text, fmt ); 218 return *this; 219 } 220 221 XmlWriter::ScopedElement& 222 XmlWriter::ScopedElement::writeAttribute( StringRef name, 223 StringRef attribute ) { 224 m_writer->writeAttribute( name, attribute ); 225 return *this; 226 } 227 228 229 XmlWriter::XmlWriter( std::ostream& os ) : m_os( os ) 230 { 231 writeDeclaration(); 232 } 233 234 XmlWriter::~XmlWriter() { 235 while (!m_tags.empty()) { 236 endElement(); 237 } 238 newlineIfNecessary(); 239 } 240 241 XmlWriter& XmlWriter::startElement( std::string const& name, XmlFormatting fmt ) { 242 ensureTagClosed(); 243 newlineIfNecessary(); 244 if (shouldIndent(fmt)) { 245 m_os << m_indent; 246 m_indent += " "; 247 } 248 m_os << '<' << name; 249 m_tags.push_back( name ); 250 m_tagIsOpen = true; 251 applyFormatting(fmt); 252 return *this; 253 } 254 255 XmlWriter::ScopedElement XmlWriter::scopedElement( std::string const& name, XmlFormatting fmt ) { 256 ScopedElement scoped( this, fmt ); 257 startElement( name, fmt ); 258 return scoped; 259 } 260 261 XmlWriter& XmlWriter::endElement(XmlFormatting fmt) { 262 m_indent = m_indent.substr(0, m_indent.size() - 2); 263 264 if( m_tagIsOpen ) { 265 m_os << "/>"; 266 m_tagIsOpen = false; 267 } else { 268 newlineIfNecessary(); 269 if (shouldIndent(fmt)) { 270 m_os << m_indent; 271 } 272 m_os << "</" << m_tags.back() << '>'; 273 } 274 m_os << std::flush; 275 applyFormatting(fmt); 276 m_tags.pop_back(); 277 return *this; 278 } 279 280 XmlWriter& XmlWriter::writeAttribute( StringRef name, 281 StringRef attribute ) { 282 if( !name.empty() && !attribute.empty() ) 283 m_os << ' ' << name << "=\"" << XmlEncode( attribute, XmlEncode::ForAttributes ) << '"'; 284 return *this; 285 } 286 287 XmlWriter& XmlWriter::writeAttribute( StringRef name, bool attribute ) { 288 writeAttribute(name, (attribute ? "true"_sr : "false"_sr)); 289 return *this; 290 } 291 292 XmlWriter& XmlWriter::writeAttribute( StringRef name, 293 char const* attribute ) { 294 writeAttribute( name, StringRef( attribute ) ); 295 return *this; 296 } 297 298 XmlWriter& XmlWriter::writeText( StringRef text, XmlFormatting fmt ) { 299 CATCH_ENFORCE(!m_tags.empty(), "Cannot write text as top level element"); 300 if( !text.empty() ){ 301 bool tagWasOpen = m_tagIsOpen; 302 ensureTagClosed(); 303 if (tagWasOpen && shouldIndent(fmt)) { 304 m_os << m_indent; 305 } 306 m_os << XmlEncode( text, XmlEncode::ForTextNodes ); 307 applyFormatting(fmt); 308 } 309 return *this; 310 } 311 312 XmlWriter& XmlWriter::writeComment( StringRef text, XmlFormatting fmt ) { 313 ensureTagClosed(); 314 if (shouldIndent(fmt)) { 315 m_os << m_indent; 316 } 317 m_os << "<!-- " << text << " -->"; 318 applyFormatting(fmt); 319 return *this; 320 } 321 322 void XmlWriter::writeStylesheetRef( StringRef url ) { 323 m_os << R"(<?xml-stylesheet type="text/xsl" href=")" << url << R"("?>)" << '\n'; 324 } 325 326 void XmlWriter::ensureTagClosed() { 327 if( m_tagIsOpen ) { 328 m_os << '>' << std::flush; 329 newlineIfNecessary(); 330 m_tagIsOpen = false; 331 } 332 } 333 334 void XmlWriter::applyFormatting(XmlFormatting fmt) { 335 m_needsNewline = shouldNewline(fmt); 336 } 337 338 void XmlWriter::writeDeclaration() { 339 m_os << R"(<?xml version="1.0" encoding="UTF-8"?>)" << '\n'; 340 } 341 342 void XmlWriter::newlineIfNecessary() { 343 if( m_needsNewline ) { 344 m_os << '\n' << std::flush; 345 m_needsNewline = false; 346 } 347 } 348 }