/ externals / catch / src / catch2 / internal / catch_xmlwriter.cpp
catch_xmlwriter.cpp
  1  
  2  //              Copyright Catch2 Authors
  3  // Distributed under the Boost Software License, Version 1.0.
  4  //   (See accompanying file LICENSE.txt or copy at
  5  //        https://www.boost.org/LICENSE_1_0.txt)
  6  
  7  // SPDX-License-Identifier: BSL-1.0
  8  // Note: swapping these two includes around causes MSVC to error out
  9  //       while in /permissive- mode. No, I don't know why.
 10  //       Tested on VS 2019, 18.{3, 4}.x
 11  #include <catch2/internal/catch_enforce.hpp>
 12  #include <catch2/internal/catch_xmlwriter.hpp>
 13  
 14  #include <cstdint>
 15  #include <iomanip>
 16  #include <type_traits>
 17  
 18  namespace Catch {
 19  
 20  namespace {
 21  
 22      size_t trailingBytes(unsigned char c) {
 23          if ((c & 0xE0) == 0xC0) {
 24              return 2;
 25          }
 26          if ((c & 0xF0) == 0xE0) {
 27              return 3;
 28          }
 29          if ((c & 0xF8) == 0xF0) {
 30              return 4;
 31          }
 32          CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered");
 33      }
 34  
 35      uint32_t headerValue(unsigned char c) {
 36          if ((c & 0xE0) == 0xC0) {
 37              return c & 0x1F;
 38          }
 39          if ((c & 0xF0) == 0xE0) {
 40              return c & 0x0F;
 41          }
 42          if ((c & 0xF8) == 0xF0) {
 43              return c & 0x07;
 44          }
 45          CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered");
 46      }
 47  
 48      void hexEscapeChar(std::ostream& os, unsigned char c) {
 49          std::ios_base::fmtflags f(os.flags());
 50          os << "\\x"
 51              << std::uppercase << std::hex << std::setfill('0') << std::setw(2)
 52              << static_cast<int>(c);
 53          os.flags(f);
 54      }
 55  
 56      bool shouldNewline(XmlFormatting fmt) {
 57          return !!(static_cast<std::underlying_type_t<XmlFormatting>>(fmt & XmlFormatting::Newline));
 58      }
 59  
 60      bool shouldIndent(XmlFormatting fmt) {
 61          return !!(static_cast<std::underlying_type_t<XmlFormatting>>(fmt & XmlFormatting::Indent));
 62      }
 63  
 64  } // anonymous namespace
 65  
 66      XmlFormatting operator | (XmlFormatting lhs, XmlFormatting rhs) {
 67          return static_cast<XmlFormatting>(
 68              static_cast<std::underlying_type_t<XmlFormatting>>(lhs) |
 69              static_cast<std::underlying_type_t<XmlFormatting>>(rhs)
 70          );
 71      }
 72  
 73      XmlFormatting operator & (XmlFormatting lhs, XmlFormatting rhs) {
 74          return static_cast<XmlFormatting>(
 75              static_cast<std::underlying_type_t<XmlFormatting>>(lhs) &
 76              static_cast<std::underlying_type_t<XmlFormatting>>(rhs)
 77          );
 78      }
 79  
 80  
 81      XmlEncode::XmlEncode( StringRef str, ForWhat forWhat )
 82      :   m_str( str ),
 83          m_forWhat( forWhat )
 84      {}
 85  
 86      void XmlEncode::encodeTo( std::ostream& os ) const {
 87          // Apostrophe escaping not necessary if we always use " to write attributes
 88          // (see: http://www.w3.org/TR/xml/#syntax)
 89  
 90          for( std::size_t idx = 0; idx < m_str.size(); ++ idx ) {
 91              unsigned char c = static_cast<unsigned char>(m_str[idx]);
 92              switch (c) {
 93              case '<':   os << "&lt;"; break;
 94              case '&':   os << "&amp;"; break;
 95  
 96              case '>':
 97                  // See: http://www.w3.org/TR/xml/#syntax
 98                  if (idx > 2 && m_str[idx - 1] == ']' && m_str[idx - 2] == ']')
 99                      os << "&gt;";
100                  else
101                      os << c;
102                  break;
103  
104              case '\"':
105                  if (m_forWhat == ForAttributes)
106                      os << "&quot;";
107                  else
108                      os << c;
109                  break;
110  
111              default:
112                  // Check for control characters and invalid utf-8
113  
114                  // Escape control characters in standard ascii
115                  // see http://stackoverflow.com/questions/404107/why-are-control-characters-illegal-in-xml-1-0
116                  if (c < 0x09 || (c > 0x0D && c < 0x20) || c == 0x7F) {
117                      hexEscapeChar(os, c);
118                      break;
119                  }
120  
121                  // Plain ASCII: Write it to stream
122                  if (c < 0x7F) {
123                      os << c;
124                      break;
125                  }
126  
127                  // UTF-8 territory
128                  // Check if the encoding is valid and if it is not, hex escape bytes.
129                  // Important: We do not check the exact decoded values for validity, only the encoding format
130                  // First check that this bytes is a valid lead byte:
131                  // This means that it is not encoded as 1111 1XXX
132                  // Or as 10XX XXXX
133                  if (c <  0xC0 ||
134                      c >= 0xF8) {
135                      hexEscapeChar(os, c);
136                      break;
137                  }
138  
139                  auto encBytes = trailingBytes(c);
140                  // Are there enough bytes left to avoid accessing out-of-bounds memory?
141                  if (idx + encBytes - 1 >= m_str.size()) {
142                      hexEscapeChar(os, c);
143                      break;
144                  }
145                  // The header is valid, check data
146                  // The next encBytes bytes must together be a valid utf-8
147                  // This means: bitpattern 10XX XXXX and the extracted value is sane (ish)
148                  bool valid = true;
149                  uint32_t value = headerValue(c);
150                  for (std::size_t n = 1; n < encBytes; ++n) {
151                      unsigned char nc = static_cast<unsigned char>(m_str[idx + n]);
152                      valid &= ((nc & 0xC0) == 0x80);
153                      value = (value << 6) | (nc & 0x3F);
154                  }
155  
156                  if (
157                      // Wrong bit pattern of following bytes
158                      (!valid) ||
159                      // Overlong encodings
160                      (value < 0x80) ||
161                      (0x80 <= value && value < 0x800   && encBytes > 2) ||
162                      (0x800 < value && value < 0x10000 && encBytes > 3) ||
163                      // Encoded value out of range
164                      (value >= 0x110000)
165                      ) {
166                      hexEscapeChar(os, c);
167                      break;
168                  }
169  
170                  // If we got here, this is in fact a valid(ish) utf-8 sequence
171                  for (std::size_t n = 0; n < encBytes; ++n) {
172                      os << m_str[idx + n];
173                  }
174                  idx += encBytes - 1;
175                  break;
176              }
177          }
178      }
179  
180      std::ostream& operator << ( std::ostream& os, XmlEncode const& xmlEncode ) {
181          xmlEncode.encodeTo( os );
182          return os;
183      }
184  
185      XmlWriter::ScopedElement::ScopedElement( XmlWriter* writer, XmlFormatting fmt )
186      :   m_writer( writer ),
187          m_fmt(fmt)
188      {}
189  
190      XmlWriter::ScopedElement::ScopedElement( ScopedElement&& other ) noexcept
191      :   m_writer( other.m_writer ),
192          m_fmt(other.m_fmt)
193      {
194          other.m_writer = nullptr;
195          other.m_fmt = XmlFormatting::None;
196      }
197      XmlWriter::ScopedElement& XmlWriter::ScopedElement::operator=( ScopedElement&& other ) noexcept {
198          if ( m_writer ) {
199              m_writer->endElement();
200          }
201          m_writer = other.m_writer;
202          other.m_writer = nullptr;
203          m_fmt = other.m_fmt;
204          other.m_fmt = XmlFormatting::None;
205          return *this;
206      }
207  
208  
209      XmlWriter::ScopedElement::~ScopedElement() {
210          if (m_writer) {
211              m_writer->endElement(m_fmt);
212          }
213      }
214  
215      XmlWriter::ScopedElement&
216      XmlWriter::ScopedElement::writeText( StringRef text, XmlFormatting fmt ) {
217          m_writer->writeText( text, fmt );
218          return *this;
219      }
220  
221      XmlWriter::ScopedElement&
222      XmlWriter::ScopedElement::writeAttribute( StringRef name,
223                                                StringRef attribute ) {
224          m_writer->writeAttribute( name, attribute );
225          return *this;
226      }
227  
228  
229      XmlWriter::XmlWriter( std::ostream& os ) : m_os( os )
230      {
231          writeDeclaration();
232      }
233  
234      XmlWriter::~XmlWriter() {
235          while (!m_tags.empty()) {
236              endElement();
237          }
238          newlineIfNecessary();
239      }
240  
241      XmlWriter& XmlWriter::startElement( std::string const& name, XmlFormatting fmt ) {
242          ensureTagClosed();
243          newlineIfNecessary();
244          if (shouldIndent(fmt)) {
245              m_os << m_indent;
246              m_indent += "  ";
247          }
248          m_os << '<' << name;
249          m_tags.push_back( name );
250          m_tagIsOpen = true;
251          applyFormatting(fmt);
252          return *this;
253      }
254  
255      XmlWriter::ScopedElement XmlWriter::scopedElement( std::string const& name, XmlFormatting fmt ) {
256          ScopedElement scoped( this, fmt );
257          startElement( name, fmt );
258          return scoped;
259      }
260  
261      XmlWriter& XmlWriter::endElement(XmlFormatting fmt) {
262          m_indent = m_indent.substr(0, m_indent.size() - 2);
263  
264          if( m_tagIsOpen ) {
265              m_os << "/>";
266              m_tagIsOpen = false;
267          } else {
268              newlineIfNecessary();
269              if (shouldIndent(fmt)) {
270                  m_os << m_indent;
271              }
272              m_os << "</" << m_tags.back() << '>';
273          }
274          m_os << std::flush;
275          applyFormatting(fmt);
276          m_tags.pop_back();
277          return *this;
278      }
279  
280      XmlWriter& XmlWriter::writeAttribute( StringRef name,
281                                            StringRef attribute ) {
282          if( !name.empty() && !attribute.empty() )
283              m_os << ' ' << name << "=\"" << XmlEncode( attribute, XmlEncode::ForAttributes ) << '"';
284          return *this;
285      }
286  
287      XmlWriter& XmlWriter::writeAttribute( StringRef name, bool attribute ) {
288          writeAttribute(name, (attribute ? "true"_sr : "false"_sr));
289          return *this;
290      }
291  
292      XmlWriter& XmlWriter::writeAttribute( StringRef name,
293                                            char const* attribute ) {
294          writeAttribute( name, StringRef( attribute ) );
295          return *this;
296      }
297  
298      XmlWriter& XmlWriter::writeText( StringRef text, XmlFormatting fmt ) {
299          CATCH_ENFORCE(!m_tags.empty(), "Cannot write text as top level element");
300          if( !text.empty() ){
301              bool tagWasOpen = m_tagIsOpen;
302              ensureTagClosed();
303              if (tagWasOpen && shouldIndent(fmt)) {
304                  m_os << m_indent;
305              }
306              m_os << XmlEncode( text, XmlEncode::ForTextNodes );
307              applyFormatting(fmt);
308          }
309          return *this;
310      }
311  
312      XmlWriter& XmlWriter::writeComment( StringRef text, XmlFormatting fmt ) {
313          ensureTagClosed();
314          if (shouldIndent(fmt)) {
315              m_os << m_indent;
316          }
317          m_os << "<!-- " << text << " -->";
318          applyFormatting(fmt);
319          return *this;
320      }
321  
322      void XmlWriter::writeStylesheetRef( StringRef url ) {
323          m_os << R"(<?xml-stylesheet type="text/xsl" href=")" << url << R"("?>)" << '\n';
324      }
325  
326      void XmlWriter::ensureTagClosed() {
327          if( m_tagIsOpen ) {
328              m_os << '>' << std::flush;
329              newlineIfNecessary();
330              m_tagIsOpen = false;
331          }
332      }
333  
334      void XmlWriter::applyFormatting(XmlFormatting fmt) {
335          m_needsNewline = shouldNewline(fmt);
336      }
337  
338      void XmlWriter::writeDeclaration() {
339          m_os << R"(<?xml version="1.0" encoding="UTF-8"?>)" << '\n';
340      }
341  
342      void XmlWriter::newlineIfNecessary() {
343          if( m_needsNewline ) {
344              m_os << '\n' << std::flush;
345              m_needsNewline = false;
346          }
347      }
348  }