/ src / common / string_conversion.cc
string_conversion.cc
  1  // Copyright 2006 Google LLC
  2  //
  3  // Redistribution and use in source and binary forms, with or without
  4  // modification, are permitted provided that the following conditions are
  5  // met:
  6  //
  7  //     * Redistributions of source code must retain the above copyright
  8  // notice, this list of conditions and the following disclaimer.
  9  //     * Redistributions in binary form must reproduce the above
 10  // copyright notice, this list of conditions and the following disclaimer
 11  // in the documentation and/or other materials provided with the
 12  // distribution.
 13  //     * Neither the name of Google LLC nor the names of its
 14  // contributors may be used to endorse or promote products derived from
 15  // this software without specific prior written permission.
 16  //
 17  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 20  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 21  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 22  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 23  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 24  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 25  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 27  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28  
 29  #ifdef HAVE_CONFIG_H
 30  #include <config.h>  // Must come first
 31  #endif
 32  
 33  #include <string.h>
 34  
 35  #include "common/convert_UTF.h"
 36  #include "common/scoped_ptr.h"
 37  #include "common/string_conversion.h"
 38  #include "common/using_std_string.h"
 39  
 40  namespace google_breakpad {
 41  
 42  using std::vector;
 43  
 44  void UTF8ToUTF16(const char* in, vector<uint16_t>* out) {
 45    size_t source_length = strlen(in);
 46    const UTF8* source_ptr = reinterpret_cast<const UTF8*>(in);
 47    const UTF8* source_end_ptr = source_ptr + source_length;
 48    // Erase the contents and zero fill to the expected size
 49    out->clear();
 50    out->insert(out->begin(), source_length, 0);
 51    uint16_t* target_ptr = &(*out)[0];
 52    uint16_t* target_end_ptr = target_ptr + out->capacity();
 53    ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
 54                                                 &target_ptr, target_end_ptr,
 55                                                 strictConversion);
 56  
 57    // Resize to be the size of the # of converted characters + NULL
 58    out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0);
 59  }
 60  
 61  int UTF8ToUTF16Char(const char* in, int in_length, uint16_t out[2]) {
 62    const UTF8* source_ptr = reinterpret_cast<const UTF8*>(in);
 63    const UTF8* source_end_ptr = source_ptr + 1;
 64    uint16_t* target_ptr = out;
 65    uint16_t* target_end_ptr = target_ptr + 2;
 66    out[0] = out[1] = 0;
 67  
 68    // Process one character at a time
 69    while (1) {
 70      ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
 71                                                   &target_ptr, target_end_ptr,
 72                                                   strictConversion);
 73  
 74      if (result == conversionOK)
 75        return static_cast<int>(source_ptr - reinterpret_cast<const UTF8*>(in));
 76  
 77      // Add another character to the input stream and try again
 78      source_ptr = reinterpret_cast<const UTF8*>(in);
 79      ++source_end_ptr;
 80  
 81      if (source_end_ptr > reinterpret_cast<const UTF8*>(in) + in_length)
 82        break;
 83    }
 84  
 85    return 0;
 86  }
 87  
 88  void UTF32ToUTF16(const wchar_t* in, vector<uint16_t>* out) {
 89    size_t source_length = wcslen(in);
 90    const UTF32* source_ptr = reinterpret_cast<const UTF32*>(in);
 91    const UTF32* source_end_ptr = source_ptr + source_length;
 92    // Erase the contents and zero fill to the expected size
 93    out->clear();
 94    out->insert(out->begin(), source_length, 0);
 95    uint16_t* target_ptr = &(*out)[0];
 96    uint16_t* target_end_ptr = target_ptr + out->capacity();
 97    ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr,
 98                                                  &target_ptr, target_end_ptr,
 99                                                  strictConversion);
100  
101    // Resize to be the size of the # of converted characters + NULL
102    out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0);
103  }
104  
105  void UTF32ToUTF16Char(wchar_t in, uint16_t out[2]) {
106    const UTF32* source_ptr = reinterpret_cast<const UTF32*>(&in);
107    const UTF32* source_end_ptr = source_ptr + 1;
108    uint16_t* target_ptr = out;
109    uint16_t* target_end_ptr = target_ptr + 2;
110    out[0] = out[1] = 0;
111    ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr,
112                                                  &target_ptr, target_end_ptr,
113                                                  strictConversion);
114  
115    if (result != conversionOK) {
116      out[0] = out[1] = 0;
117    }
118  }
119  
// Returns |value| with its high and low bytes exchanged.
static inline uint16_t Swap(uint16_t value) {
  const uint16_t low_moved_up = static_cast<uint16_t>((value & 0x00FFu) << 8);
  const uint16_t high_moved_down = static_cast<uint16_t>(value >> 8);
  return static_cast<uint16_t>(low_moved_up | high_moved_down);
}
123  
124  string UTF16ToUTF8(const vector<uint16_t>& in, bool swap) {
125    const UTF16* source_ptr = &in[0];
126    scoped_array<uint16_t> source_buffer;
127  
128    // If we're to swap, we need to make a local copy and swap each byte pair
129    if (swap) {
130      int idx = 0;
131      source_buffer.reset(new uint16_t[in.size()]);
132      UTF16* source_buffer_ptr = source_buffer.get();
133      for (vector<uint16_t>::const_iterator it = in.begin();
134           it != in.end(); ++it, ++idx)
135        source_buffer_ptr[idx] = Swap(*it);
136  
137      source_ptr = source_buffer.get();
138    }
139  
140    // The maximum expansion would be 4x the size of the input string.
141    const UTF16* source_end_ptr = source_ptr + in.size();
142    size_t target_capacity = in.size() * 4;
143    scoped_array<UTF8> target_buffer(new UTF8[target_capacity]);
144    UTF8* target_ptr = target_buffer.get();
145    UTF8* target_end_ptr = target_ptr + target_capacity;
146    ConversionResult result = ConvertUTF16toUTF8(&source_ptr, source_end_ptr,
147                                                 &target_ptr, target_end_ptr,
148                                                 strictConversion);
149  
150    if (result == conversionOK) {
151      const char* targetPtr = reinterpret_cast<const char*>(target_buffer.get());
152      return targetPtr;
153    }
154  
155    return "";
156  }
157  
158  }  // namespace google_breakpad