string_conversion.cc
1 // Copyright 2006 Google LLC 2 // 3 // Redistribution and use in source and binary forms, with or without 4 // modification, are permitted provided that the following conditions are 5 // met: 6 // 7 // * Redistributions of source code must retain the above copyright 8 // notice, this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above 10 // copyright notice, this list of conditions and the following disclaimer 11 // in the documentation and/or other materials provided with the 12 // distribution. 13 // * Neither the name of Google LLC nor the names of its 14 // contributors may be used to endorse or promote products derived from 15 // this software without specific prior written permission. 16 // 17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29 #ifdef HAVE_CONFIG_H 30 #include <config.h> // Must come first 31 #endif 32 33 #include <string.h> 34 35 #include "common/convert_UTF.h" 36 #include "common/scoped_ptr.h" 37 #include "common/string_conversion.h" 38 #include "common/using_std_string.h" 39 40 namespace google_breakpad { 41 42 using std::vector; 43 44 void UTF8ToUTF16(const char* in, vector<uint16_t>* out) { 45 size_t source_length = strlen(in); 46 const UTF8* source_ptr = reinterpret_cast<const UTF8*>(in); 47 const UTF8* source_end_ptr = source_ptr + source_length; 48 // Erase the contents and zero fill to the expected size 49 out->clear(); 50 out->insert(out->begin(), source_length, 0); 51 uint16_t* target_ptr = &(*out)[0]; 52 uint16_t* target_end_ptr = target_ptr + out->capacity(); 53 ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr, 54 &target_ptr, target_end_ptr, 55 strictConversion); 56 57 // Resize to be the size of the # of converted characters + NULL 58 out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0); 59 } 60 61 int UTF8ToUTF16Char(const char* in, int in_length, uint16_t out[2]) { 62 const UTF8* source_ptr = reinterpret_cast<const UTF8*>(in); 63 const UTF8* source_end_ptr = source_ptr + 1; 64 uint16_t* target_ptr = out; 65 uint16_t* target_end_ptr = target_ptr + 2; 66 out[0] = out[1] = 0; 67 68 // Process one character at a time 69 while (1) { 70 ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr, 71 &target_ptr, target_end_ptr, 72 strictConversion); 73 74 if (result == conversionOK) 75 return static_cast<int>(source_ptr - reinterpret_cast<const UTF8*>(in)); 76 77 // Add another character to the input stream and try again 78 source_ptr = reinterpret_cast<const UTF8*>(in); 79 ++source_end_ptr; 80 81 if (source_end_ptr > reinterpret_cast<const UTF8*>(in) + in_length) 82 break; 83 } 84 85 return 0; 86 } 87 88 void UTF32ToUTF16(const wchar_t* in, vector<uint16_t>* out) { 89 size_t source_length = wcslen(in); 90 const UTF32* source_ptr = reinterpret_cast<const UTF32*>(in); 91 const UTF32* source_end_ptr = source_ptr + source_length; 92 // Erase the contents and zero fill to the expected size 93 out->clear(); 94 out->insert(out->begin(), source_length, 0); 95 uint16_t* target_ptr = &(*out)[0]; 96 uint16_t* target_end_ptr = target_ptr + out->capacity(); 97 ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr, 98 &target_ptr, target_end_ptr, 99 strictConversion); 100 101 // Resize to be the size of the # of converted characters + NULL 102 out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0); 103 } 104 105 void UTF32ToUTF16Char(wchar_t in, uint16_t out[2]) { 106 const UTF32* source_ptr = reinterpret_cast<const UTF32*>(&in); 107 const UTF32* source_end_ptr = source_ptr + 1; 108 uint16_t* target_ptr = out; 109 uint16_t* target_end_ptr = target_ptr + 2; 110 out[0] = out[1] = 0; 111 ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr, 112 &target_ptr, target_end_ptr, 113 strictConversion); 114 115 if (result != conversionOK) { 116 out[0] = out[1] = 0; 117 } 118 } 119 120 static inline uint16_t Swap(uint16_t value) { 121 return (value >> 8) | static_cast<uint16_t>(value << 8); 122 } 123 124 string UTF16ToUTF8(const vector<uint16_t>& in, bool swap) { 125 const UTF16* source_ptr = &in[0]; 126 scoped_array<uint16_t> source_buffer; 127 128 // If we're to swap, we need to make a local copy and swap each byte pair 129 if (swap) { 130 int idx = 0; 131 source_buffer.reset(new uint16_t[in.size()]); 132 UTF16* source_buffer_ptr = source_buffer.get(); 133 for (vector<uint16_t>::const_iterator it = in.begin(); 134 it != in.end(); ++it, ++idx) 135 source_buffer_ptr[idx] = Swap(*it); 136 137 source_ptr = source_buffer.get(); 138 } 139 140 // The maximum expansion would be 4x the size of the input string. 141 const UTF16* source_end_ptr = source_ptr + in.size(); 142 size_t target_capacity = in.size() * 4; 143 scoped_array<UTF8> target_buffer(new UTF8[target_capacity]); 144 UTF8* target_ptr = target_buffer.get(); 145 UTF8* target_end_ptr = target_ptr + target_capacity; 146 ConversionResult result = ConvertUTF16toUTF8(&source_ptr, source_end_ptr, 147 &target_ptr, target_end_ptr, 148 strictConversion); 149 150 if (result == conversionOK) { 151 const char* targetPtr = reinterpret_cast<const char*>(target_buffer.get()); 152 return targetPtr; 153 } 154 155 return ""; 156 } 157 158 } // namespace google_breakpad