encodings_test.go
1 // Copyright (c) 2024-2026 Tencent Zhuque Lab. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Requirement: Any integration or derivative work must explicitly attribute 16 // Tencent Zhuque Lab (https://github.com/Tencent/AI-Infra-Guard) in its 17 // documentation or user interface, as detailed in the NOTICE file. 18 19 // Package httpx 编码转换测试 20 package httpx 21 22 import ( 23 "testing" 24 25 "github.com/stretchr/testify/assert" 26 "github.com/stretchr/testify/require" 27 "golang.org/x/text/encoding/simplifiedchinese" 28 "golang.org/x/text/encoding/traditionalchinese" 29 "golang.org/x/text/transform" 30 "io/ioutil" 31 "strings" 32 ) 33 34 // encodeToGBK 将 UTF-8 字符串编码为 GBK 字节(测试辅助函数) 35 func encodeToGBK(s string) ([]byte, error) { 36 encoder := simplifiedchinese.GBK.NewEncoder() 37 reader := transform.NewReader(strings.NewReader(s), encoder) 38 return ioutil.ReadAll(reader) 39 } 40 41 // encodeToBig5 将 UTF-8 字符串编码为 BIG5 字节(测试辅助函数) 42 func encodeToBig5(s string) ([]byte, error) { 43 encoder := traditionalchinese.Big5.NewEncoder() 44 reader := transform.NewReader(strings.NewReader(s), encoder) 45 return ioutil.ReadAll(reader) 46 } 47 48 // TestDecodegbk_ChineseText 测试 GBK 中文转 UTF-8 49 func TestDecodegbk_ChineseText(t *testing.T) { 50 original := "你好世界" 51 // 先将 UTF-8 编码为 GBK 52 gbkBytes, err := encodeToGBK(original) 53 require.NoError(t, err, "编码为 GBK 不应出错") 54 55 // 再解码回 UTF-8 56 utf8Bytes, err := Decodegbk(gbkBytes) 57 require.NoError(t, err, "GBK 解码不应出错") 58 assert.Equal(t, original, string(utf8Bytes), "GBK 解码后应还原为原始中文字符串") 59 } 60 61 // TestDecodegbk_ASCII 测试纯 ASCII 字符 GBK 解码(ASCII 在 GBK 中兼容) 62 func TestDecodegbk_ASCII(t *testing.T) { 63 input := []byte("Hello, World!") 64 result, err := Decodegbk(input) 65 require.NoError(t, err, "ASCII 输入的 GBK 解码不应出错") 66 assert.Equal(t, "Hello, World!", string(result), "ASCII 字符应原样保留") 67 } 68 69 // TestDecodegbk_Empty 测试空字节切片 GBK 解码 70 func TestDecodegbk_Empty(t *testing.T) { 71 result, err := Decodegbk([]byte{}) 72 require.NoError(t, err, "空输入解码不应出错") 73 assert.Equal(t, []byte{}, result, "空输入应返回空结果") 74 } 75 76 // TestDecodebig5_ChineseText 测试 BIG5 繁体中文转 UTF-8 77 func TestDecodebig5_ChineseText(t *testing.T) { 78 original := "繁體中文測試" 79 // 先将 UTF-8 编码为 BIG5 80 big5Bytes, err := encodeToBig5(original) 81 require.NoError(t, err, "编码为 BIG5 不应出错") 82 83 // 再解码回 UTF-8 84 utf8Bytes, err := Decodebig5(big5Bytes) 85 require.NoError(t, err, "BIG5 解码不应出错") 86 assert.Equal(t, original, string(utf8Bytes), "BIG5 解码后应还原为原始繁体中文字符串") 87 } 88 89 // TestDecodebig5_ASCII 测试纯 ASCII 字符 BIG5 解码 90 func TestDecodebig5_ASCII(t *testing.T) { 91 input := []byte("Test123") 92 result, err := Decodebig5(input) 93 require.NoError(t, err, "ASCII 输入的 BIG5 解码不应出错") 94 assert.Equal(t, "Test123", string(result), "ASCII 字符应原样保留") 95 } 96 97 // TestDecodebig5_Empty 测试空字节切片 BIG5 解码 98 func TestDecodebig5_Empty(t *testing.T) { 99 result, err := Decodebig5([]byte{}) 100 require.NoError(t, err, "空输入解码不应出错") 101 assert.Equal(t, []byte{}, result, "空输入应返回空结果") 102 } 103 104 // TestDecodegbk_RoundTrip 测试 GBK 编解码往返一致性 105 func TestDecodegbk_RoundTrip(t *testing.T) { 106 samples := []string{ 107 "腾讯安全", 108 "AI-Infra-Guard", 109 "朱雀实验室", 110 } 111 for _, s := range samples { 112 gbkBytes, err := encodeToGBK(s) 113 require.NoError(t, err) 114 decoded, err := Decodegbk(gbkBytes) 115 require.NoError(t, err) 116 assert.Equal(t, s, string(decoded), "GBK 往返转换应还原原始字符串: %q", s) 117 } 118 } 119 120 // TestDecodebig5_RoundTrip 测试 BIG5 编解码往返一致性 121 func TestDecodebig5_RoundTrip(t *testing.T) { 122 samples := []string{ 123 "繁體字測試", 124 "台灣繁體中文", 125 } 126 for _, s := range samples { 127 big5Bytes, err := encodeToBig5(s) 128 require.NoError(t, err) 129 decoded, err := Decodebig5(big5Bytes) 130 require.NoError(t, err) 131 assert.Equal(t, s, string(decoded), "BIG5 往返转换应还原原始字符串: %q", s) 132 } 133 }