/ common / utils / code_language_test.go
code_language_test.go
  1  // Copyright (c) 2024-2026 Tencent Zhuque Lab. All rights reserved.
  2  //
  3  // Licensed under the Apache License, Version 2.0 (the "License");
  4  // you may not use this file except in compliance with the License.
  5  // You may obtain a copy of the License at
  6  //
  7  //     http://www.apache.org/licenses/LICENSE-2.0
  8  //
  9  // Unless required by applicable law or agreed to in writing, software
 10  // distributed under the License is distributed on an "AS IS" BASIS,
 11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12  // See the License for the specific language governing permissions and
 13  // limitations under the License.
 14  //
 15  // Requirement: Any integration or derivative work must explicitly attribute
 16  // Tencent Zhuque Lab (https://github.com/Tencent/AI-Infra-Guard) in its
 17  // documentation or user interface, as detailed in the NOTICE file.
 18  
 19  package utils
 20  
 21  import (
 22  	"os"
 23  	"path/filepath"
 24  	"testing"
 25  
 26  	"github.com/stretchr/testify/assert"
 27  	"github.com/stretchr/testify/require"
 28  )
 29  
 30  // TestClassifyLanguage 测试扩展名到语言的映射
 31  func TestClassifyLanguage(t *testing.T) {
 32  	cases := []struct {
 33  		ext      string
 34  		expected string
 35  	}{
 36  		{".go", "Go"},
 37  		{".py", "Python"},
 38  		{".java", "Java"},
 39  		{".rs", "Rust"},
 40  		{".php", "PHP"},
 41  		{".rb", "Ruby"},
 42  		{".swift", "Swift"},
 43  		{".c", "C"},
 44  		{".h", "C"},
 45  		{".cpp", "C++"},
 46  		{".hpp", "C++"},
 47  		{".js", "JavaScript"},
 48  		{".ts", "TypeScript"},
 49  		{".html", "HTML"},
 50  		{".css", "CSS"},
 51  		{".sql", "SQL"},
 52  		{".sh", "Shell"},
 53  		// 未知扩展名应返回空字符串
 54  		{".unknown", ""},
 55  		{"", ""},
 56  		{".md", ""},
 57  		{".txt", ""},
 58  	}
 59  
 60  	for _, tc := range cases {
 61  		t.Run("ext="+tc.ext, func(t *testing.T) {
 62  			result := classifyLanguage(tc.ext)
 63  			assert.Equal(t, tc.expected, result, "扩展名 %q 应映射到语言 %q", tc.ext, tc.expected)
 64  		})
 65  	}
 66  }
 67  
 68  // TestGetTopLanguage_Empty 测试空 map 返回 "Other"
 69  func TestGetTopLanguage_Empty(t *testing.T) {
 70  	result := GetTopLanguage(map[string]int{})
 71  	// 空 map 应返回 "Other"
 72  	assert.Equal(t, "Other", result, "空 map 应返回 'Other'")
 73  }
 74  
 75  // TestGetTopLanguage_SingleEntry 测试单个语言返回该语言
 76  func TestGetTopLanguage_SingleEntry(t *testing.T) {
 77  	stats := map[string]int{"Go": 10}
 78  	result := GetTopLanguage(stats)
 79  	assert.Equal(t, "Go", result, "单个语言时应返回该语言")
 80  }
 81  
 82  // TestGetTopLanguage_MultipleEntries 测试多语言返回文件数最多的语言
 83  func TestGetTopLanguage_MultipleEntries(t *testing.T) {
 84  	stats := map[string]int{
 85  		"Go":     50,
 86  		"Python": 30,
 87  		"Java":   10,
 88  	}
 89  	result := GetTopLanguage(stats)
 90  	// Go 文件最多,应返回 Go
 91  	assert.Equal(t, "Go", result, "文件数最多的语言应排第一")
 92  }
 93  
 94  // TestGetTopLanguage_TieBreak 测试相同数量时不 panic(仅验证返回非空)
 95  func TestGetTopLanguage_TieBreak(t *testing.T) {
 96  	stats := map[string]int{
 97  		"Go":     5,
 98  		"Python": 5,
 99  	}
100  	result := GetTopLanguage(stats)
101  	// 相同数量时应返回其中一个(具体结果取决于排序稳定性,只验证不为空)
102  	assert.NotEmpty(t, result, "相同数量时应返回非空语言")
103  }
104  
105  // TestAnalyzeLanguage_EmptyDir 测试空目录返回空 map
106  func TestAnalyzeLanguage_EmptyDir(t *testing.T) {
107  	// 创建临时空目录
108  	dir := t.TempDir()
109  	stats := AnalyzeLanguage(dir)
110  	assert.Empty(t, stats, "空目录应返回空 map")
111  }
112  
113  // TestAnalyzeLanguage_WithFiles 测试含有源文件的目录
114  func TestAnalyzeLanguage_WithFiles(t *testing.T) {
115  	// 创建临时目录并写入测试文件
116  	dir := t.TempDir()
117  
118  	// 创建 2 个 Go 文件和 1 个 Python 文件
119  	require.NoError(t, os.WriteFile(filepath.Join(dir, "main.go"), []byte("package main"), 0644))
120  	require.NoError(t, os.WriteFile(filepath.Join(dir, "util.go"), []byte("package util"), 0644))
121  	require.NoError(t, os.WriteFile(filepath.Join(dir, "script.py"), []byte("print('hello')"), 0644))
122  
123  	stats := AnalyzeLanguage(dir)
124  
125  	// Go 应统计为 2,Python 应统计为 1
126  	assert.Equal(t, 2, stats["Go"], "Go 文件数应为 2")
127  	assert.Equal(t, 1, stats["Python"], "Python 文件数应为 1")
128  }
129  
130  // TestAnalyzeLanguage_UnknownExtensions 测试未知扩展名不计入统计
131  func TestAnalyzeLanguage_UnknownExtensions(t *testing.T) {
132  	dir := t.TempDir()
133  
134  	// 创建未知扩展名文件
135  	require.NoError(t, os.WriteFile(filepath.Join(dir, "README.md"), []byte("# docs"), 0644))
136  	require.NoError(t, os.WriteFile(filepath.Join(dir, "config.yaml"), []byte("key: val"), 0644))
137  	// 一个已知语言文件
138  	require.NoError(t, os.WriteFile(filepath.Join(dir, "app.js"), []byte("console.log()"), 0644))
139  
140  	stats := AnalyzeLanguage(dir)
141  
142  	// 未知扩展名不计入,JavaScript 应统计为 1
143  	assert.Equal(t, 1, stats["JavaScript"], "JavaScript 文件数应为 1")
144  	assert.Len(t, stats, 1, "只有一种已知语言")
145  }
146  
147  // TestAnalyzeLanguage_SubDir 测试子目录也被递归统计
148  func TestAnalyzeLanguage_SubDir(t *testing.T) {
149  	dir := t.TempDir()
150  	subDir := filepath.Join(dir, "sub")
151  	require.NoError(t, os.Mkdir(subDir, 0755))
152  
153  	// 在根目录和子目录各放一个 Go 文件
154  	require.NoError(t, os.WriteFile(filepath.Join(dir, "root.go"), []byte(""), 0644))
155  	require.NoError(t, os.WriteFile(filepath.Join(subDir, "sub.go"), []byte(""), 0644))
156  
157  	stats := AnalyzeLanguage(dir)
158  	// 递归遍历后 Go 总数应为 2
159  	assert.Equal(t, 2, stats["Go"], "子目录中的 Go 文件也应被统计")
160  }
161  
162  // TestAnalyzeLanguage_GetTopLanguage_Integration 集成测试:AnalyzeLanguage + GetTopLanguage
163  func TestAnalyzeLanguage_GetTopLanguage_Integration(t *testing.T) {
164  	dir := t.TempDir()
165  
166  	// 创建更多 Rust 文件,让 Rust 成为最多语言
167  	for _, name := range []string{"a.rs", "b.rs", "c.rs"} {
168  		require.NoError(t, os.WriteFile(filepath.Join(dir, name), []byte("fn main(){}"), 0644))
169  	}
170  	require.NoError(t, os.WriteFile(filepath.Join(dir, "main.py"), []byte(""), 0644))
171  
172  	stats := AnalyzeLanguage(dir)
173  	top := GetTopLanguage(stats)
174  	// Rust 文件最多,应为 top language
175  	assert.Equal(t, "Rust", top, "Rust 文件最多时应为 top language")
176  }