# repo3-fine-tuning-template/model_utils.py
import logging
from typing import Optional

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model, TaskType

logger = logging.getLogger(__name__)


def load_model_and_tokenizer(
    model_name: str,
    use_4bit: bool = True,
    use_8bit: bool = False,
    bf16: bool = True,
    device_map: str = "auto",
):
    """
    Load the model and tokenizer.

    Args:
        model_name: Model name or path
        use_4bit: Whether to use 4-bit quantization
        use_8bit: Whether to use 8-bit quantization (ignored if use_4bit is True)
        bf16: Whether to use bf16 precision
        device_map: Device map

    Returns:
        tuple: (model, tokenizer)
    """
    logger.info(f"Loading model: {model_name}")

    # Configure quantization
    quantization_config = None
    if use_4bit:
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16 if bf16 else torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
        )
    elif use_8bit:
        quantization_config = BitsAndBytesConfig(load_in_8bit=True)

    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
        padding_side="left",
    )

    # Fall back to the EOS token if no pad token is defined
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Load the model. Note: "flash_attention_2" additionally requires the
    # flash-attn package; CUDA availability alone does not guarantee it.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=quantization_config,
        device_map=device_map,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16 if bf16 else torch.float16,
        attn_implementation="flash_attention_2" if torch.cuda.is_available() else "eager",
    )

    logger.info("Model and tokenizer loaded")
    return model, tokenizer


def create_peft_config(
    lora_r: int = 16,
    lora_alpha: int = 32,
    lora_dropout: float = 0.1,
    target_modules: Optional[list] = None,
):
    """
    Create a PEFT configuration.

    Args:
        lora_r: LoRA rank
        lora_alpha: LoRA alpha
        lora_dropout: LoRA dropout
        target_modules: List of target modules (defaults to all attention
            and MLP projections)

    Returns:
        LoraConfig: LoRA configuration
    """
    if target_modules is None:
        target_modules = [
            "q_proj", "v_proj", "k_proj", "o_proj",
            "gate_proj", "up_proj", "down_proj",
        ]

    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        target_modules=target_modules,
        bias="none",
    )

    logger.info(
        f"PEFT config created: r={lora_r}, alpha={lora_alpha}, dropout={lora_dropout}"
    )
    return peft_config


def apply_peft_to_model(model, peft_config):
    """
    Apply a PEFT configuration to the model.

    Args:
        model: Base model
        peft_config: PEFT configuration

    Returns:
        model: Model with PEFT adapters attached
    """
    logger.info("Applying PEFT config to the model...")

    # Freeze the base model's parameters; only adapter weights will train
    for param in model.parameters():
        param.requires_grad = False

    # Wrap the model with the LoRA adapters
    model = get_peft_model(model, peft_config)

    # Log the trainable-parameter summary
    model.print_trainable_parameters()

    logger.info("PEFT config applied")
    return model
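
# A minimal usage sketch for the three helpers above. The model name is
# illustrative, not a repo default; the default 4-bit path assumes a CUDA GPU
# with the bitsandbytes package installed:
#
#   model, tokenizer = load_model_and_tokenizer("meta-llama/Llama-2-7b-hf")
#   peft_config = create_peft_config(lora_r=16, lora_alpha=32)
#   model = apply_peft_to_model(model, peft_config)
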
def create_training_arguments(
    output_dir: str,
    num_train_epochs: int = 3,
    per_device_train_batch_size: int = 2,
    per_device_eval_batch_size: int = 2,
    gradient_accumulation_steps: int = 4,
    learning_rate: float = 5e-5,
    warmup_steps: int = 100,
    logging_steps: int = 10,
    save_steps: int = 500,
    eval_steps: int = 500,
    save_total_limit: int = 3,
    load_best_model_at_end: bool = True,
    metric_for_best_model: str = "eval_loss",
    greater_is_better: bool = False,
    logging_dir: str = "logs",
):
    """
    Create the training arguments.

    Args:
        output_dir: Output directory
        num_train_epochs: Number of training epochs
        per_device_train_batch_size: Training batch size per device
        per_device_eval_batch_size: Evaluation batch size per device
        gradient_accumulation_steps: Gradient accumulation steps
        learning_rate: Learning rate
        warmup_steps: Number of warmup steps
        logging_steps: Logging interval in steps
        save_steps: Checkpoint-saving interval in steps
        eval_steps: Evaluation interval in steps
        save_total_limit: Maximum number of checkpoints to keep
        load_best_model_at_end: Whether to load the best model at the end
        metric_for_best_model: Metric used to select the best model
        greater_is_better: Whether a larger metric value is better
        logging_dir: Logging directory

    Returns:
        TrainingArguments: Training arguments
    """
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=num_train_epochs,
        per_device_train_batch_size=per_device_train_batch_size,
        per_device_eval_batch_size=per_device_eval_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        learning_rate=learning_rate,
        warmup_steps=warmup_steps,
        logging_steps=logging_steps,
        save_steps=save_steps,
        eval_steps=eval_steps,
        save_total_limit=save_total_limit,
        load_best_model_at_end=load_best_model_at_end,
        metric_for_best_model=metric_for_best_model,
        greater_is_better=greater_is_better,
        logging_dir=logging_dir,
        logging_strategy="steps",
        eval_strategy="steps",
        save_strategy="steps",
        report_to="wandb" if torch.cuda.is_available() else "none",
        remove_unused_columns=False,
        dataloader_pin_memory=False,
        bf16=torch.cuda.is_available() and torch.cuda.is_bf16_supported(),
        fp16=torch.cuda.is_available() and not torch.cuda.is_bf16_supported(),
        dataloader_num_workers=4,
        group_by_length=True,
        ddp_find_unused_parameters=False,
    )

    logger.info("Training arguments created")
    return training_args


def save_model_and_tokenizer(model, tokenizer, output_dir: str):
    """
    Save the model and tokenizer.

    Args:
        model: Trained model
        tokenizer: Tokenizer
        output_dir: Output directory
    """
    logger.info(f"Saving model and tokenizer to: {output_dir}")

    # Save the tokenizer
    tokenizer.save_pretrained(output_dir)

    # Save the model. For a PEFT-wrapped model, save_pretrained writes only
    # the adapter weights, not the full base model.
    model.save_pretrained(output_dir)

    logger.info("Model and tokenizer saved")
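

if __name__ == "__main__":
    # A small smoke-test sketch for the configuration helpers. It avoids the
    # model-loading path (which downloads weights; see the comment example
    # above) and only exercises the cheap, CPU-safe functions. The output
    # directory name is arbitrary, not a repo convention.
    logging.basicConfig(level=logging.INFO)

    args = create_training_arguments(output_dir="outputs/smoke-test")
    logger.info(
        "Effective train batch size per device: "
        f"{args.per_device_train_batch_size * args.gradient_accumulation_steps}"
    )

    config = create_peft_config(lora_r=8, lora_alpha=16)
    logger.info(f"LoRA target modules: {config.target_modules}")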