# mlflow/transformers/peft.py
"""
PEFT (Parameter-Efficient Fine-Tuning) is a library for efficiently adapting large pretrained
models without fine-tuning all of the model's parameters; instead, only a small number of (extra)
parameters are trained. Users can define a PEFT model that wraps a Transformers model to apply a
thin adapter layer on top of the base model. The PEFT model provides almost the same APIs as the
original model, such as from_pretrained() and save_pretrained().
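
Example (illustrative sketch, not part of this module; assumes the peft and transformers
packages are installed and uses a placeholder model name)::

    from peft import LoraConfig, get_peft_model
    from transformers import AutoModelForCausalLM

    base_model = AutoModelForCausalLM.from_pretrained("gpt2")
    peft_model = get_peft_model(base_model, LoraConfig(task_type="CAUSAL_LM"))
    peft_model.save_pretrained("/tmp/lora_adapter")  # writes only the adapter weights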
"""

_PEFT_ADAPTOR_DIR_NAME = "peft"


def is_peft_model(model) -> bool:
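    """Return True if the given model is a PEFT model, or False if peft is not installed."""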
    try:
        from peft import PeftModel
    except ImportError:
        return False

    return isinstance(model, PeftModel)


def get_peft_base_model(model):
    """Extract the base model from a PEFT model."""
    peft_config = model.peft_config.get(model.active_adapter) if model.peft_config else None

    # PEFT usually wraps the base model with two additional classes: one is the PeftModel class
    # and the other is an adapter-specific class such as LoraModel, so the class hierarchy
    # looks like PeftModel -> LoraModel -> BaseModel.
    # However, when the PEFT config is one for "prompt learning", there is no adapter class
    # and the PeftModel class wraps the base model directly.
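    # For example, for a LoRA adapter (illustrative; exact classes depend on the peft version):
    #   type(model)                  -> peft.PeftModel (or a task-specific subclass)
    #   type(model.base_model)       -> peft.tuners.lora.LoraModel
    #   type(model.base_model.model) -> the original transformers PreTrainedModel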
    if peft_config and not peft_config.is_prompt_learning:
        return model.base_model.model

    return model.base_model


def get_model_with_peft_adapter(base_model, peft_adapter_path):
    """
    Apply the PEFT adapter to the base model to create a PEFT model.

    NB: An alternative way to load a PEFT adapter is the load_adapter API, e.g.
    `base_model.load_adapter(peft_adapter_path)`, which injects the adapter weights
    into the model in-place and hence reduces the memory footprint. However, doing so
    returns the base model class rather than the PEFT model, losing properties such
    as peft_config. This is not preferable because the load_model API should return
    the exact same object that was saved. Hence we construct the PEFT model instead
    of injecting the adapter in-place, preferring consistency over the memory saving,
    which should be small in most cases.
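
    Example (illustrative sketch; the model name and adapter path are placeholders)::

        from transformers import AutoModelForCausalLM

        base_model = AutoModelForCausalLM.from_pretrained("gpt2")
        peft_model = get_model_with_peft_adapter(base_model, "/path/to/peft_adapter")
        assert is_peft_model(peft_model)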
    """
    from peft import PeftModel

    return PeftModel.from_pretrained(base_model, peft_adapter_path)