
References:

  • https://openbayes.com/console/bbruceyuan/containers/OPg9Oo99ET6
  • https://www.bilibili.com/video/BV1NM1tY3Eu5

LoRA Fine-Tuning Example

First, install the dependencies:

!pip install -q accelerate peft bitsandbytes transformers sentencepiece
!pip install trl==0.12.0

Configure the environment variable:

import os
os.environ["HF_HOME"] = "/openbayes/home/huggingface"

Load the dataset (a Classical Chinese / Modern Chinese translation dataset):

from transformers import AutoTokenizer
from datasets import load_dataset

# Load a tokenizer so we can use its chat template
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

def format_prompt(example):
    chat = [
        {"role": "system", "content": "你是一个非常棒的人工智能助手,是UP主 “用代码打点酱油的chaofa” 开发的"},
        {"role": "user", "content": example["input"]},
        {"role": "assistant", "content": example["target"]},
    ]
    prompt = tokenizer.apply_chat_template(chat, tokenize=False)
    return {"text": prompt}

# Load and format the data using the model's chat template
dataset = load_dataset("YeungNLP/firefly-train-1.1M", split="train[:500]")
dataset = dataset.map(format_prompt)

Note that we use the model's own tokenizer here to apply its chat template to each example.

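The same chat template is needed again at inference time. A minimal sketch (the messages below simply reuse the system prompt and an example sentence from this post) of how add_generation_prompt=True appends the assistant header so the model continues with an answer:

# Sketch: build an inference-time prompt with the same chat template.
# add_generation_prompt=True appends the "<|im_start|>assistant\n" header.
chat = [
    {"role": "system", "content": "你是一个非常棒的人工智能助手,是UP主 “用代码打点酱油的chaofa” 开发的"},
    {"role": "user", "content": "天气太热了,所以我今天没有学习一点。\n翻译成文言文:"},
]
print(tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True))
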
The resulting dataset looks like this:

Dataset({
    features: ['kind', 'input', 'target', 'text'],
    num_rows: 500
})

Then take a look at one formatted prompt:

# Example of formatted prompt
print(dataset["text"][100])

Output:
<|im_start|>system
你是一个非常棒的人工智能助手,是UP主 “用代码打点酱油的chaofa” 开发的<|im_end|>
<|im_start|>user
我当时在三司,访求太祖、仁宗的手书敕令没有见到,然而人人能传诵那些话,禁止私盐的建议也最终被搁置。
翻译成文言文:<|im_end|>
<|im_start|>assistant
余时在三司,求访两朝墨敕不获,然人人能诵其言,议亦竟寝。<|im_end|>

Next, load the model; we use quantization to save GPU memory:

# Quantize to save GPU memory
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "Qwen/Qwen2.5-0.5B-Instruct"

# 4-bit quantization configuration - the "Q" in QLoRA
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                 # Use 4-bit precision model loading
    bnb_4bit_quant_type="nf4",         # Quantization type
    bnb_4bit_compute_dtype="float16",  # Compute dtype
    bnb_4bit_use_double_quant=True,    # Apply nested quantization
)

# Load the model to train on the GPU
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    # Leave this out for regular SFT
    quantization_config=bnb_config,
)
model.config.use_cache = False
model.config.pretraining_tp = 1  # These two settings are only needed for k-bit (quantized) training

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# tokenizer.pad_token = "<PAD>"  # Qwen2's pad token is not <pad> (it uses <|im_end|>), so leave this commented out
tokenizer.padding_side = "left"  # Hardly matters for training, but important for inference

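The padding side mainly matters for batched generation: with left padding, the real tokens end each row, so generation continues directly from the prompt instead of from a run of pad tokens. A quick hedged sketch to see where the pads land (the two example strings are arbitrary):

# Illustration only, not part of the training flow.
batch = ["你好", "请把下面的句子翻译成文言文:今天天气很好。"]
enc = tokenizer(batch, padding=True, return_tensors="pt")
print(enc["input_ids"])       # the shorter row is padded on the left
print(enc["attention_mask"])  # zeros mark the left-side padding
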
Then configure the LoraConfig:

from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

# Prepare LoRA configuration
peft_config = LoraConfig(
    lora_alpha=32,     # LoRA scaling
    lora_dropout=0.1,  # Dropout for LoRA layers
    r=64,              # Rank; the more training data, the larger this can be
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['k_proj', 'v_proj', 'q_proj'],  # Layers to target
    # Alternatively, target all linear layers:
    # ['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
)

# Prepare model for k-bit training
model = prepare_model_for_kbit_training(model)
# If prepare_model_for_kbit_training is skipped and the training args set
# gradient_checkpointing=True (also just a memory saver), then
# model.enable_input_require_grads() must be called instead.
# model.enable_input_require_grads()
model = get_peft_model(model, peft_config)

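At this point it is worth checking how few parameters LoRA actually trains; PEFT models expose print_trainable_parameters() for exactly this (a quick sanity check, not in the original notebook):

# With r=64 on the q/k/v projections, only a small fraction of the 0.5B
# parameters is trainable; the exact numbers depend on the target modules.
model.print_trainable_parameters()
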
Some training configuration:

from transformers import TrainingArguments

output_dir = "./results"

# Training arguments
training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    optim="adamw_torch",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    num_train_epochs=1,
    logging_steps=10,
    fp16=True,
    gradient_checkpointing=True,
)

Once everything is ready, simply run trl.SFTTrainer:

from trl import SFTTrainer

# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    dataset_text_field="text",   # Note: the "text" field created above
    tokenizer=tokenizer,
    args=training_arguments,
    max_seq_length=512,
    # Leave this out for regular SFT
    peft_config=peft_config,
)

# Train model
trainer.train()

# Save QLoRA weights
trainer.model.save_pretrained("qwen2.5-0.5b-instruct-chaofa")

What needs to be done after fine-tuning?

Merge the adapter (merge the LoRA weights into the base model)

from peft import AutoPeftModelForCausalLM

model = AutoPeftModelForCausalLM.from_pretrained(
    "qwen2.5-0.5b-instruct-chaofa",
    low_cpu_mem_usage=True,
    device_map="auto",
)

# Merge LoRA and base model
merged_model = model.merge_and_unload()

from transformers import pipeline

pipe = pipeline(task="text-generation", model=merged_model, tokenizer=tokenizer)
prompt_example = """<|im_start|>system
你是一个非常棒的人工智能助手,是UP主 “用代码打点酱油的chaofa” 开发的。<|im_end|>
<|im_start|>user
天气太热了,所以我今天没有学习一点。
翻译成文言文:<|im_end|>
<|im_start|>assistant
"""
print(pipe(prompt_example, max_new_tokens=50)[0]["generated_text"])

Output:

<|im_start|>system
你是一个非常棒的人工智能助手,是UP主 “用代码打点酱油的chaofa” 开发的。<|im_end|>
<|im_start|>user
天气太热了,所以我今天没有学习一点。
翻译成文言文:<|im_end|>
<|im_start|>assistant
天气甚热,故今日无学一息。

Other Fine-Tuning Methods

PPO / DPO (using TinyLlama as the example)

As before, start by loading the dataset:

from datasets import load_dataset

def format_prompt(example):
    """Format the prompt using the <|user|> template TinyLlama uses."""
    # Format answers
    system = "<|system|>\n" + example['system'] + "</s>\n"
    prompt = "<|user|>\n" + example['input'] + "</s>\n<|assistant|>\n"
    chosen = example['chosen'] + "</s>\n"
    rejected = example['rejected'] + "</s>\n"
    return {
        "prompt": system + prompt,
        "chosen": chosen,
        "rejected": rejected,
    }

# Apply formatting to the dataset and select relatively short answers
dpo_dataset = load_dataset("argilla/distilabel-intel-orca-dpo-pairs", split="train")
dpo_dataset = dpo_dataset.filter(
    lambda r:
        r["status"] != "tie" and
        r["chosen_score"] >= 8 and
        not r["in_gsm8k_train"]
)
dpo_dataset = dpo_dataset.map(format_prompt, remove_columns=dpo_dataset.column_names)
dpo_dataset
"""
Dataset({
    features: ['chosen', 'rejected', 'prompt'],
    num_rows: 5922
})
"""

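Before training it can help to eyeball one preference pair and confirm the formatting; a small optional check (the index and truncation lengths are arbitrary):

# Peek at one formatted preference pair (fields created by format_prompt above)
example = dpo_dataset[0]
print(example["prompt"])
print("CHOSEN:", example["chosen"][:200])
print("REJECTED:", example["rejected"][:200])
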
Then load the model in the same way (using quantization to save GPU memory):

from peft import AutoPeftModelForCausalLM
from transformers import BitsAndBytesConfig, AutoTokenizer

# 4-bit quantization configuration - the "Q" in QLoRA
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                 # Use 4-bit precision model loading
    bnb_4bit_quant_type="nf4",         # Quantization type
    bnb_4bit_compute_dtype="float16",  # Compute dtype
    bnb_4bit_use_double_quant=True,    # Apply nested quantization
)

# Merge LoRA and base model
model = AutoPeftModelForCausalLM.from_pretrained(
    "TinyLlama-1.1B-qlora",
    low_cpu_mem_usage=True,
    device_map="auto",
    quantization_config=bnb_config,
)
merged_model = model.merge_and_unload()

# Load LLaMA tokenizer
model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = "<PAD>"
tokenizer.padding_side = "left"

The adapter loaded here is TinyLlama-1.1B-qlora, i.e., a TinyLlama checkpoint that has already been through QLoRA SFT.

Next, configure the LoRA config as before:

from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

# Prepare LoRA configuration
peft_config = LoraConfig(
    lora_alpha=32,     # LoRA scaling
    lora_dropout=0.1,  # Dropout for LoRA layers
    r=64,              # Rank
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=    # Layers to target
        ['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj'],
)

# Prepare model for training
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

Then, for the training arguments, switch to DPOConfig so that fine-tuning uses the DPO objective:

from trl import DPOConfig

output_dir = "./results"

# Training arguments
training_arguments = DPOConfig(
    output_dir=output_dir,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    learning_rate=1e-5,
    lr_scheduler_type="cosine",
    max_steps=200,
    logging_steps=10,
    fp16=True,
    gradient_checkpointing=True,
    warmup_ratio=0.1,
)

Same recipe as before; kick off training:

from trl import DPOTrainer

# Create DPO trainer
dpo_trainer = DPOTrainer(
    model,
    args=training_arguments,
    train_dataset=dpo_dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
    beta=0.1,
    max_prompt_length=512,
    max_length=512,
)

# Train the model with DPO
dpo_trainer.train()

# Save the DPO LoRA weights (loaded below as "TinyLlama-1.1B-dpo-qlora")
dpo_trainer.model.save_pretrained("TinyLlama-1.1B-dpo-qlora")

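For reference, the beta=0.1 passed above is the temperature of the standard DPO objective (the general formulation, not anything specific to this notebook):

\mathcal{L}_{\mathrm{DPO}}(\pi_\theta; \pi_{\mathrm{ref}})
  = -\,\mathbb{E}_{(x,\,y_w,\,y_l)}\left[
      \log \sigma\!\left(
        \beta \log \frac{\pi_\theta(y_w \mid x)}{\pi_{\mathrm{ref}}(y_w \mid x)}
        - \beta \log \frac{\pi_\theta(y_l \mid x)}{\pi_{\mathrm{ref}}(y_l \mid x)}
      \right)
    \right]

A larger beta keeps the policy closer to the reference (here, the merged SFT model); a smaller beta lets it drift further toward the chosen responses.
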
Likewise, the adapters need to be merged at the end:

from peft import PeftModel

# Merge the SFT LoRA and the base model
model = AutoPeftModelForCausalLM.from_pretrained(
    "TinyLlama-1.1B-qlora",
    low_cpu_mem_usage=True,
    device_map="auto",
)
sft_model = model.merge_and_unload()

# Merge the DPO LoRA and the SFT model
dpo_model = PeftModel.from_pretrained(
    sft_model,
    "TinyLlama-1.1B-dpo-qlora",
    device_map="auto",
)
dpo_model = dpo_model.merge_and_unload()

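If the merged model is needed beyond this session, it can be written to disk like any transformers model (an optional step; the output directory below is just a placeholder name):

# Persist the fully merged model and tokenizer (directory name is a placeholder)
dpo_model.save_pretrained("tinyllama-1.1b-dpo-merged")
tokenizer.save_pretrained("tinyllama-1.1b-dpo-merged")
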
The fine-tuned model can then be called directly with a transformers pipeline:

from transformers import pipeline
# Use our predefined prompt template
prompt = """<|user|>
Tell me something about Large Language Models.</s>
<|assistant|>
"""
# Run our instruction-tuned model
pipe = pipeline(task="text-generation", model=dpo_model, tokenizer=tokenizer)
print(pipe(prompt)[0]["generated_text"])

Output:

<|user|>
Tell me something about Large Language Models.</s>
<|assistant|>
Large Language Models (LLMs) are a type of artificial intelligence (AI) that can generate human-like language. They are trained on large amounts of data, including text, audio, and video, and are capable of generating complex and nuanced language.

LLMs are used in a variety of applications, including natural language processing (NLP), machine translation, and chatbots. They can be used to generate text, speech, or images, and can be trained to understand different languages and dialects.

One of the most significant applications of LLMs is in the field of natural language generation (NLG). LLMs can be used to generate text in a variety of languages, including English, French, and German. They can also be used to generate speech, such as in chatbots or voice assistants.

LLMs have the potential to revolutionize the way we communicate and interact with each other. They can help us create more engaging and personalized content, and they can also help us understand each other better.
