Merging Fine-tuned Models: Hands-On Tutorial
Combining multiple LoRA adapters with model merging — step-by-step implementation guide
Merging Fine-tuned Models: Hands-On Tutorial
Combining multiple LoRA adapters with model merging — step-by-step implementation guide
Merging Fine-tuned Models Overview Combining multiple LoRA adapters with model merging. This tutorial provides a complete, runnable implementation. Prerequisites ```bash Install required packages pip install transformers datasets peft trl acceler
Merging Fine-tuned Models
Overview
Combining multiple LoRA adapters with model merging. This tutorial provides a complete, runnable implementation.
Prerequisites
bash
# Install required packages
pip install transformers datasets peft trl accelerate bitsandbytes
pip install mergekit
# Metric and experiment-tracking packages used by the evaluation and training steps
pip install evaluate rouge_score mlflow

# Verify GPU access
python -c "import torch; print(torch.cuda.is_available())"
Dataset Preparation
python
from datasets import Dataset, load_dataset
import json


def prepare_dataset(examples: list[dict]) -> Dataset:
    """
    Prepare dataset for model merging fine-tuning.

    Expected format:
        [{"instruction": "...", "input": "...", "output": "..."}]

    Each example is rendered into one prompt string made of an
    "### Instruction:" section, an optional "### Input:" section (emitted
    only when the example has a non-empty "input"), and a "### Response:"
    section holding the expected output. Missing keys are treated as
    empty strings.

    Returns:
        A Dataset with a single "text" column, one row per example.
    """

    def format_example(example):
        instruction = example.get("instruction", "")
        input_text = example.get("input", "")
        output = example.get("output", "")
        if input_text:
            prompt = f"### Instruction:\n{instruction}\n\n### Input:\n{input_text}\n\n### Response:\n{output}"
        else:
            prompt = f"### Instruction:\n{instruction}\n\n### Response:\n{output}"
        return {"text": prompt}

    formatted = [format_example(ex) for ex in examples]
    return Dataset.from_list(formatted)
# Load or create your dataset
# Example: load from HuggingFace (uncomment and substitute a real dataset id;
# "your-org/your-dataset" is only a placeholder and does not exist)
# dataset = load_dataset("your-org/your-dataset", split="train")

# Or create from your own data
examples = [
    {
        "instruction": "Classify this text",
        "input": "Sample text here",
        "output": "Category: Positive",
    }
]
dataset = prepare_dataset(examples)
print(f"Dataset size: {len(dataset)}")
print(f"Sample: {dataset[0]['text'][:200]}")
Model Setup with MERGEKIT
python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import (
    LoraConfig,
    TaskType,
    get_peft_model,
    prepare_model_for_kbit_training,
)

# Model configuration
MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"  # or your base model
OUTPUT_DIR = "./fine-tuned-model"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
tokenizer.pad_token = tokenizer.eos_token  # reuse EOS as the padding token
tokenizer.padding_side = "right"

# QLoRA: 4-bit quantization config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    # Only needed for checkpoints that ship custom modeling code; it executes
    # code from the model repo, so drop it for natively supported models.
    trust_remote_code=True,
)

# Prepare the quantized model for training before attaching adapters.
# Without this step, gradient checkpointing on a frozen k-bit base model
# can fail because the inputs to the checkpointed blocks carry no gradients.
model = prepare_model_for_kbit_training(model)

# Configure LoRA
lora_config = LoraConfig(
    r=16,  # Rank - higher = more parameters
    lora_alpha=32,  # Scaling factor
    target_modules=[  # Which layers to adapt
        "q_proj", "v_proj",
        "k_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)

# Apply LoRA
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
# Output: trainable params: 6.7M || all params: 1.24B || trainable%: 0.54%
Training Configuration
python
from transformers import TrainingArguments
from trl import SFTTrainer

# Training arguments
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,  # Effective batch = 16
    gradient_checkpointing=True,  # Save memory
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    weight_decay=0.001,
    max_grad_norm=0.3,
    logging_steps=25,
    save_steps=500,
    eval_steps=500,
    fp16=True,  # Use bf16=True for Ampere GPUs
    report_to="mlflow",  # Track with MLflow
    run_name="fine-tuning-run-1",
)

# Initialize trainer
# NOTE(review): passing tokenizer / dataset_text_field / max_seq_length directly
# to SFTTrainer depends on the installed trl version - confirm against your release.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    tokenizer=tokenizer,
    args=training_args,
    dataset_text_field="text",
    max_seq_length=2048,
    packing=False,
)

# Train!
trainer.train()

# Save the fine-tuned adapter
trainer.model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print(f"Model saved to {OUTPUT_DIR}")
Inference with Fine-tuned Model
python
from peft import PeftModel

# Load base model + adapter
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, device_map="auto", torch_dtype=torch.float16
)
model = PeftModel.from_pretrained(base_model, OUTPUT_DIR)
model.eval()


def generate(instruction: str, input_text: str = "") -> str:
    """Generate with fine-tuned model.

    Builds a prompt with an "### Instruction:" section, an optional
    "### Input:" section (only when ``input_text`` is non-empty) and an
    empty "### Response:" section for the model to complete.

    Args:
        instruction: The task description placed under "### Instruction:".
        input_text: Optional extra context placed under "### Input:".

    Returns:
        The decoded continuation only (prompt tokens are sliced off),
        with special tokens removed and surrounding whitespace stripped.
    """
    if input_text:
        prompt = f"### Instruction:\n{instruction}\n\n### Input:\n{input_text}\n\n### Response:\n"
    else:
        prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
    # Follow the model's own device instead of hard-coding "cuda": the model is
    # loaded with device_map="auto", which may not place it on a CUDA device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.1,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
        )
    # Keep only the newly generated tokens that follow the prompt.
    prompt_length = inputs.input_ids.shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return response.strip()
# Test
response = generate("Explain model merging in simple terms")
print(response)
Evaluation
python
from evaluate import load
import numpy as np

# Load evaluation metrics
rouge = load("rouge")
bleu = load("bleu")


def evaluate_model(test_examples: list[dict], model_fn) -> dict:
    """Evaluate fine-tuned model quality.

    Args:
        test_examples: Dicts with an "instruction" key, an optional "input"
            key and an "output" key holding the reference answer.
        model_fn: Callable invoked as ``model_fn(instruction, input_text)``
            that returns the model's prediction as a string.

    Returns:
        A dict with the "rouge1", "rouge2" and "rougeL" scores plus
        "num_examples", the number of predictions scored.
    """
    predictions = []
    references = []
    for ex in test_examples:
        pred = model_fn(ex["instruction"], ex.get("input", ""))
        predictions.append(pred)
        references.append(ex["output"])
    rouge_scores = rouge.compute(predictions=predictions, references=references)
    return {
        "rouge1": rouge_scores["rouge1"],
        "rouge2": rouge_scores["rouge2"],
        "rougeL": rouge_scores["rougeL"],
        "num_examples": len(predictions),
    }
# Placeholder held-out examples in the same schema as the training data.
# Replace with real examples that were NOT used for training.
test_examples = [
    {
        "instruction": "Classify this text",
        "input": "Sample text here",
        "output": "Category: Positive",
    }
]
results = evaluate_model(test_examples, generate)
print(f"Evaluation results: {results}")
GPU Memory Requirements
Best Practices
Resources
Related Tools
Related Tutorials
Training on multiple tasks simultaneously for generalization — step-by-step implementation guide
Using GPT-4 to generate fine-tuning data synthetically — step-by-step implementation guide
Fine-tuning LLMs to follow instructions with supervised learning — step-by-step implementation guide