# Hugging Face Hub page header (not part of the script):
# uploader: saadkhi | commit message: "Upload folder using huggingface_hub" | commit c73c14f (verified)
# test.py
from unsloth import FastLanguageModel
import torch
# Base checkpoint: the pre-quantised 4-bit Phi-3 mini instruct model.
# Per the original note it is a ~7.5GB download on the first run and is served
# from the local cache on later runs.
BASE_MODEL_ID = "unsloth/Phi-3-mini-4k-instruct-bnb-4bit"
MAX_SEQ_LENGTH = 2048

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=BASE_MODEL_ID,
    max_seq_length=MAX_SEQ_LENGTH,
    dtype=None,  # None leaves the dtype choice to Unsloth
    load_in_4bit=True,
)
# Attach the fine-tuned LoRA weights (adapter_model.safetensors) to the base model.
#
# get_peft_model() wraps the base model with a freshly initialised LoRA adapter,
# which PEFT registers under the adapter name "default". The rank, alpha and
# target modules below must match the values used during fine-tuning; otherwise
# the saved tensors will not fit the adapter created here.
model = FastLanguageModel.get_peft_model(
    model,
    r=64,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=128,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing=False,
)

# Folder that contains adapter_model.safetensors.
adapter_dir = "/home/saad/Downloads/phi3-mini-lora-only/content/phi3-mini-lora-only"

# PeftModel.load_adapter() requires an adapter name. Loading into "default"
# overwrites the freshly initialised weights of the active adapter, so the
# fine-tuned weights are the ones used during generation.
model.load_adapter(adapter_dir, adapter_name="default")

# Switch the model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)
# Smoke test: push one chat-formatted prompt through the model and print the result.
messages = [{"role": "user", "content": "delete duplicate entries in a table using two columns"}]

# return_dict=True makes the tokenizer return both input_ids and attention_mask,
# so generate() receives an explicit mask instead of having to infer one, and
# the call no longer depends on the template returning a bare tensor.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
).to("cuda")

outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.7, do_sample=True)

# The decoded text is the full sequence (prompt followed by the completion),
# with special tokens left in.
print(tokenizer.decode(outputs[0], skip_special_tokens=False))