# test.py
from unsloth import FastLanguageModel
import torch

# Load base 4-bit model (downloads once ~7.5GB, then cached forever)
model, tokenizer = FastLanguageModel.from_pretrained(
    "unsloth/Phi-3-mini-4k-instruct-bnb-4bit",
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)
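
# Optional sanity check -- not part of the original script, and the folder layout
# is assumed from the adapter path used below: verify the PEFT files exist before
# trying to load them, so a wrong path fails early with a clear message.
import os
adapter_dir = "/home/saad/Downloads/phi3-mini-lora-only/content/phi3-mini-lora-only"
for fname in ("adapter_config.json", "adapter_model.safetensors"):
    if not os.path.isfile(os.path.join(adapter_dir, fname)):
        raise FileNotFoundError(f"{fname} not found in {adapter_dir}")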

# Load YOUR fine-tuned LoRA (super fast, uses adapter_model.safetensors)
# Point directly to your folder (where adapter_model.safetensors is).
# Note: no need to call get_peft_model() first -- that would attach a fresh,
# untrained "default" adapter. PeftModel.from_pretrained reads r / lora_alpha /
# target_modules from the saved adapter_config.json and loads the trained weights.
from peft import PeftModel
model = PeftModel.from_pretrained(
    model,
    "/home/saad/Downloads/phi3-mini-lora-only/content/phi3-mini-lora-only",
)
FastLanguageModel.for_inference(model)

# Test it
messages = [{"role": "user", "content": "delete duplicate entries in a table using two columns"}]
inputs = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
outputs = model.generate(inputs, max_new_tokens=512, temperature=0.7, do_sample=True)
print(tokenizer.decode(outputs[0], skip_special_tokens=False))
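
# Optional extras -- a sketch, not part of the original script: decode only the
# newly generated tokens (slice off the prompt), and stream tokens as they are
# produced using transformers' TextStreamer helper.
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))

from transformers import TextStreamer
streamer = TextStreamer(tokenizer, skip_prompt=True)
_ = model.generate(inputs, streamer=streamer, max_new_tokens=512, temperature=0.7, do_sample=True)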