import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
print("π Loading AI/ML Tutor with Vicuna...")
# Configure 4-bit quantization to reduce memory usage
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)
base_model = "TheBloke/vicuna-7B-1.1-HF"
adapter_path = "shahzaib41202/ai-ml-tutor-v1"
try:
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(adapter_path)
    tokenizer.pad_token = tokenizer.eos_token

    # Load base model with 4-bit quantization
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )

    # Load your fine-tuned adapter
    model = PeftModel.from_pretrained(model, adapter_path)
    print("✅ Vicuna AI Tutor Loaded with 4-bit quantization!")
except Exception as e:
    print(f"❌ Failed to load model: {e}")
    model = None
    tokenizer = None
def ask_ai_tutor(question):
    if model is None or tokenizer is None:
        return "AI Tutor is currently unavailable. Please try again later."
    try:
        prompt = f"Question: {question}\nAnswer (from AI ML Tutor created by Shahzaib Shaikh):"

        # Tokenize
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=256  # Reduced for memory
        )

        # Generate with minimal memory usage
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=150,  # Shorter responses to save memory
                temperature=0.7,
                do_sample=True,
                top_p=0.9,
                repetition_penalty=1.1
            )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        if "Answer (from AI ML Tutor created by Shahzaib Shaikh):" in response:
            answer = response.split("Answer (from AI ML Tutor created by Shahzaib Shaikh):")[-1].strip()
        else:
            answer = response
        return answer
    except Exception as e:
        return f"❌ Generation error: {str(e)}"
# Create interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
gr.Markdown("# π€ AI/ML Tutor by Shahzaib Shaikh")
gr.Markdown("### Powered by Vicuna-7B (4-bit quantized)")
with gr.Row():
with gr.Column():
question = gr.Textbox(
label="Your Question",
placeholder="Ask about Machine Learning, NLP, Random Forest, Decision Trees...",
lines=3
)
ask_btn = gr.Button("π Ask AI Tutor", variant="primary")
answer = gr.Textbox(
label="AI Tutor Response",
lines=6,
interactive=False,
show_copy_button=True
)
clear_btn = gr.Button("ποΈ Clear")
with gr.Column():
gr.Markdown("### π‘ Quick Examples")
gr.Examples(
examples=[
"What is Machine Learning?",
"What is NLP?",
"What is Random Forest?",
"What is Decision Tree?"
],
inputs=question,
label="Click any example"
)
gr.Markdown("### βΉοΈ Note")
gr.Markdown("Using Vicuna-7B with 4-bit quantization for memory efficiency")
def get_answer(q):
if not q.strip():
return "Please enter a question."
return ask_ai_tutor(q)
def clear_all():
return "", ""
ask_btn.click(get_answer, question, answer)
question.submit(get_answer, question, answer)
clear_btn.click(clear_all, outputs=[question, answer])
if __name__ == "__main__":
    demo.launch()