import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

print("🚀 Loading AI/ML Tutor with Vicuna...")

# Configure 4-bit quantization to reduce memory usage
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

base_model = "TheBloke/vicuna-7B-1.1-HF"
adapter_path = "shahzaib41202/ai-ml-tutor-v1"

try:
    # Load the tokenizer saved alongside the fine-tuned adapter
    tokenizer = AutoTokenizer.from_pretrained(adapter_path)
    tokenizer.pad_token = tokenizer.eos_token

    # Load the base model with 4-bit quantization
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )

    # Attach the fine-tuned adapter on top of the base model
    model = PeftModel.from_pretrained(model, adapter_path)
    print("✅ Vicuna AI Tutor loaded with 4-bit quantization!")
except Exception as e:
    print(f"❌ Failed to load model: {e}")
    model = None
    tokenizer = None


def ask_ai_tutor(question):
    if model is None or tokenizer is None:
        return "AI Tutor is currently unavailable. Please try again later."

    try:
        prompt = f"Question: {question}\nAnswer (from AI ML Tutor created by Shahzaib Shaikh):"

        # Tokenize and move inputs to the same device as the model,
        # so generation doesn't fail with a CPU/GPU device mismatch
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=256  # Reduced for memory
        ).to(model.device)

        # Generate with minimal memory usage
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=150,  # Shorter responses to save memory
                temperature=0.7,
                do_sample=True,
                top_p=0.9,
                repetition_penalty=1.1,
                pad_token_id=tokenizer.eos_token_id
            )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Strip the prompt prefix so only the generated answer remains
        marker = "Answer (from AI ML Tutor created by Shahzaib Shaikh):"
        if marker in response:
            answer = response.split(marker)[-1].strip()
        else:
            answer = response
        return answer
    except Exception as e:
        return f"❌ Generation error: {str(e)}"


# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 AI/ML Tutor by Shahzaib Shaikh")
    gr.Markdown("### Powered by Vicuna-7B (4-bit quantized)")

    with gr.Row():
        with gr.Column():
            question = gr.Textbox(
                label="Your Question",
                placeholder="Ask about Machine Learning, NLP, Random Forest, Decision Trees...",
                lines=3
            )
            ask_btn = gr.Button("🚀 Ask AI Tutor", variant="primary")
            answer = gr.Textbox(
                label="AI Tutor Response",
                lines=6,
                interactive=False,
                show_copy_button=True
            )
            clear_btn = gr.Button("đŸ—‘ī¸ Clear")
        with gr.Column():
            gr.Markdown("### 💡 Quick Examples")
            gr.Examples(
                examples=[
                    "What is Machine Learning?",
                    "What is NLP?",
                    "What is Random Forest?",
                    "What is Decision Tree?"
                ],
                inputs=question,
                label="Click any example"
            )
            gr.Markdown("### â„šī¸ Note")
            gr.Markdown("Using Vicuna-7B with 4-bit quantization for memory efficiency")

    def get_answer(q):
        if not q.strip():
            return "Please enter a question."
        return ask_ai_tutor(q)

    def clear_all():
        return "", ""

    ask_btn.click(get_answer, question, answer)
    question.submit(get_answer, question, answer)
    clear_btn.click(clear_all, outputs=[question, answer])

if __name__ == "__main__":
    demo.launch()