import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

print("🚀 Loading AI/ML Tutor with Vicuna...")

# Configure 4-bit quantization to reduce memory usage
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

base_model = "TheBloke/vicuna-7B-1.1-HF"
adapter_path = "shahzaib41202/ai-ml-tutor-v1"

try:
    # Load the tokenizer saved alongside the fine-tuned adapter
    tokenizer = AutoTokenizer.from_pretrained(adapter_path)
    tokenizer.pad_token = tokenizer.eos_token

    # Load the base model with 4-bit quantization
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )

    # Attach the fine-tuned adapter on top of the base model
    model = PeftModel.from_pretrained(model, adapter_path)
    print("✅ Vicuna AI Tutor loaded with 4-bit quantization!")
except Exception as e:
    print(f"❌ Failed to load model: {e}")
    model = None
    tokenizer = None


def ask_ai_tutor(question):
    if model is None or tokenizer is None:
        return "AI Tutor is currently unavailable. Please try again later."

    try:
        prompt = f"Question: {question}\nAnswer (from AI ML Tutor created by Shahzaib Shaikh):"

        # Tokenize and move inputs to the same device as the model,
        # so generation doesn't fail with a CPU/GPU device mismatch
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=256  # Reduced for memory
        ).to(model.device)

        # Generate with minimal memory usage
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=150,  # Shorter responses to save memory
                temperature=0.7,
                do_sample=True,
                top_p=0.9,
                repetition_penalty=1.1,
                pad_token_id=tokenizer.eos_token_id
            )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Strip the prompt prefix so only the generated answer remains
        marker = "Answer (from AI ML Tutor created by Shahzaib Shaikh):"
        if marker in response:
            answer = response.split(marker)[-1].strip()
        else:
            answer = response
        return answer
    except Exception as e:
        return f"❌ Generation error: {str(e)}"


# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 AI/ML Tutor by Shahzaib Shaikh")
    gr.Markdown("### Powered by Vicuna-7B (4-bit quantized)")

    with gr.Row():
        with gr.Column():
            question = gr.Textbox(
                label="Your Question",
                placeholder="Ask about Machine Learning, NLP, Random Forest, Decision Trees...",
                lines=3
            )
            ask_btn = gr.Button("🚀 Ask AI Tutor", variant="primary")
            answer = gr.Textbox(
                label="AI Tutor Response",
                lines=6,
                interactive=False,
                show_copy_button=True
            )
            clear_btn = gr.Button("đŸ—‘ī¸ Clear")
        with gr.Column():
            gr.Markdown("### 💡 Quick Examples")
            gr.Examples(
                examples=[
                    "What is Machine Learning?",
                    "What is NLP?",
                    "What is Random Forest?",
                    "What is Decision Tree?"
                ],
                inputs=question,
                label="Click any example"
            )
            gr.Markdown("### â„šī¸ Note")
            gr.Markdown("Using Vicuna-7B with 4-bit quantization for memory efficiency")

    def get_answer(q):
        if not q.strip():
            return "Please enter a question."
        return ask_ai_tutor(q)

    def clear_all():
        return "", ""

    ask_btn.click(get_answer, question, answer)
    question.submit(get_answer, question, answer)
    clear_btn.click(clear_all, outputs=[question, answer])

if __name__ == "__main__":
    demo.launch()