"""Lightweight conversational AI agent: distilgpt2 via LangChain, served with Gradio.

Loads a small CPU-friendly model, wraps it in a ConversationChain with buffer
memory so the bot remembers the session, and exposes a chat UI plus a
"clear history" button.
"""
import gradio as gr
from langchain_huggingface import HuggingFacePipeline
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory

# Load lightweight model (distilgpt2, ~82M parameters, fast on CPU).
llm = HuggingFacePipeline.from_model_id(
    model_id="distilgpt2",
    task="text-generation",
    pipeline_kwargs={
        "max_new_tokens": 100,      # Short responses for speed
        "do_sample": True,
        "temperature": 0.7,         # Balanced creativity
        "top_k": 40,
        "top_p": 0.9,
        # Without this, the HF text-generation pipeline echoes the entire
        # prompt (conversation history + input) before the completion, so
        # the "response" would actually be the first line of the prompt.
        "return_full_text": False,
    },
)

# Set up conversation memory (full transcript kept in RAM for the session).
memory = ConversationBufferMemory()

# Create a simple conversation chain.
# NOTE(review): ConversationChain is deprecated in recent LangChain releases
# in favor of RunnableWithMessageHistory — consider migrating.
conversation = ConversationChain(
    llm=llm,
    memory=memory,
    verbose=False,  # No logging for speed
)


def clear_memory():
    """Wipe the shared conversation buffer; returns a status string for the UI."""
    memory.clear()
    return "Conversation history cleared!"


def chat_with_agent(message, history):
    """Gradio chat callback: run one turn through the chain.

    Args:
        message: The user's latest message.
        history: Gradio-managed chat history (unused; LangChain memory is
            the source of truth for context).

    Returns:
        The model's reply, trimmed to its first line, or an error string.
    """
    try:
        response = conversation.predict(input=message)
        # Clean up response (distilgpt2 can be verbose): keep only the
        # first line of the completion.
        response = response.strip().split("\n")[0]
        if not response:
            # Sampling occasionally yields only whitespace; avoid posting
            # an empty chat bubble.
            response = "(no response — try rephrasing your question)"
    except Exception as e:
        # Top-level UI boundary: surface the error to the user rather
        # than crashing the Space.
        response = f"Error: {str(e)}. Try rephrasing your question."
    return response


# Gradio chat interface.
with gr.Blocks() as iface:
    gr.Markdown("# Fast Free AI Agent")
    gr.Markdown("A lightweight conversational AI that remembers our talks. Hosted free on Hugging Face Spaces. Responses in ~3-10 seconds.")
    chatbot = gr.ChatInterface(
        fn=chat_with_agent,
        # Plain strings: the dict form ({"text": ...}) is only valid for
        # multimodal ChatInterfaces.
        examples=[
            "My name is Alex. What's my name?",
            "Tell me a short joke.",
        ],
        title="Chat with Your AI Agent",
        description="Type your message below to chat. The AI remembers our conversation!",
    )
    # Explicit, named status box instead of an anonymous inline component.
    clear_status = gr.Textbox(label="Status", interactive=False)
    gr.Button("Clear Conversation History").click(fn=clear_memory, outputs=clear_status)

# Launch the app.
if __name__ == "__main__":
    iface.launch()