"""Lightweight conversational AI agent: distilgpt2 via LangChain, served with Gradio.

Loads a small CPU-friendly model, wraps it in a ConversationChain with buffer
memory so the bot remembers the session, and exposes a chat UI plus a
"clear history" button.
"""
import gradio as gr
from langchain_huggingface import HuggingFacePipeline
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory

# Load lightweight model (distilgpt2, ~82M parameters, fast on CPU).
llm = HuggingFacePipeline.from_model_id(
    model_id="distilgpt2",
    task="text-generation",
    pipeline_kwargs={
        "max_new_tokens": 100,      # Short responses for speed
        "do_sample": True,
        "temperature": 0.7,         # Balanced creativity
        "top_k": 40,
        "top_p": 0.9,
        # Without this, the HF text-generation pipeline echoes the entire
        # prompt (conversation history + input) before the completion, so
        # the "response" would actually be the first line of the prompt.
        "return_full_text": False,
    },
)

# Set up conversation memory (full transcript kept in RAM for the session).
memory = ConversationBufferMemory()

# Create a simple conversation chain.
# NOTE(review): ConversationChain is deprecated in recent LangChain releases
# in favor of RunnableWithMessageHistory — consider migrating.
conversation = ConversationChain(
    llm=llm,
    memory=memory,
    verbose=False,  # No logging for speed
)


def clear_memory():
    """Wipe the shared conversation buffer; returns a status string for the UI."""
    memory.clear()
    return "Conversation history cleared!"


def chat_with_agent(message, history):
    """Gradio chat callback: run one turn through the chain.

    Args:
        message: The user's latest message.
        history: Gradio-managed chat history (unused; LangChain memory is
            the source of truth for context).

    Returns:
        The model's reply, trimmed to its first line, or an error string.
    """
    try:
        response = conversation.predict(input=message)
        # Clean up response (distilgpt2 can be verbose): keep only the
        # first line of the completion.
        response = response.strip().split("\n")[0]
        if not response:
            # Sampling occasionally yields only whitespace; avoid posting
            # an empty chat bubble.
            response = "(no response — try rephrasing your question)"
    except Exception as e:
        # Top-level UI boundary: surface the error to the user rather
        # than crashing the Space.
        response = f"Error: {str(e)}. Try rephrasing your question."
    return response


# Gradio chat interface.
with gr.Blocks() as iface:
    gr.Markdown("# Fast Free AI Agent")
    gr.Markdown("A lightweight conversational AI that remembers our talks. Hosted free on Hugging Face Spaces. Responses in ~3-10 seconds.")
    chatbot = gr.ChatInterface(
        fn=chat_with_agent,
        # Plain strings: the dict form ({"text": ...}) is only valid for
        # multimodal ChatInterfaces.
        examples=[
            "My name is Alex. What's my name?",
            "Tell me a short joke.",
        ],
        title="Chat with Your AI Agent",
        description="Type your message below to chat. The AI remembers our conversation!",
    )
    # Explicit, named status box instead of an anonymous inline component.
    clear_status = gr.Textbox(label="Status", interactive=False)
    gr.Button("Clear Conversation History").click(fn=clear_memory, outputs=clear_status)

# Launch the app.
if __name__ == "__main__":
    iface.launch()