import json

import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load model and tokenizer
model_name = "HuggingFaceTB/SmolLM3-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.bfloat16, device_map="auto"
)
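# Note: device_map="auto" requires the `accelerate` package to be installed.
# As a rough estimate, a 3B-parameter model in bfloat16 needs about 6 GB of
# GPU memory for the weights alone.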

@spaces.GPU  # run on a GPU worker on ZeroGPU hardware (implied by the `spaces` import)
def chat_with_smollm3(message, history, system_prompt="", enable_thinking=True,
                      temperature=0.6, top_p=0.95, max_tokens=32768):
    """
    Chat with SmolLM3-3B with full feature support.
    """
    # Prepare messages
    messages = []

    # Add the system prompt if provided, appending a thinking-mode flag
    # unless the prompt already contains one
    if system_prompt.strip():
        if enable_thinking and "/think" not in system_prompt and "/no_think" not in system_prompt:
            system_prompt += "/think"
        elif not enable_thinking and "/no_think" not in system_prompt and "/think" not in system_prompt:
            system_prompt += "/no_think"
        messages.append({"role": "system", "content": system_prompt})
    elif not enable_thinking:
        # Without a system prompt, disable thinking via a flag-only system message
        messages.append({"role": "system", "content": "/no_think"})

    # Add conversation history
    for human_msg, assistant_msg in history:
        messages.append({"role": "user", "content": human_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message
    messages.append({"role": "user", "content": message})

    # Apply chat template; pass enable_thinking only when the system prompt
    # does not already carry the flag
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=enable_thinking if not system_prompt.strip() else None,
    )

    # Tokenize input
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # Generate response
    with torch.no_grad():
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens
    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
    response = tokenizer.decode(output_ids, skip_special_tokens=True)
    return response
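
# Illustrative direct call, outside the Gradio UI (prompt and settings are
# made-up values, not part of the app):
#   chat_with_smollm3("What is the capital of France?", history=[],
#                     enable_thinking=False, max_tokens=256)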

@spaces.GPU  # run on a GPU worker on ZeroGPU hardware (implied by the `spaces` import)
def chat_with_tools(message, history, tools_json="", system_prompt="", enable_thinking=False,
                    temperature=0.6, top_p=0.95, max_tokens=32768):
    """
    Chat with SmolLM3-3B using its tool-calling capabilities.
    """
    # Parse tools if provided
    tools = []
    if tools_json.strip():
        try:
            tools = json.loads(tools_json)
        except json.JSONDecodeError:
            return "Error: Invalid JSON format for tools"

    # Prepare messages
    messages = []

    # Add system prompt if provided
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})

    # Add conversation history
    for human_msg, assistant_msg in history:
        messages.append({"role": "user", "content": human_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message
    messages.append({"role": "user", "content": message})

    # Apply chat template with tools
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=enable_thinking,
        xml_tools=tools if tools else None,
    )

    # Tokenize input
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # Generate response
    with torch.no_grad():
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens
    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
    response = tokenizer.decode(output_ids, skip_special_tokens=True)
    return response
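
# Illustrative direct call (made-up message; example_tools is defined below):
#   chat_with_tools("What's the weather in Paris?", history=[],
#                   tools_json=example_tools)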

# Example tools for demonstration
example_tools = """[
    {
        "name": "get_weather",
        "description": "Get the weather in a city",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type": "string",
                    "description": "The city to get the weather for"
                }
            }
        }
    },
    {
        "name": "calculate",
        "description": "Perform basic mathematical calculations",
        "parameters": {
            "type": "object",
            "properties": {
                "expression": {
                    "type": "string",
                    "description": "Mathematical expression to evaluate"
                }
            }
        }
    }
]"""

# Create Gradio interface with a dark theme and mobile support
with gr.Blocks(
    title="SmolLM3-3B Chat",
    theme=gr.themes.Base().set(
        background_fill_primary="#1a1a1a",
        background_fill_secondary="#2d2d2d",
        border_color_primary="#404040",
        button_primary_background_fill="#4a9eff",
        button_primary_background_fill_hover="#5aa3ff",
        button_primary_text_color="#ffffff",
        block_background_fill="#2d2d2d",
        block_border_color="#404040",
        input_background_fill="#3a3a3a",
        input_border_color="#404040",
        slider_color="#4a9eff",
    ),
| css=""" | |
| /* Mobile-first responsive design */ | |
| @media (max-width: 768px) { | |
| .gradio-container { | |
| padding: 8px !important; | |
| } | |
| .gr-row { | |
| flex-direction: column !important; | |
| } | |
| .gr-column { | |
| width: 100% !important; | |
| min-width: 0 !important; | |
| } | |
| .gr-tabs { | |
| font-size: 14px !important; | |
| } | |
| .gr-button { | |
| width: 100% !important; | |
| margin: 2px 0 !important; | |
| } | |
| .gr-textbox { | |
| font-size: 16px !important; | |
| } | |
| .gr-chatbot { | |
| height: 400px !important; | |
| } | |
| .gr-markdown { | |
| font-size: 14px !important; | |
| } | |
| .gr-slider { | |
| width: 100% !important; | |
| } | |
| .settings-panel { | |
| margin-top: 20px !important; | |
| } | |
| } | |
| /* Settings panel styling */ | |
| .settings-panel { | |
| background-color: #2d2d2d !important; | |
| border: 1px solid #404040 !important; | |
| border-radius: 8px !important; | |
| padding: 16px !important; | |
| margin-top: 12px !important; | |
| } | |
| .settings-button { | |
| background-color: #3a3a3a !important; | |
| border: 1px solid #404040 !important; | |
| color: #ffffff !important; | |
| padding: 8px 16px !important; | |
| border-radius: 6px !important; | |
| cursor: pointer !important; | |
| font-size: 14px !important; | |
| margin-bottom: 8px !important; | |
| } | |
| .settings-button:hover { | |
| background-color: #4a4a4a !important; | |
| } | |
| /* Dark mode improvements */ | |
| .gr-chatbot { | |
| background-color: #2d2d2d !important; | |
| } | |
| .gr-chatbot .message { | |
| background-color: #3a3a3a !important; | |
| border: 1px solid #404040 !important; | |
| border-radius: 8px !important; | |
| margin: 4px 0 !important; | |
| padding: 8px !important; | |
| } | |
| .gr-chatbot .message.user { | |
| background-color: #4a9eff !important; | |
| color: white !important; | |
| } | |
| .gr-chatbot .message.bot { | |
| background-color: #3a3a3a !important; | |
| color: #ffffff !important; | |
| } | |
| /* Better mobile touch targets */ | |
| @media (max-width: 768px) { | |
| .gr-button { | |
| min-height: 44px !important; | |
| padding: 12px !important; | |
| } | |
| .gr-slider input { | |
| min-height: 44px !important; | |
| } | |
| .gr-checkbox { | |
| min-height: 44px !important; | |
| } | |
| } | |
| /* Improve readability */ | |
| .gr-markdown h1, .gr-markdown h2, .gr-markdown h3 { | |
| color: #ffffff !important; | |
| } | |
| .gr-markdown p, .gr-markdown li { | |
| color: #e0e0e0 !important; | |
| } | |
| /* Tab styling */ | |
| .gr-tabs .gr-tab { | |
| background-color: #3a3a3a !important; | |
| color: #ffffff !important; | |
| border-color: #404040 !important; | |
| } | |
| .gr-tabs .gr-tab.selected { | |
| background-color: #4a9eff !important; | |
| color: #ffffff !important; | |
| } | |
| """ | |
| ) as demo: | |
| gr.Markdown("# π€ SmolLM3-3B Chat Interface") | |
| gr.Markdown("Chat with SmolLM3-3B, a 3B parameter model with advanced reasoning, long context support, and tool calling capabilities.") | |
| with gr.Tabs(): | |
| with gr.TabItem("π¬ Standard Chat"): | |
| chatbot = gr.Chatbot(height=500, label="Chat with SmolLM3-3B") | |
| msg = gr.Textbox(label="Your message", placeholder="Type your message here...") | |
| with gr.Row(): | |
| submit = gr.Button("Send", variant="primary") | |
| clear = gr.Button("Clear") | |
| settings_btn = gr.Button("βοΈ Settings", size="sm") | |
            with gr.Column(visible=False, elem_classes="settings-panel") as settings_panel:
                gr.Markdown("### ⚙️ Advanced Settings")
                system_prompt = gr.Textbox(
                    label="System Prompt",
                    placeholder="Enter system instructions (optional)",
                    lines=3,
                    value="You are an AI assistant trained by HuggingFace. You are helpful, harmless, and honest.",
                )
                enable_thinking = gr.Checkbox(
                    label="Enable Extended Thinking",
                    value=True,
                    info="Enable reasoning traces for better responses",
                )
                temperature = gr.Slider(
                    minimum=0.0, maximum=2.0, value=0.6, step=0.1, label="Temperature"
                )
                top_p = gr.Slider(
                    minimum=0.0, maximum=1.0, value=0.95, step=0.05, label="Top-p"
                )
                max_tokens = gr.Slider(
                    minimum=1, maximum=32768, value=32768, step=1, label="Max Tokens"
                )
            settings_visible = gr.State(False)

            def respond(message, history, sys_prompt, thinking, temp, top_p_val, max_tok):
                response = chat_with_smollm3(message, history, sys_prompt, thinking, temp, top_p_val, max_tok)
                history.append((message, response))
                return "", history

            def toggle_settings(visible):
                # Component attributes are frozen after creation, so track
                # visibility in session state instead of reading settings_panel.visible
                return gr.update(visible=not visible), not visible

            submit.click(respond, [msg, chatbot, system_prompt, enable_thinking, temperature, top_p, max_tokens], [msg, chatbot])
            msg.submit(respond, [msg, chatbot, system_prompt, enable_thinking, temperature, top_p, max_tokens], [msg, chatbot])
            clear.click(lambda: ([], ""), outputs=[chatbot, msg])
            settings_btn.click(toggle_settings, inputs=[settings_visible], outputs=[settings_panel, settings_visible])
| with gr.TabItem("π οΈ Tool Calling"): | |
| tool_chatbot = gr.Chatbot(height=500, label="Chat with Tools") | |
| tool_msg = gr.Textbox(label="Your message", placeholder="Ask me to use tools...") | |
| with gr.Row(): | |
| tool_submit = gr.Button("Send", variant="primary") | |
| tool_clear = gr.Button("Clear") | |
| tool_settings_btn = gr.Button("βοΈ Settings", size="sm") | |
            with gr.Column(visible=False, elem_classes="settings-panel") as tool_settings_panel:
                gr.Markdown("### 🛠️ Tool Settings")
                tools_json = gr.Textbox(
                    label="Tools JSON",
                    placeholder="Enter tools as a JSON array",
                    lines=10,
                    value=example_tools,
                )
                tool_system_prompt = gr.Textbox(
                    label="System Prompt",
                    placeholder="Enter system instructions (optional)",
                    lines=2,
                    value="You are an AI assistant trained by HuggingFace. You are helpful, harmless, and honest.",
                )
                tool_thinking = gr.Checkbox(
                    label="Enable Extended Thinking",
                    value=False,
                    info="Enable reasoning traces for tool usage",
                )
                tool_temperature = gr.Slider(
                    minimum=0.0, maximum=2.0, value=0.6, step=0.1, label="Temperature"
                )
                tool_top_p = gr.Slider(
                    minimum=0.0, maximum=1.0, value=0.95, step=0.05, label="Top-p"
                )
                tool_max_tokens = gr.Slider(
                    minimum=1, maximum=32768, value=32768, step=1, label="Max Tokens"
                )
            tool_settings_visible = gr.State(False)

            def tool_respond(message, history, tools, sys_prompt, thinking, temp, top_p_val, max_tok):
                response = chat_with_tools(message, history, tools, sys_prompt, thinking, temp, top_p_val, max_tok)
                history.append((message, response))
                return "", history

            def toggle_tool_settings(visible):
                # Same state-tracking approach as the standard chat tab
                return gr.update(visible=not visible), not visible

            tool_submit.click(tool_respond, [tool_msg, tool_chatbot, tools_json, tool_system_prompt, tool_thinking, tool_temperature, tool_top_p, tool_max_tokens], [tool_msg, tool_chatbot])
            tool_msg.submit(tool_respond, [tool_msg, tool_chatbot, tools_json, tool_system_prompt, tool_thinking, tool_temperature, tool_top_p, tool_max_tokens], [tool_msg, tool_chatbot])
            tool_clear.click(lambda: ([], ""), outputs=[tool_chatbot, tool_msg])
            tool_settings_btn.click(toggle_tool_settings, inputs=[tool_settings_visible], outputs=[tool_settings_panel, tool_settings_visible])
| gr.Markdown(""" | |
| ### π Model Information | |
| - **Model**: HuggingFaceTB/SmolLM3-3B | |
| - **Features**: Advanced reasoning, long context (up to 128k tokens), multilingual support | |
| - **Languages**: English, French, Spanish, German, Italian, Portuguese (+ Arabic, Chinese, Russian) | |
| - **Extended Thinking**: Provides reasoning traces for better responses | |
| - **Tool Calling**: Supports XML-based tool calling for agentic workflows | |
| ### π‘ Usage Tips | |
| - Use Extended Thinking for complex reasoning tasks | |
| - Adjust temperature (0.6 recommended) for response creativity | |
| - Try different system prompts for specialized behaviors | |
| - Use tool calling for function-based interactions | |
| """) | |

if __name__ == "__main__":
    demo.launch()
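
# Optional tweak, not part of the original app: Gradio's request queue keeps
# long generations from blocking other users, e.g. replace the launch call with
#   demo.queue().launch()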