import json

import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load model and tokenizer
model_name = "HuggingFaceTB/SmolLM3-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.bfloat16, device_map="auto"
)
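# Note: device_map="auto" requires the `accelerate` package to be installed.
# As a rough estimate, a 3B-parameter model in bfloat16 needs about 6 GB of
# GPU memory for the weights alone.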

@spaces.GPU  # run on a GPU worker on ZeroGPU hardware (implied by the `spaces` import)
def chat_with_smollm3(message, history, system_prompt="", enable_thinking=True,
                      temperature=0.6, top_p=0.95, max_tokens=32768):
    """
    Chat with SmolLM3-3B with full feature support.
    """
    # Prepare messages
    messages = []

    # Add the system prompt if provided, appending a thinking-mode flag
    # unless the prompt already contains one
    if system_prompt.strip():
        if enable_thinking and "/think" not in system_prompt and "/no_think" not in system_prompt:
            system_prompt += "/think"
        elif not enable_thinking and "/no_think" not in system_prompt and "/think" not in system_prompt:
            system_prompt += "/no_think"
        messages.append({"role": "system", "content": system_prompt})
    elif not enable_thinking:
        # Without a system prompt, disable thinking via a flag-only system message
        messages.append({"role": "system", "content": "/no_think"})

    # Add conversation history
    for human_msg, assistant_msg in history:
        messages.append({"role": "user", "content": human_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message
    messages.append({"role": "user", "content": message})

    # Apply chat template; pass enable_thinking only when the system prompt
    # does not already carry the flag
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=enable_thinking if not system_prompt.strip() else None,
    )

    # Tokenize input
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # Generate response
    with torch.no_grad():
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens
    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
    response = tokenizer.decode(output_ids, skip_special_tokens=True)
    return response
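
# Illustrative direct call, outside the Gradio UI (prompt and settings are
# made-up values, not part of the app):
#   chat_with_smollm3("What is the capital of France?", history=[],
#                     enable_thinking=False, max_tokens=256)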

@spaces.GPU  # run on a GPU worker on ZeroGPU hardware (implied by the `spaces` import)
def chat_with_tools(message, history, tools_json="", system_prompt="", enable_thinking=False,
                    temperature=0.6, top_p=0.95, max_tokens=32768):
    """
    Chat with SmolLM3-3B using its tool-calling capabilities.
    """
    # Parse tools if provided
    tools = []
    if tools_json.strip():
        try:
            tools = json.loads(tools_json)
        except json.JSONDecodeError:
            return "Error: Invalid JSON format for tools"

    # Prepare messages
    messages = []

    # Add system prompt if provided
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})

    # Add conversation history
    for human_msg, assistant_msg in history:
        messages.append({"role": "user", "content": human_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message
    messages.append({"role": "user", "content": message})

    # Apply chat template with tools
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=enable_thinking,
        xml_tools=tools if tools else None,
    )

    # Tokenize input
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # Generate response
    with torch.no_grad():
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens
    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
    response = tokenizer.decode(output_ids, skip_special_tokens=True)
    return response
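
# Illustrative direct call (made-up message; example_tools is defined below):
#   chat_with_tools("What's the weather in Paris?", history=[],
#                   tools_json=example_tools)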

# Example tools for demonstration
example_tools = """[
    {
        "name": "get_weather",
        "description": "Get the weather in a city",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type": "string",
                    "description": "The city to get the weather for"
                }
            }
        }
    },
    {
        "name": "calculate",
        "description": "Perform basic mathematical calculations",
        "parameters": {
            "type": "object",
            "properties": {
                "expression": {
                    "type": "string",
                    "description": "Mathematical expression to evaluate"
                }
            }
        }
    }
]"""

# Create Gradio interface with a dark theme and mobile support
with gr.Blocks(
    title="SmolLM3-3B Chat",
    theme=gr.themes.Base().set(
        background_fill_primary="#1a1a1a",
        background_fill_secondary="#2d2d2d",
        border_color_primary="#404040",
        button_primary_background_fill="#4a9eff",
        button_primary_background_fill_hover="#5aa3ff",
        button_primary_text_color="#ffffff",
        block_background_fill="#2d2d2d",
        block_border_color="#404040",
        input_background_fill="#3a3a3a",
        input_border_color="#404040",
        slider_color="#4a9eff",
    ),
| css=""" | |
| /* Mobile-first responsive design */ | |
| @media (max-width: 768px) { | |
| .gradio-container { | |
| padding: 8px !important; | |
| } | |
| .gr-row { | |
| flex-direction: column !important; | |
| } | |
| .gr-column { | |
| width: 100% !important; | |
| min-width: 0 !important; | |
| } | |
| .gr-tabs { | |
| font-size: 14px !important; | |
| } | |
| .gr-button { | |
| width: 100% !important; | |
| margin: 2px 0 !important; | |
| } | |
| .gr-textbox { | |
| font-size: 16px !important; | |
| } | |
| .gr-chatbot { | |
| height: 400px !important; | |
| } | |
| .gr-markdown { | |
| font-size: 14px !important; | |
| } | |
| .gr-slider { | |
| width: 100% !important; | |
| } | |
| .settings-panel { | |
| margin-top: 20px !important; | |
| } | |
| } | |
| /* Settings panel styling */ | |
| .settings-panel { | |
| background-color: #2d2d2d !important; | |
| border: 1px solid #404040 !important; | |
| border-radius: 8px !important; | |
| padding: 16px !important; | |
| margin-top: 12px !important; | |
| } | |
| .settings-button { | |
| background-color: #3a3a3a !important; | |
| border: 1px solid #404040 !important; | |
| color: #ffffff !important; | |
| padding: 8px 16px !important; | |
| border-radius: 6px !important; | |
| cursor: pointer !important; | |
| font-size: 14px !important; | |
| margin-bottom: 8px !important; | |
| } | |
| .settings-button:hover { | |
| background-color: #4a4a4a !important; | |
| } | |
| /* Dark mode improvements */ | |
| .gr-chatbot { | |
| background-color: #2d2d2d !important; | |
| } | |
| .gr-chatbot .message { | |
| background-color: #3a3a3a !important; | |
| border: 1px solid #404040 !important; | |
| border-radius: 8px !important; | |
| margin: 4px 0 !important; | |
| padding: 8px !important; | |
| } | |
| .gr-chatbot .message.user { | |
| background-color: #4a9eff !important; | |
| color: white !important; | |
| } | |
| .gr-chatbot .message.bot { | |
| background-color: #3a3a3a !important; | |
| color: #ffffff !important; | |
| } | |
| /* Better mobile touch targets */ | |
| @media (max-width: 768px) { | |
| .gr-button { | |
| min-height: 44px !important; | |
| padding: 12px !important; | |
| } | |
| .gr-slider input { | |
| min-height: 44px !important; | |
| } | |
| .gr-checkbox { | |
| min-height: 44px !important; | |
| } | |
| } | |
| /* Improve readability */ | |
| .gr-markdown h1, .gr-markdown h2, .gr-markdown h3 { | |
| color: #ffffff !important; | |
| } | |
| .gr-markdown p, .gr-markdown li { | |
| color: #e0e0e0 !important; | |
| } | |
| /* Tab styling */ | |
| .gr-tabs .gr-tab { | |
| background-color: #3a3a3a !important; | |
| color: #ffffff !important; | |
| border-color: #404040 !important; | |
| } | |
| .gr-tabs .gr-tab.selected { | |
| background-color: #4a9eff !important; | |
| color: #ffffff !important; | |
| } | |
| """ | |
| ) as demo: | |
| gr.Markdown("# π€ SmolLM3-3B Chat Interface") | |
| gr.Markdown("Chat with SmolLM3-3B, a 3B parameter model with advanced reasoning, long context support, and tool calling capabilities.") | |
| with gr.Tabs(): | |
| with gr.TabItem("π¬ Standard Chat"): | |
| chatbot = gr.Chatbot(height=500, label="Chat with SmolLM3-3B") | |
| msg = gr.Textbox(label="Your message", placeholder="Type your message here...") | |
| with gr.Row(): | |
| submit = gr.Button("Send", variant="primary") | |
| clear = gr.Button("Clear") | |
| settings_btn = gr.Button("βοΈ Settings", size="sm") | |
            with gr.Column(visible=False, elem_classes="settings-panel") as settings_panel:
                gr.Markdown("### ⚙️ Advanced Settings")
                system_prompt = gr.Textbox(
                    label="System Prompt",
                    placeholder="Enter system instructions (optional)",
                    lines=3,
                    value="You are an AI assistant trained by HuggingFace. You are helpful, harmless, and honest.",
                )
                enable_thinking = gr.Checkbox(
                    label="Enable Extended Thinking",
                    value=True,
                    info="Enable reasoning traces for better responses",
                )
                temperature = gr.Slider(
                    minimum=0.0, maximum=2.0, value=0.6, step=0.1, label="Temperature"
                )
                top_p = gr.Slider(
                    minimum=0.0, maximum=1.0, value=0.95, step=0.05, label="Top-p"
                )
                max_tokens = gr.Slider(
                    minimum=1, maximum=32768, value=32768, step=1, label="Max Tokens"
                )
            settings_visible = gr.State(False)

            def respond(message, history, sys_prompt, thinking, temp, top_p_val, max_tok):
                response = chat_with_smollm3(message, history, sys_prompt, thinking, temp, top_p_val, max_tok)
                history.append((message, response))
                return "", history

            def toggle_settings(visible):
                # Component attributes are frozen after creation, so track
                # visibility in session state instead of reading settings_panel.visible
                return gr.update(visible=not visible), not visible

            submit.click(respond, [msg, chatbot, system_prompt, enable_thinking, temperature, top_p, max_tokens], [msg, chatbot])
            msg.submit(respond, [msg, chatbot, system_prompt, enable_thinking, temperature, top_p, max_tokens], [msg, chatbot])
            clear.click(lambda: ([], ""), outputs=[chatbot, msg])
            settings_btn.click(toggle_settings, inputs=[settings_visible], outputs=[settings_panel, settings_visible])
| with gr.TabItem("π οΈ Tool Calling"): | |
| tool_chatbot = gr.Chatbot(height=500, label="Chat with Tools") | |
| tool_msg = gr.Textbox(label="Your message", placeholder="Ask me to use tools...") | |
| with gr.Row(): | |
| tool_submit = gr.Button("Send", variant="primary") | |
| tool_clear = gr.Button("Clear") | |
| tool_settings_btn = gr.Button("βοΈ Settings", size="sm") | |
            with gr.Column(visible=False, elem_classes="settings-panel") as tool_settings_panel:
                gr.Markdown("### 🛠️ Tool Settings")
                tools_json = gr.Textbox(
                    label="Tools JSON",
                    placeholder="Enter tools as a JSON array",
                    lines=10,
                    value=example_tools,
                )
                tool_system_prompt = gr.Textbox(
                    label="System Prompt",
                    placeholder="Enter system instructions (optional)",
                    lines=2,
                    value="You are an AI assistant trained by HuggingFace. You are helpful, harmless, and honest.",
                )
                tool_thinking = gr.Checkbox(
                    label="Enable Extended Thinking",
                    value=False,
                    info="Enable reasoning traces for tool usage",
                )
                tool_temperature = gr.Slider(
                    minimum=0.0, maximum=2.0, value=0.6, step=0.1, label="Temperature"
                )
                tool_top_p = gr.Slider(
                    minimum=0.0, maximum=1.0, value=0.95, step=0.05, label="Top-p"
                )
                tool_max_tokens = gr.Slider(
                    minimum=1, maximum=32768, value=32768, step=1, label="Max Tokens"
                )
            tool_settings_visible = gr.State(False)

            def tool_respond(message, history, tools, sys_prompt, thinking, temp, top_p_val, max_tok):
                response = chat_with_tools(message, history, tools, sys_prompt, thinking, temp, top_p_val, max_tok)
                history.append((message, response))
                return "", history

            def toggle_tool_settings(visible):
                # Same state-tracking approach as the standard chat tab
                return gr.update(visible=not visible), not visible

            tool_submit.click(tool_respond, [tool_msg, tool_chatbot, tools_json, tool_system_prompt, tool_thinking, tool_temperature, tool_top_p, tool_max_tokens], [tool_msg, tool_chatbot])
            tool_msg.submit(tool_respond, [tool_msg, tool_chatbot, tools_json, tool_system_prompt, tool_thinking, tool_temperature, tool_top_p, tool_max_tokens], [tool_msg, tool_chatbot])
            tool_clear.click(lambda: ([], ""), outputs=[tool_chatbot, tool_msg])
            tool_settings_btn.click(toggle_tool_settings, inputs=[tool_settings_visible], outputs=[tool_settings_panel, tool_settings_visible])
| gr.Markdown(""" | |
| ### π Model Information | |
| - **Model**: HuggingFaceTB/SmolLM3-3B | |
| - **Features**: Advanced reasoning, long context (up to 128k tokens), multilingual support | |
| - **Languages**: English, French, Spanish, German, Italian, Portuguese (+ Arabic, Chinese, Russian) | |
| - **Extended Thinking**: Provides reasoning traces for better responses | |
| - **Tool Calling**: Supports XML-based tool calling for agentic workflows | |
| ### π‘ Usage Tips | |
| - Use Extended Thinking for complex reasoning tasks | |
| - Adjust temperature (0.6 recommended) for response creativity | |
| - Try different system prompts for specialized behaviors | |
| - Use tool calling for function-based interactions | |
| """) | |

if __name__ == "__main__":
    demo.launch()
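
# Optional tweak, not part of the original app: Gradio's request queue keeps
# long generations from blocking other users, e.g. replace the launch call with
#   demo.queue().launch()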