Spaces:

rahul7star
/

Qwen4-Think

Runtime error

App Files Files Community

Qwen4-Think / app.py

rahul7star

Create app.py

2c806d6 verified 2 months ago

raw

history blame contribute delete

2.41 kB

	import gradio as gr
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer

	# Checkpoint identifier handed to both from_pretrained() calls below.
	MODEL_NAME = "ValiantLabs/Qwen3-4B-Thinking-2507-Esper3.1"

	# The tokenizer and model are module-level objects: they are built a single
	# time when this file is executed and then shared by every request.
	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	# "auto" is passed for both dtype and device map, so neither is pinned here.
	model = AutoModelForCausalLM.from_pretrained(
	    MODEL_NAME, torch_dtype="auto", device_map="auto"
	)

	def ask_question(prompt):
	    """Generate a reply from the Qwen3 model, split into thinking trace and answer.

	    The prompt is wrapped as a single user turn, rendered through the
	    tokenizer's chat template with thinking enabled, and sampled
	    (temperature 0.7, at most 4096 new tokens). The newly generated tokens
	    are then split just after the last ``</think>`` token.

	    Args:
	        prompt: Question text entered by the user.

	    Returns:
	        A ``(thinking_content, content)`` tuple of strings. Tokens up to and
	        including the last ``</think>`` are decoded as the thinking trace,
	        the remainder as the final answer; if no ``</think>`` was generated
	        the trace is empty and the whole output is returned as the answer.
	        A blank prompt yields an empty trace and a short hint in the answer
	        slot. If anything fails, the first element is an ``"⚠️ Error: ..."``
	        message and the second is ``""``; exceptions are not propagated.
	    """
	    import logging  # local import keeps this block self-contained

	    # A blank prompt would otherwise still start a sampling run of up to
	    # 4096 tokens, so answer it immediately instead.
	    if prompt is None or (isinstance(prompt, str) and not prompt.strip()):
	        return "", "⚠️ Please enter a question."

	    try:
	        messages = [{"role": "user", "content": prompt}]
	        text = tokenizer.apply_chat_template(
	            messages,
	            tokenize=False,
	            add_generation_prompt=True,
	            enable_thinking=True,  # thinking mode
	        )
	        inputs = tokenizer([text], return_tensors="pt").to(model.device)

	        generated_ids = model.generate(
	            **inputs,
	            max_new_tokens=4096,
	            temperature=0.7,
	            do_sample=True,
	        )
	        # generate() returns prompt + completion; keep only the completion.
	        prompt_length = len(inputs.input_ids[0])
	        output_ids = generated_ids[0][prompt_length:].tolist()

	        # Ask the loaded tokenizer for the id of "</think>" instead of
	        # trusting a hard-coded number; fall back to the previously
	        # hard-coded id (151668) if the lookup yields nothing usable.
	        end_think_id = tokenizer.convert_tokens_to_ids("</think>")
	        if end_think_id is None or end_think_id == tokenizer.unk_token_id:
	            end_think_id = 151668

	        # Split point is one past the LAST "</think>"; 0 when it is absent.
	        split_at = 0
	        for position in range(len(output_ids) - 1, -1, -1):
	            if output_ids[position] == end_think_id:
	                split_at = position + 1
	                break

	        thinking_content = tokenizer.decode(
	            output_ids[:split_at], skip_special_tokens=True
	        ).strip("\n")
	        content = tokenizer.decode(
	            output_ids[split_at:], skip_special_tokens=True
	        ).strip("\n")

	        return thinking_content, content
	    except Exception as e:
	        # This is the UI boundary: report the failure in the output box, but
	        # also log the traceback so it is not lost.
	        logging.getLogger(__name__).exception("ask_question failed")
	        return f"⚠️ Error: {e}", ""

	# --- Gradio UI ---
	# Page contents, top to bottom: heading, intro text, a row with the question
	# box, a row with the two result boxes, and the button that runs
	# ask_question on the question and fills both result boxes.
	with gr.Blocks(title="Qwen3 Thinking Chat") as demo:
	    gr.Markdown("## 🧠 Qwen3-4B-Thinking — Ask Anything")
	    intro_text = (
	        "This demo uses ValiantLabs/Qwen3-4B-Thinking-2507-Esper3.1, "
	        "a reasoning model that shows its internal 'thinking' trace before giving the final answer."
	    )
	    gr.Markdown(intro_text)

	    # Input row: the user's question.
	    with gr.Row():
	        prompt_box = gr.Textbox(
	            lines=3,
	            label="Ask your question",
	            placeholder="e.g. Explain how quantum entanglement works.",
	        )

	    # Output row: reasoning trace and final answer.
	    with gr.Row():
	        think_output = gr.Textbox(lines=10, label="🧩 Thinking process")
	        final_output = gr.Textbox(lines=10, label="💬 Final answer")

	    ask_btn = gr.Button("🚀 Generate Answer")

	    # ask_question returns (thinking, answer), matching the two outputs.
	    ask_btn.click(ask_question, prompt_box, [think_output, final_output])

	# Start serving the app.
	demo.launch()