# NOTE: Hugging Face Space status was "Runtime error" when this file was captured.
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub id of the reasoning-tuned Qwen3 checkpoint used by this demo.
MODEL_NAME = "ValiantLabs/Qwen3-4B-Thinking-2507-Esper3.1"

# Load model & tokenizer once at startup so every request reuses them.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype="auto",  # let transformers pick fp16/bf16/fp32 for the hardware
    device_map="auto",   # place weights on GPU(s)/CPU automatically
)
def ask_question(prompt):
    """Generate a response (thinking trace + final answer) from the Qwen3 model.

    Args:
        prompt: The user's question as plain text.

    Returns:
        A ``(thinking_content, content)`` tuple: the model's internal
        reasoning trace and the final answer, both decoded without special
        tokens. On any failure, returns ``("⚠️ Error: ...", "")`` so the UI
        shows the problem instead of crashing.
    """
    try:
        prompt = (prompt or "").strip()
        if not prompt:
            # Nothing to ask — give the user a hint instead of running the model.
            return "", "Please enter a question."
        messages = [{"role": "user", "content": prompt}]
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=True,  # thinking mode: emit a <think>…</think> trace
        )
        inputs = tokenizer([text], return_tensors="pt").to(model.device)
        # inference_mode avoids building autograd state during generation,
        # cutting memory use on long (up to 4096-token) completions.
        with torch.inference_mode():
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=4096,
                temperature=0.7,
                do_sample=True,
            )
        # Keep only newly generated tokens (drop the echoed prompt).
        output_ids = generated_ids[0][len(inputs.input_ids[0]):].tolist()
        # Split at the last </think> token (id 151668). If the model produced
        # no thinking section, index stays 0 and everything is the answer.
        try:
            index = len(output_ids) - output_ids[::-1].index(151668)
        except ValueError:
            index = 0
        thinking_content = tokenizer.decode(
            output_ids[:index], skip_special_tokens=True
        ).strip("\n")
        content = tokenizer.decode(
            output_ids[index:], skip_special_tokens=True
        ).strip("\n")
        return thinking_content, content
    except Exception as e:
        # Surface the failure in the UI rather than crashing the request.
        return f"⚠️ Error: {e}", ""
# --- Gradio UI ---
# Two-column layout: the model's reasoning trace on the left, final answer
# on the right; a single button wires the textbox through ask_question.
with gr.Blocks(title="Qwen3 Thinking Chat") as demo:
    gr.Markdown("## 🧠 Qwen3-4B-Thinking — Ask Anything")
    gr.Markdown(
        "This demo uses **ValiantLabs/Qwen3-4B-Thinking-2507-Esper3.1**, "
        "a reasoning model that shows its internal 'thinking' trace before giving the final answer."
    )
    with gr.Row():
        prompt_box = gr.Textbox(
            label="Ask your question",
            placeholder="e.g. Explain how quantum entanglement works.",
            lines=3,
        )
    with gr.Row():
        think_output = gr.Textbox(label="🧩 Thinking process", lines=10)
        final_output = gr.Textbox(label="💬 Final answer", lines=10)
    ask_btn = gr.Button("🚀 Generate Answer")
    ask_btn.click(
        fn=ask_question,
        inputs=prompt_box,
        outputs=[think_output, final_output],
    )

demo.launch()