File size: 2,614 Bytes
46fc666
 
ed09431
46fc666
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed09431
46fc666
ed09431
 
 
 
 
 
 
 
46fc666
 
 
ed09431
 
46fc666
 
ed09431
46fc666
 
ed09431
 
 
46fc666
 
 
 
 
ed09431
46fc666
 
ed09431
 
 
46fc666
b1f68b3
 
 
 
 
 
 
 
 
 
 
ed09431
 
 
46fc666
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import gradio # For the GUI
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline # For LLMs!

# Load the checkpoint once at import time. This is a tiny one!
# The tokenizer is kept as its own module-level name because the chat
# formatting helper needs it as well as the pipeline.
MODEL_ID = "HuggingFaceTB/SmolLM2-135M-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
_model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
pipe = pipeline("text-generation", model=_model, tokenizer=tokenizer)

# Create a helper to format the chat message appropriately
def _format_chat(system_prompt: str, user_prompt: str, history: list[dict]) -> str:
    """Build the prompt string for the next assistant turn.

    The conversation is assembled as: one system message, the prior
    ``history`` entries (unchanged, in order), then the new user message.

    Args:
        system_prompt: Text placed in the leading ``system`` message.
        user_prompt: Text of the new ``user`` message.
        history: Earlier messages as dicts with ``role`` and ``content``
            keys. ``None`` is treated as an empty history.

    Returns:
        The prompt text. When the module-level ``tokenizer`` has a chat
        template, it is rendered with ``add_generation_prompt=True``;
        otherwise a plain ``role: content`` transcript ending in an open
        ``assistant:`` line is returned.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        *(history or []),
        {"role": "user", "content": user_prompt},
    ]

    if getattr(tokenizer, "chat_template", None):
        return tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

    # No chat template on this tokenizer: fall back to a simple transcript so
    # the app still produces a usable prompt instead of raising.
    transcript = [f"{m['role']}: {m['content']}" for m in messages]
    transcript.append("assistant:")
    return "\n".join(transcript)

# Create an LLM helper function
def _llm_generate(prompt: str, max_tokens: int, temperature: float) -> str:
    out = pipe(
        prompt,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        return_full_text=False
    )
    return out[0]["generated_text"].strip()

# Primary entry point for the GUI: turns one user message into one reply.
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
):
    """Produce the assistant's reply to ``message``.

    The system message, prior ``history`` and the new ``message`` are
    formatted into a single prompt, which is then handed to the generation
    helper together with the token limit and temperature.

    Returns:
        The generated reply text.
    """
    prompt = _format_chat(system_message, message, history)
    return _llm_generate(prompt, max_tokens=max_tokens, temperature=temperature)

# Last but not least, here's the UI! This one is pretty simple.
# The additional inputs are passed to `respond` after (message, history), in
# the order listed: system message, max new tokens, temperature.
chatbot = gradio.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[
        gradio.Textbox(
            value=(
                "You are a chatbot that helps come up with fancy, exciting names for research papers."
            ),
            label="System message",
        ),
        # Minimum is one step (16) rather than 0: a zero-token budget cannot
        # produce a reply.
        gradio.Slider(minimum=16, maximum=256, value=128, step=16, label="Max new tokens"),
        gradio.Slider(minimum=0.1, maximum=2.0, value=0.9, step=0.1, label="Temperature"),
    ],
)

# Page layout: a title, a short description, then the chat window.
with gradio.Blocks() as demo:
    # Let's start by adding a title and introduction
    gradio.Markdown(
        "# A Simple Chatbot"
    )
    gradio.Markdown(
        "This app runs a simple chatbot on CPU and exposes a few backend parameters too."
    )

    # Now insert the chat window
    chatbot.render()

# Only start the server when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch(debug=True)