# CodeBook / app.py
import gradio as gr
from transformers import pipeline
# ---------- Default Models ----------
DEFAULT_MODELS = {
    "prompt_refiner": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "code_model": "codellama/CodeLlama-7b-Instruct-hf",
    "book_model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "critic_1": "google/gemma-2-9b-it",
    "critic_2": "meta-llama/Meta-Llama-3-8B-Instruct",
}
# ---------- Model Descriptions ----------
MODEL_INFO = {
    "mistralai/Mixtral-8x7B-Instruct-v0.1": "Balanced generalist; strong in structured reasoning and storytelling.",
    "codellama/CodeLlama-7b-Instruct-hf": "Excellent code generator; best for pseudocode expansion and logic clarity.",
    "tiiuae/falcon-7b-instruct": "Fast and lightweight; good for simple creative text, but less technical precision.",
    "google/gemma-2-9b-it": "Analytical critic that provides detailed, structured feedback.",
    "meta-llama/Meta-Llama-3-8B-Instruct": "Balanced critic; creative and nuanced, slightly more lenient.",
    "phind/Phind-CodeLlama-34B-v2": "Expert coder model; verbose but deeply logical and precise.",
    "stabilityai/stablelm-2-12b": "Fluent natural-language generator; great for fiction and tone consistency.",
    "gpt2-small": "117M params: Very light text generation for prototyping.",
    "tinybert-4": "14M params: Extremely compact, best for classification or short text outputs.",
    "smollm2-135m": "135M params: Small generative tasks with lightweight footprint.",
    "qwen2.5-0.5b-instruct": "Approx 500M params: Instruction-following moderate model.",
    "tinyllama-1.1b": "Approx 1.1B params: General purpose small LLM, story & code generation.",
    "llama3.2-1b": "Approx 1B params: Balanced small LLM for chat and generation.",
}
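# Note: the last six keys above are informal short labels rather than full Hugging Face
# repo IDs (the hub names are closer to "gpt2", "HuggingFaceTB/SmolLM2-135M",
# "Qwen/Qwen2.5-0.5B-Instruct", etc.), so they would need to be mapped to real repo IDs
# before being passed to load_pipeline().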
# ---------- Helpers ----------
def load_pipeline(model_name):
    """Load a text-generation pipeline for the given model name."""
    return pipeline("text-generation", model=model_name)
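# Each call above rebuilds the pipeline from scratch, which is slow for repeated use.
# A cached variant is sketched below (an optional addition, not wired into the app):
from functools import lru_cache

@lru_cache(maxsize=4)
def load_pipeline_cached(model_name):
    """Hypothetical memoised loader; reuses an already-built pipeline per model name."""
    return pipeline("text-generation", model=model_name)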
# ---------- Core Logic ----------
def refine_prompt(idea, model_name):
    """Turn a raw idea into a concise, high-quality prompt."""
    model = load_pipeline(model_name)
    refined = model(
        f"Refine this creative idea into a concise, high-quality prompt: {idea}",
        max_new_tokens=200
    )[0]["generated_text"]
    return refined.strip()
def generate_code(prompt, model_name):
    """Generate code in three passes: pseudocode, a simple snippet, then a full program."""
    model = load_pipeline(model_name)
    pseudo = model(f"Create simple pseudocode for: {prompt}", max_new_tokens=200)[0]["generated_text"]
    simple = model(f"Expand this pseudocode into a simple code snippet:\n{pseudo}", max_new_tokens=300)[0]["generated_text"]
    full = model(f"Turn this snippet into a complete, functional program:\n{simple}", max_new_tokens=700)[0]["generated_text"]
    return pseudo.strip(), simple.strip(), full.strip()
def generate_book(prompt, model_name):
    """Write a short book section by section, following a fixed narrative structure."""
    model = load_pipeline(model_name)
    structure = ["Start", "Development", "Climax", "Conclusion", "End"]
    parts = []
    for section in structure:
        part = model(f"Write the {section} section of a short book based on this idea: {prompt}", max_new_tokens=400)[0]["generated_text"]
        parts.append(f"### {section}\n{part.strip()}\n")
    return "\n".join(parts)
def refine_output(output_text, model_name):
    """Polish generated text or code while preserving its meaning."""
    model = load_pipeline(model_name)
    refined = model(f"Improve this text/code while preserving meaning and coherence:\n{output_text}", max_new_tokens=700)[0]["generated_text"]
    return refined.strip()
def get_critic_feedback(output_text, model1_name, model2_name):
    """Ask two critic models to rate the output from 0 to 100 and justify their scores."""
    critic1 = load_pipeline(model1_name)
    critic2 = load_pipeline(model2_name)
    critique_1 = critic1(f"Rate this text from 0 to 100 and justify the score briefly:\n{output_text}", max_new_tokens=200)[0]["generated_text"]
    critique_2 = critic2(f"Rate this text from 0 to 100 and justify the score briefly:\n{output_text}", max_new_tokens=200)[0]["generated_text"]
    return critique_1.strip(), critique_2.strip()
# ---------- Workflow ----------
def workflow(idea, mode, prompt_model, code_model, book_model, critic1_model, critic2_model):
    """Role-based pipeline: refine the idea, generate code or a book, polish it, then critique it."""
    refined_prompt = refine_prompt(idea, prompt_model)
    if mode == "Code mode":
        pseudo, simple, full = generate_code(refined_prompt, code_model)
        generated_output = f"## Refined Prompt\n{refined_prompt}\n\n### Pseudocode\n{pseudo}\n\n### Simple Code\n{simple}\n\n### Final Code\n{full}"
        refined_final = refine_output(full, prompt_model)
    else:
        book_text = generate_book(refined_prompt, book_model)
        generated_output = f"## Refined Prompt\n{refined_prompt}\n\n{book_text}"
        refined_final = refine_output(book_text, prompt_model)
    # Critics now evaluate the REFINED version
    feedback1, feedback2 = get_critic_feedback(refined_final, critic1_model, critic2_model)
    return refined_prompt, generated_output, refined_final, feedback1, feedback2
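# Example invocation of the role-based workflow above (a sketch using the DEFAULT_MODELS roles):
# refined, output, final, fb1, fb2 = workflow(
#     "A tool that teaches programming using natural language puzzles",
#     "Code mode",
#     DEFAULT_MODELS["prompt_refiner"], DEFAULT_MODELS["code_model"], DEFAULT_MODELS["book_model"],
#     DEFAULT_MODELS["critic_1"], DEFAULT_MODELS["critic_2"],
# )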
# -------------------------------
# Model configuration dictionary
# (the definitions below redefine MODEL_INFO, load_pipeline, and workflow,
#  so they are the ones the Gradio interface at the bottom actually uses)
# -------------------------------
MODEL_INFO = {
    "gpt2": "117M params: Classic small model for text generation, coherent short outputs.",
    "tiiuae/falcon-rw-1b": "1B params: Lightweight general model, good for creative text or simple logic.",
    "microsoft/phi-2": "2.7B params: Compact and strong for reasoning or code, moderate GPU load.",
    "Qwen/Qwen2.5-0.5B-Instruct": "0.5B params: Efficient instruction model, performs well for structured prompts.",
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0": "1.1B params: Balanced, fast, and decent for storytelling and small code snippets.",
    "SmolLM2-135M": "135M params: Extremely light, suitable for quick text generation with limited coherence.",
}
def get_model_description(model_name):
    """Return description of selected model."""
    return MODEL_INFO.get(model_name, "Select a model to view details.")
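# For example: get_model_description("gpt2") returns the "117M params: ..." line above,
# while an unknown name falls back to "Select a model to view details."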
# -------------------------------
# Pipelines Setup
# -------------------------------
def load_pipeline(model_name):
    """Load a text generation pipeline for a given model."""
    return pipeline("text-generation", model=model_name, device_map="auto")
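# Note: device_map="auto" asks Accelerate to place the model weights on the available
# device(s) automatically; it requires the `accelerate` package to be installed.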
# Default base models for specific roles
REFINER_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
CRITIC_MODEL_1 = "Qwen/Qwen2.5-0.5B-Instruct"
CRITIC_MODEL_2 = "tiiuae/falcon-rw-1b"
# Preload pipelines for speed
refiner_pipe = load_pipeline(REFINER_MODEL)
critic_pipe_1 = load_pipeline(CRITIC_MODEL_1)
critic_pipe_2 = load_pipeline(CRITIC_MODEL_2)
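# Keeping all three pipelines resident trades memory for latency: nothing is reloaded
# per request, but the refiner and both critics must fit in RAM/VRAM at the same time.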
# -------------------------------
# Core Logic
# -------------------------------
def workflow(idea, mode, model_name):
    """Refine the idea, generate code or a book, have two critics score it, then refine again."""
    # Step 1: Refine the idea
    ref_prompt = f"Refine this idea into a clear, specific prompt for {mode}:\n\n{idea}"
    # do_sample=True is required for temperature to take effect in the generation pipeline
    refined = refiner_pipe(ref_prompt, max_new_tokens=120, temperature=0.7, do_sample=True)[0]["generated_text"]
    # Step 2: Generate output
    gen_pipe = load_pipeline(model_name)
    if mode == "Code mode":
        code_prompt = f"Create complete working code for this idea:\n\n{refined}\nInclude comments and clear structure."
    else:
        code_prompt = f"Write a short book with sections: Start, Development, Climax, Conclusion, and End. The theme:\n\n{refined}"
    output = gen_pipe(code_prompt, max_new_tokens=500, temperature=0.8, do_sample=True)[0]["generated_text"]
    # Step 3: Critics
    critique_prompt = (
        f"Rate the following {mode} output from 0 to 100, and explain weaknesses and improvements:\n\n{output}"
    )
    feedback_1 = critic_pipe_1(critique_prompt, max_new_tokens=200)[0]["generated_text"]
    feedback_2 = critic_pipe_2(critique_prompt, max_new_tokens=200)[0]["generated_text"]
    # Try to extract a numeric rating
    def extract_score(text):
        import re
        match = re.search(r"(\d{1,3})", text)
        if match:
            score = int(match.group(1))
            return min(100, max(0, score))
        return 50
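    # Example: extract_score("I would rate this 87 out of 100 ...") -> 87; text with no digits
    # falls back to 50. The regex simply takes the first 1-3 digit run, so a stray leading
    # number in the critique can be mistaken for the score.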
    score1 = extract_score(feedback_1)
    score2 = extract_score(feedback_2)
    avg_score = (score1 + score2) / 2
    # Step 4: Refine based on critics
    refine_final_prompt = f"Refine this output based on the feedback from these two critics:\n\nCritic 1: {feedback_1}\n\nCritic 2: {feedback_2}\n\nOriginal Output:\n{output}"
    final_output = refiner_pipe(refine_final_prompt, max_new_tokens=400)[0]["generated_text"]
    combined_feedback = f"Critic 1 ({score1}/100): {feedback_1}\n\nCritic 2 ({score2}/100): {feedback_2}"
    return refined, output, avg_score, combined_feedback, final_output
# -------------------------------
# Gradio Interface
# -------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🧩 AI Idea Refinement & Creation Workflow")
    idea_input = gr.Textbox(
        label="💡 Your Idea",
        placeholder="Type your idea (e.g. 'A tool that teaches programming using natural language puzzles')"
    )
    mode = gr.Radio(["Code mode", "Book mode"], label="Select Mode", value="Code mode")
    with gr.Accordion("⚙️ Advanced Options", open=False):
        model_dropdown = gr.Dropdown(
            choices=list(MODEL_INFO.keys()),
            value="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            label="Model for Main Generation"
        )
        model_desc = gr.Markdown(get_model_description("TinyLlama/TinyLlama-1.1B-Chat-v1.0"))
        model_dropdown.change(fn=get_model_description, inputs=model_dropdown, outputs=model_desc)
    refined_prompt = gr.Textbox(label="🧠 Refined Prompt", interactive=False)
    initial_output = gr.Textbox(label="🧾 Generated Output (Pre-Critics)", lines=10)
    critic_score = gr.Number(label="📊 Average Score (0–100)", interactive=False)
    critic_feedback = gr.Textbox(label="🧩 Critics’ Combined Feedback", lines=10)
    refined_output = gr.Textbox(label="💎 Final Refined Output (Post-Critics)", lines=10)
    run_button = gr.Button("🚀 Run Full Workflow")
    run_button.click(
        fn=workflow,
        inputs=[idea_input, mode, model_dropdown],
        outputs=[refined_prompt, initial_output, critic_score, critic_feedback, refined_output]
    )

demo.launch()