# Image captioning app — BLIP large via Hugging Face pipeline (commit 4ac4222).
from transformers import pipeline, AutoTokenizer
import io
import base64
from PIL import Image
import gradio as gr
# Hugging Face Hub model id for the BLIP-large image-captioning checkpoint.
model = "Salesforce/blip-image-captioning-large"
# NOTE(review): from_pretrained downloads weights from the Hub on first run —
# this module has network/disk side effects at import time.
tokenizer = AutoTokenizer.from_pretrained(model, use_fast=True)
# Shared image-to-text pipeline used by caption_image() below.
pipe = pipeline(task="image-to-text",
model=model,
tokenizer=tokenizer)
def image_to_base64(image: "Image.Image") -> str:
    """Encode an image as a base64 string of its PNG bytes.

    Args:
        image: A PIL image (any object exposing ``save(buffer, format=...)``).

    Returns:
        The PNG-encoded image as a base64 ASCII string.
    """
    # Render the image to an in-memory PNG; avoid shadowing the
    # builtin `bytearray` with the buffer name.
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    # .decode() already yields str — no extra str() wrapper needed.
    return base64.b64encode(buffer.getvalue()).decode("utf-8")
def caption_image(image):
    """Caption an image with the module-level BLIP pipeline.

    Args:
        image: A PIL image as delivered by the Gradio image component.

    Returns:
        The generated caption, upper-cased for display.
    """
    # The pipeline accepts a base64-encoded image string as input.
    # (Removed commented-out sampling parameters that were dead code.)
    result = pipe(image_to_base64(image))
    # pipeline(task="image-to-text") returns [{"generated_text": ...}].
    return result[0]['generated_text'].upper()
if __name__ == "__main__":
    # Shut down any Gradio servers left over from a previous run.
    gr.close_all()

    def reset_fields():
        """Blank out both the image input and the caption output."""
        return [None, ""]

    # Build the UI. Component creation order matches the visual layout.
    with gr.Blocks() as demo:
        gr.Markdown("### Image Captioning using BLIP Large")
        with gr.Row():
            img_in = gr.Image(type="pil", label="Image")
        with gr.Row():
            caption_box = gr.Textbox(lines=2, label="Caption")
        with gr.Row():
            btn_clear = gr.ClearButton()
            btn_caption = gr.Button("Caption", variant="primary")
        with gr.Row():
            gr.Examples(
                examples=[
                    "data/image1.jpg",
                    "data/image2.png",
                    "data/image3.jpg",
                    "data/image4.jpg",
                    "data/image5.jpg",
                    "data/image6.png",
                    "data/image7.png",
                    "data/image8.jpeg",
                    "data/image9.jpeg",
                    "data/image10.jpg",
                ],
                inputs=[img_in],
                label="Example Images",
            )
        # Wire the buttons to their handlers.
        btn_caption.click(fn=caption_image,
                          inputs=[img_in],
                          outputs=[caption_box])
        btn_clear.click(fn=reset_fields,
                        inputs=[],
                        outputs=[img_in, caption_box])

    demo.launch(share=True, server_port=7860)