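"""
Gradio Space that captions images with the Salesforce/blip-image-captioning-large
checkpoint via the transformers image-to-text pipeline.
"""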
from transformers import pipeline, AutoTokenizer
import io
import base64
from PIL import Image
import gradio as gr
model = "Salesforce/blip-image-captioning-large"
tokenizer = AutoTokenizer.from_pretrained(model, use_fast=True)
pipe = pipeline(task="image-to-text",
model=model,
tokenizer=tokenizer)


def image_to_base64(image: Image.Image) -> str:
    """
    Convert a PIL image to a base64-encoded PNG string.
    """
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")


def caption_image(image):
    """Run the captioning pipeline on an image and return the caption in upper case."""
    result = pipe(
        image_to_base64(image),
        # Optional generation settings (see the sketch after this function):
        # temperature=0.7,
        # max_length=130,
        # min_length=30,
        # do_sample=True
    )
    return result[0]['generated_text'].upper()
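
# A minimal sketch, not part of the original app: if sampled captions are wanted,
# the commented-out settings above could be forwarded to the underlying
# .generate() call through the pipeline's generate_kwargs argument. The concrete
# values (temperature, lengths) are illustrative assumptions, not tuned defaults.
#
#     result = pipe(
#         image_to_base64(image),
#         generate_kwargs={"max_length": 130, "min_length": 30,
#                          "do_sample": True, "temperature": 0.7},
#     )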


if __name__ == "__main__":
    gr.close_all()
    with gr.Blocks() as interface:
        gr.Markdown("### Image Captioning using BLIP Large")
        # Input image, caption output, and action buttons, each on its own row.
        with gr.Row():
            image_input = gr.Image(type="pil", label="Image")
        with gr.Row():
            caption_output = gr.Textbox(lines=2, label="Caption")
        with gr.Row():
            clear_button = gr.ClearButton()
            caption_button = gr.Button("Caption", variant="primary")
        with gr.Row():
            example_images = gr.Examples(
                examples=[
                    "data/image1.jpg",
                    "data/image2.png",
                    "data/image3.jpg",
                    "data/image4.jpg",
                    "data/image5.jpg",
                    "data/image6.png",
                    "data/image7.png",
                    "data/image8.jpeg",
                    "data/image9.jpeg",
                    "data/image10.jpg",
                ],
                inputs=[image_input],
                label="Example Images"
            )
        # Wire the buttons: caption the current image, or clear both components.
        caption_button.click(fn=caption_image,
                             inputs=[image_input],
                             outputs=[caption_output])
        clear_button.click(fn=lambda: [None, ""],
                           inputs=[],
                           outputs=[image_input, caption_output])
    interface.launch(share=True, server_port=7860)