# Gradio demo: caption an uploaded image via an OpenAI-compatible chat endpoint.
import base64
import os
from io import BytesIO

import gradio as gr
from huggingface_hub import InferenceClient

# Default prompt, overridable via the PROMPT environment variable.
PROMPT = os.environ.get("PROMPT", "Describe this image.")

# Point the client at the OpenAI-compatible Pollinations endpoint.
client = InferenceClient(model="https://text.pollinations.ai/openai")


def image_to_base64(image):
    """Encode a PIL image as a base64 JPEG string."""
    buf = BytesIO()
    # JPEG has no alpha channel, so convert to RGB before saving.
    image.convert("RGB").save(buf, format="JPEG")
    return base64.b64encode(buf.getvalue()).decode("utf-8")


def caption(image, prompt):
    """Send the image and prompt to the chat endpoint and return the generated caption."""
    image_b64 = image_to_base64(image)
    return client.chat.completions.create(
        model="openai-large",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
                    },
                    {"type": "text", "text": prompt},
                ],
            }
        ],
        max_tokens=1024,
    ).choices[0].message.content


gr.Interface(
    caption,
    inputs=[
        gr.Image(type="pil", label="Image"),
        gr.Textbox(label="Prompt", value=PROMPT),
    ],
    outputs=gr.Textbox(label="Caption"),
    title="Image Captioning",
).launch(debug=True)