Spaces:
Runtime error
Runtime error
File size: 1,261 Bytes
b56dea8 19d9d4a b56dea8 19d9d4a b56dea8 39f177f b56dea8 235005a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import gradio as gr
from transformers import BlipForConditionalGeneration, BlipProcessor
import torch
import tempfile
from gtts import gTTS
# Load models (module-level side effect: downloads/caches BLIP weights on first run).
# CPU-only inference; no CUDA device selection is attempted.
device = "cpu"
# Processor handles image preprocessing (resize/normalize) and token decoding.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model_image_captioning = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to(device)
def generate_caption_tts(image):
    """Caption *image* with BLIP and synthesize the caption to speech.

    Args:
        image: Input image (PIL image or array accepted by the BLIP processor).

    Returns:
        tuple: ``(caption, audio_path)`` — the generated English caption and
        the path to a temporary MP3 file with its gTTS rendering. The caller
        (Gradio) is responsible for the temp file's lifetime.
    """
    inputs = processor(images=image, return_tensors="pt")
    # Pass generation options as explicit kwargs instead of injecting them into
    # the processor's output dict (which mixes options with model tensors).
    # no_grad: pure inference, skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model_image_captioning.generate(
            **inputs, max_length=20, num_beams=5
        )
    caption = processor.batch_decode(outputs, skip_special_tokens=True)[0]
    speech = gTTS(caption, lang="en")
    # NamedTemporaryFile closes its descriptor on exit — unlike the original
    # tempfile.mkstemp()[1], which leaked the open fd — and the .mp3 suffix
    # matches what gTTS actually writes.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        tmp_file = tmp.name
    speech.save(tmp_file)
    return (caption, tmp_file)
# UI strings shown in the Gradio page header (Chinese; runtime-visible text,
# kept verbatim). Title roughly: "Dr. Li Sa's work - AI image-understanding bot";
# description asks the user to upload an image and names the BLIP model.
title ="<span style='font-style: italic; font-weight: bold; color: darkred;'>李飒博士作品</span> - AI图像理解交互机器人"
description = "BLPM模型:引导性语言图像预训练以实现统一视觉语言理解和生成。 请上传您的图像"
# Build and launch the Gradio UI: one image in, caption text + TTS audio out.
iface = gr.Interface(
    fn=generate_caption_tts,
    title=title,
    description=description,
    # gr.inputs.* was removed in modern Gradio; use the top-level component.
    # No fixed shape needed — the BLIP processor resizes internally.
    inputs=gr.Image(type="pil"),
    outputs=["text", "audio"],
)
#iface.launch(share=True, debug=True)
iface.launch()