# medelharchaoui's picture
# adjust output audio and narrator output
# 9c6e277 verified
import gradio as gr
from transformers import pipeline
# Both models are loaded once at import time so that every request handled by
# the UI reuses the same pipeline objects.

# Image captioning: produces a text description for an input image.
img_text_pipe = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-base",
)

# Speech synthesis: turns the generated caption into an audio waveform.
narrator = pipeline(
    "text-to-speech",
    model="kakao-enterprise/vits-ljs",
)
def describe_image(file_path):
    """Caption an image and narrate the caption as speech.

    Args:
        file_path: The image to describe. Despite the name, the Gradio input
            wired to this function uses ``type="pil"``, so the UI hands over
            a PIL image rather than a path. ``None`` is accepted and means
            "no image was provided".

    Returns:
        A ``(sampling_rate, waveform)`` tuple suitable for a ``gr.Audio``
        output with ``type="numpy"``, or ``None`` when ``file_path`` is
        ``None`` (nothing to describe, so no audio is produced).
    """
    # Submitting the form without an image yields None; bail out early
    # instead of letting the captioning pipeline fail on it.
    if file_path is None:
        return None

    captions = img_text_pipe(file_path)
    description_text = captions[0]['generated_text']
    # Keep the caption in the application log for debugging.
    print(description_text)

    narrated_text = narrator(description_text)
    # NOTE(review): indexing [0] presumably drops a leading batch/channel
    # axis of the synthesized waveform -- confirm against the pipeline output.
    return (narrated_text["sampling_rate"], narrated_text["audio"][0])
# Web UI: one image upload in, one auto-playing audio clip out.
iface = gr.Interface(
    fn=describe_image,
    # type="pil" makes Gradio pass the upload to the callback as a PIL image.
    inputs=gr.Image(label="Input image", type="pil"),
    # type="numpy" matches the (sampling_rate, waveform) tuple the callback
    # returns.
    outputs=gr.Audio(label="Narration", type="numpy", autoplay=True),
)

# Start the Gradio server.
iface.launch()