# Hugging Face Space: image captioning with spoken (TTS) output.
# Use a pipeline as a high-level helper
import torch
from transformers import pipeline
from scipy.io import wavfile
from PIL import Image
import gradio as gr

# Run on GPU when one is available; both pipelines accept a device string.
device = "cuda" if torch.cuda.is_available() else "cpu"

# BLIP captioning model: image -> descriptive text.
image_pipe = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-large",
    device=device,
)

# VITS speech-synthesis model: text -> waveform.
# NOTE: name kept as `narator` (sic) because generate_audio() refers to it.
narator = pipeline(
    "text-to-speech",
    model="kakao-enterprise/vits-ljs",
    device=device,
)
def generate_audio(text):
    """Synthesize *text* to speech and write it to 'audio.wav'.

    Returns the path of the written WAV file so Gradio's Audio output
    component can play it.
    """
    # The TTS pipeline returns a dict with 'audio' and 'sampling_rate';
    # the [0] index below implies the audio array is 2-D, i.e. shaped
    # (1, num_samples) — presumably the VITS batch dimension (confirm).
    audio_text = narator(text)
    # save the audio to a WAV file
    wavfile.write(filename="audio.wav",
                  rate=audio_text['sampling_rate'],
                  data=audio_text['audio'][0])
    return "audio.wav"
def caption_my_image(image_path):
    """Caption an image, narrate the caption, and return the WAV path.

    ``image_path`` is the value produced by the Gradio Image input
    (a PIL image, given ``type="pil"`` on the component).
    """
    # The captioning pipeline returns a list like
    # [{'generated_text': '...'}]; take the first candidate.
    image = image_pipe(image_path)
    caption_text = image[0]['generated_text']
    return generate_audio(caption_text)
# Wire the captioning function into a simple Gradio UI:
# one image input, one audio (spoken caption) output.
demo = gr.Interface(
    fn=caption_my_image,
    inputs=[gr.Image(label="Image", type="pil")],
    outputs=[gr.Audio(label="Image Caption")],
    title="@SmartChoiceLearningHub HF Project 1 :Image to Text to Speech",
    description="This app generates a caption for an image and converts the caption to speech.",
)
demo.launch()