AudioToImage / app.py
Bartusito's picture
Update app.py
0d72b3d
raw
history blame contribute delete
No virus
1.17 kB
import gradio as gr
import numpy as np
import torch
from huggingsound import SpeechRecognitionModel
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from transformers import pipeline
# Función para convertir la tasa de muestreo del audio de entrada
def modelo1(audio):
print(audio)
whisper = pipeline('automatic-speech-recognition', model='openai/whisper-medium', device=-1) # Cambia 'device' a -1 para usar la CPU
print(np.array(audio[1]))
text = whisper(np.array(audio[1]))
print(text["text"])
return text["text"]
def modelo2(text):
model_id = "stabilityai/stable-diffusion-2-1"
# Use the DPMSolverMultistepScheduler (DPM-Solver++) scheduler here instead
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float32)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
image = pipe(text).images[0]
return image
def execution(audio):
modelo1res = modelo1(audio)
modelo2res = modelo2(modelo1res)
return modelo2res
if __name__ == "__main__":
demo = gr.Interface(fn=execution, inputs="audio", outputs="image")
demo.launch()