# prog-tasca-7 / app.py
# Lluiss's picture
# Add app and requirements files
# a1711e2
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
import torch
model_id = "stabilityai/stable-diffusion-2"
# Pick the device at start-up instead of hard-coding "cuda", so the module can
# still be imported and run on hosts without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision is kept for the GPU path (the original behaviour); float32 is
# used on CPU as the conservative default.
torch_dtype = torch.float16 if device == "cuda" else torch.float32
# Use the Euler scheduler here instead
scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    scheduler=scheduler,
    torch_dtype=torch_dtype,
)
pipe = pipe.to(device)
def text_to_image(prompt):
    """Generate an image for ``prompt`` with the module-level ``pipe``.

    Args:
        prompt: Text passed unchanged to the diffusion pipeline.

    Returns:
        The first entry of the pipeline result's ``images`` sequence.
    """
    return pipe(prompt).images[0]
from transformers import pipeline
import gradio as gr
# Specify the task type the pipeline is being created for (ASR).
# The checkpoint name indicates a Spanish wav2vec2 XLSR-53 model.
model = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-spanish")
def transcribe_audio(mic=None, file=None):
    """Transcribe an audio clip and generate an image from the transcription.

    Args:
        mic: Audio from the microphone input; takes precedence over ``file``
            when both are provided.
        file: Audio from the upload input; used only when ``mic`` is None.

    Returns:
        A two-element list ``[text, image]``. When neither input is given,
        ``text`` is an explanatory message and ``image`` is ``None``;
        otherwise ``text`` is the transcription and ``image`` is the result
        of ``text_to_image(text)``.
    """
    audio = mic if mic is not None else file
    if audio is None:
        # The interface in this file wires this function to two outputs
        # (textbox and image), so the message must be paired with an empty
        # image slot rather than returned as a bare string.
        return ["You must either provide a mic recording or a file", None]

    transcription = model(audio)["text"]
    image = text_to_image(transcription)
    return [transcription, image]
# Build the UI: two audio inputs (microphone and upload), both delivered to the
# handler as file paths, and two outputs (transcription text and image).
mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak here...")
upload_input = gr.Audio(sources=["upload"], type="filepath", label="Upload file here...")
transcription_box = gr.Textbox(label="Transcription")
generated_image = gr.Image(label="Generated Image")

demo = gr.Interface(
    fn=transcribe_audio,
    inputs=[mic_input, upload_input],
    outputs=[transcription_box, generated_image],
)
demo.launch(debug=True)