JuanjoRosquete committed on
Commit
d1e87b2
1 Parent(s): a8f66b6

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Dependencies. Install these from the shell or a requirements file; a bare
# `pip install ...` line is not valid Python and must not appear in this module.
#   pip install tf-keras
#   pip install --upgrade diffusers transformers accelerate scipy safetensors
#   pip install --upgrade gradio
#   pip install numpy
import gradio as gr
import torch
from diffusers import EulerDiscreteScheduler, StableDiffusionPipeline
from transformers import pipeline

model_id = "stabilityai/stable-diffusion-2"

# Use the GPU with half precision when one is available; otherwise fall back
# to CPU with full precision so the app can still start.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# Text-to-image pipeline, using the Euler scheduler stored alongside the model.
scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
image_model = StableDiffusionPipeline.from_pretrained(
    model_id,
    scheduler=scheduler,
    torch_dtype=torch_dtype,
)
image_model = image_model.to(device)

# Speech-to-text pipeline used to turn the audio input into a prompt.
model = pipeline(
    "automatic-speech-recognition",
    "facebook/wav2vec2-large-xlsr-53-spanish",
)
def transcribe_text_audio(mic=None, file=None):
    """Transcribe an audio input and generate an image from the transcription.

    Args:
        mic: Audio from the microphone input, or None when nothing was
            recorded. Takes precedence over ``file``.
        file: Audio from the upload input, or None when nothing was uploaded.

    Returns:
        A ``(transcription, image)`` pair. When neither input is provided,
        the first element is a user-facing message and the image is None,
        so the caller always receives two values.
    """
    # The microphone recording wins when both inputs are present.
    audio = mic if mic is not None else file
    if audio is None:
        # Return one value per output; a lone string would not match the
        # two-element result returned on the success path.
        return "No se ha detectado ninguna entrada de audio", None

    transcription = model(audio)["text"]

    # The transcription is used directly as the text-to-image prompt.
    image = image_model(transcription).images[0].convert("RGB")
    return transcription, image
# Two audio inputs, one recorded from the microphone and one uploaded, both
# handed to the callback as file paths; the callback fills a text output and
# an image output.
microphone_input = gr.Audio(sources=["microphone"], type="filepath")
upload_input = gr.Audio(sources=["upload"], type="filepath")

demo = gr.Interface(
    fn=transcribe_text_audio,
    inputs=[microphone_input, upload_input],
    outputs=["text", "image"],
)
demo.launch()