Lluiss committed on
Commit a1711e2
1 Parent(s): 8964d2d

Add app and requirements files

Files changed (2)
  1. app.py +39 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,39 @@
+ from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
+ import torch
+
+ model_id = "stabilityai/stable-diffusion-2"
+
+ # Use the Euler scheduler here instead
+ scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
+ pipe = StableDiffusionPipeline.from_pretrained(model_id, scheduler=scheduler, torch_dtype=torch.float16)
+ pipe = pipe.to("cuda")
+
+ def text_to_image(prompt):
+     image = pipe(prompt).images[0]
+     return image
+
+ from transformers import pipeline
+ import gradio as gr
+
+ # Specify the task the pipeline is being created for (ASR)
+ model = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-spanish")
+
+ def transcribe_audio(mic=None, file=None):
+     if mic is not None:
+         audio = mic
+     elif file is not None:
+         audio = file
+     else:
+         return "You must either provide a mic recording or a file"
+     transcription = model(audio)["text"]
+     image = text_to_image(transcription)
+     return [transcription, image]
+
+ gr.Interface(
+     fn=transcribe_audio,
+     inputs=[
+         gr.Audio(sources=["microphone"], type="filepath", label="Speak here..."),
+         gr.Audio(sources=["upload"], type="filepath", label="Upload file here..."),
+     ],
+     outputs=[gr.Textbox(label="Transcription"), gr.Image(label="Generated Image")],
+ ).launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ transformers
+ torch
+ diffusers
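
A minimal local sanity check of the added app.py, as a sketch: it assumes a CUDA GPU, the packages from requirements.txt plus gradio installed, the globals from app.py already loaded, and a Spanish-language recording at "sample.wav" (a hypothetical path); it bypasses the Gradio UI and calls the handler directly.

    # Sketch only: "sample.wav" is a hypothetical Spanish recording on disk.
    transcription, image = transcribe_audio(file="sample.wav")
    print(transcription)           # recognized Spanish text from wav2vec2
    image.save("generated.png")    # Stable Diffusion image for that text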