rodrigomasini committed on
Commit
5e4694a
1 Parent(s): ae73f04

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -26
app.py CHANGED
@@ -11,27 +11,15 @@ from whisperspeech.pipeline import Pipeline
11
  DEVEL=os.environ.get('DEVEL', False)
12
 
13
  title = """
14
- <picture>
15
- <source srcset="https://huggingface.co/spaces/collabora/whisperspeech/resolve/main/dark-banner.png" media="(prefers-color-scheme: dark)" />
16
- <img alt="WhisperSpeech banner with Collabora and LAION logos" src="https://huggingface.co/spaces/collabora/whisperspeech/resolve/main/light-banner.png" style="width: 60%; margin: 0 auto;" />
17
- </picture>
18
 
19
- # Welcome to Collabora's WhisperSpeech
20
-
21
- WhisperSpeech is an Open Source text-to-speech system built by Collabora and LAION by inverting Whisper.
22
- The model is fully open and you can run it on your local hardware. It's like **Stable Diffusion but for speech**
23
- – both powerful and easily customizable.
24
-
25
- [You can contribute to WhisperSpeech on Github.](https://github.com/collabora/WhisperSpeech)
26
- You can also join the discussion on Discord [![](https://dcbadge.vercel.app/api/server/FANw4rHD5E)](https://discord.gg/FANw4rHD5E)
27
-
28
- Huge thanks to [Tonic](https://huggingface.co/Tonic) who helped build this Space for WhisperSpeech.
29
 
30
  ### How to Use It
31
 
32
  Write your text in the box, you can use language tags (`<en>` or `<pl>`) to create multilingual speech.
33
- Optionally you can upload a speech sample or give it a file URL to clone an existing voice. Check out the
34
- examples at the bottom of the page for inspiration.
35
  """
36
 
37
  footer = """
@@ -96,10 +84,6 @@ def whisper_speech_demo(multilingual_text, speaker_audio=None, speaker_url="", c
96
 
97
  return (24000, audio.T.numpy())
98
 
99
- # Did not work for me in Safari:
100
- # mp3 = io.BytesIO()
101
- # torchaudio.save(mp3, audio, 24000, format='mp3')
102
- # return mp3.getvalue()
103
 
104
  pipe = Pipeline(torch_compile=not DEVEL)
105
  # warmup will come from regenerating the examples
@@ -110,21 +94,21 @@ with gr.Blocks() as demo:
110
  with gr.Column(scale=2):
111
  text_input = gr.Textbox(label="Enter multilingual text💬📝",
112
  value=text_examples[0][0],
113
- info="You can use `<en>` for English and `<pl>` for Polish, see examples below.")
114
  cps = gr.Slider(value=14, minimum=10, maximum=15, step=.25,
115
- label="Tempo (in characters per second)")
116
  with gr.Row(equal_height=True):
117
- speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)🌬️💬",
118
  sources=["upload", "microphone"],
119
  type='filepath')
120
  url_input = gr.Textbox(label="alternatively, you can paste in an audio file URL:")
121
  gr.Markdown(" \n ") # fixes the bottom overflow from Audio
122
- generate_button = gr.Button("Try Collabora's WhisperSpeech🌟")
123
  with gr.Column(scale=1):
124
- output_audio = gr.Audio(label="WhisperSpeech says…")
125
 
126
  with gr.Column():
127
- gr.Markdown("### Try these examples to get started !🌟🌬️")
128
  gr.Examples(
129
  examples=text_examples,
130
  inputs=[text_input, url_input],
 
11
  DEVEL=os.environ.get('DEVEL', False)
12
 
13
  title = """
14
+ # Whisper
 
 
 
15
 
16
+ Based on WhisperSpeech - Open Source text-to-speech system - built by Collabora and LAION by inverting Whisper.
17
+ It's like **Stable Diffusion but for speech**
 
 
 
 
 
 
 
 
18
 
19
  ### How to Use It
20
 
21
  Write your text in the box, you can use language tags (`<en>` or `<pl>`) to create multilingual speech.
22
+ Optionally you can upload a speech sample or give it a file URL to clone an existing voice.
 
23
  """
24
 
25
  footer = """
 
84
 
85
  return (24000, audio.T.numpy())
86
 
 
 
 
 
87
 
88
  pipe = Pipeline(torch_compile=not DEVEL)
89
  # warmup will come from regenerating the examples
 
94
  with gr.Column(scale=2):
95
  text_input = gr.Textbox(label="Enter multilingual text💬📝",
96
  value=text_examples[0][0],
97
+ info="You can use `<en>` for English.")
98
  cps = gr.Slider(value=14, minimum=10, maximum=15, step=.25,
99
+ label="Time (in characters per second)")
100
  with gr.Row(equal_height=True):
101
+ speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)",
102
  sources=["upload", "microphone"],
103
  type='filepath')
104
  url_input = gr.Textbox(label="alternatively, you can paste in an audio file URL:")
105
  gr.Markdown(" \n ") # fixes the bottom overflow from Audio
106
+ generate_button = gr.Button("Run")
107
  with gr.Column(scale=1):
108
+ output_audio = gr.Audio(label="Result")
109
 
110
  with gr.Column():
111
+ gr.Markdown("### Examples:")
112
  gr.Examples(
113
  examples=text_examples,
114
  inputs=[text_input, url_input],