awacke1 committed
Commit 5fe20b3 · verified · 1 Parent(s): e315c05

Update app.py

Files changed (1):
  1. app.py +7 -29
app.py CHANGED
@@ -11,55 +11,37 @@ from whisperspeech.pipeline import Pipeline
 DEVEL=os.environ.get('DEVEL', False)
 
 title = """
-<picture>
-  <source srcset="https://huggingface.co/spaces/collabora/whisperspeech/resolve/main/dark-banner.png" media="(prefers-color-scheme: dark)" />
-  <img alt="WhisperSpeech banner with Collabora and LAION logos" src="https://huggingface.co/spaces/collabora/whisperspeech/resolve/main/light-banner.png" style="width: 60%; margin: 0 auto;" />
-</picture>
-
-# Welcome to Collabora's WhisperSpeech
-
 WhisperSpeech is an Open Source text-to-speech system built by Collabora and LAION by inverting Whisper.
-The model is fully open and you can run it on your local hardware. It's like **Stable Diffusion but for speech**
-– both powerful and easily customizable.
-
-[You can contribute to WhisperSpeech on Github.](https://github.com/collabora/WhisperSpeech)
-You can also join the discussion on Discord [![](https://dcbadge.vercel.app/api/server/FANw4rHD5E)](https://discord.gg/FANw4rHD5E)
-
-Huge thanks to [Tonic](https://huggingface.co/Tonic) who helped build this Space for WhisperSpeech.
+The model is fully open and you can run it on your local hardware.
+https://github.com/collabora/WhisperSpeech
+https://discord.gg/FANw4rHD5E
 
 ### How to Use It
-
 Write your text in the box; you can use language tags (`<en>` or `<pl>`) to create multilingual speech.
 Optionally you can upload a speech sample or give it a file URL to clone an existing voice. Check out the
 examples at the bottom of the page for inspiration.
 """
 
 footer = """
-
 ### How to use it locally
-
 ```
 pip install -U WhisperSpeech
 ```
-
 Afterwards:
-
 ```
 from whisperspeech.pipeline import Pipeline
-
 pipe = Pipeline(torch_compile=True)
 pipe.generate_to_file("output.wav", "Hello from WhisperSpeech.")
 ```
 """
 
-
 text_examples = [
     ["This is the first demo of Whisper Speech, a fully open source text-to-speech model trained by Collabora and Lion on the Juwels supercomputer.", None],
     ["World War II or the Second World War was a global conflict that lasted from 1939 to 1945. The vast majority of the world's countries, including all the great powers, fought as part of two opposing military alliances: the Allies and the Axis.", "https://upload.wikimedia.org/wikipedia/commons/7/75/Winston_Churchill_-_Be_Ye_Men_of_Valour.ogg"],
-    ["<pl>To jest pierwszy test wielojęzycznego <en>Whisper Speech <pl>, modelu zamieniającego tekst na mowę, który Collabora i Laion nauczyli na superkomputerze <en>Jewels.", None],
-    ["<en> WhisperSpeech is an Open Source library that helps you convert text to speech. <pl>Teraz także po Polsku! <en>I think I just tried saying \"now also in Polish\", don't judge me...", None],
-    # ["<de> WhisperSpeech is multi-lingual <es> y puede cambiar de idioma <hi> मध्य वाक्य में"],
-    ["<pl>To jest pierwszy test naszego modelu. Pozdrawiamy serdecznie.", None],
+    #["<pl>To jest pierwszy test wielojęzycznego <en>Whisper Speech <pl>, modelu zamieniającego tekst na mowę, który Collabora i Laion nauczyli na superkomputerze <en>Jewels.", None],
+    #["<en> WhisperSpeech is an Open Source library that helps you convert text to speech. <pl>Teraz także po Polsku! <en>I think I just tried saying \"now also in Polish\", don't judge me...", None],
+    ["<de> WhisperSpeech is multi-lingual <es> hay una vez un bar un bargochicitito <hi> मध्य वाक्य में", None],
+    #["<pl>To jest pierwszy test naszego modelu. Pozdrawiamy serdecznie.", None],
     # ["<en> The big difference between Europe <fr> et les Etats Unis <pl> jest to, że mamy tak wiele języków <uk> тут, в Європі"]
 ]
 
@@ -88,13 +70,9 @@ def generate_audio(pipe, segments, speaker, speaker_url, cps=14):
 def whisper_speech_demo(multilingual_text, speaker_audio=None, speaker_url="", cps=14):
     if len(multilingual_text) == 0:
         raise gr.Error("Please enter some text for me to speak!")
-
     segments = parse_multilingual_text(multilingual_text)
-
     audio = generate_audio(pipe, segments, speaker_audio, speaker_url, cps)
-
     return (24000, audio.T.numpy())
-
     # Did not work for me in Safari:
     # mp3 = io.BytesIO()
     # torchaudio.save(mp3, audio, 24000, format='mp3')
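For orientation, the sketch below shows how the `whisper_speech_demo` function kept by this commit could be called and wired into a Gradio interface. The prompt, component labels, slider range, and interface layout are assumptions for illustration; the real UI wiring lives in parts of app.py not shown in this diff, and the sketch assumes `pipe`, `parse_multilingual_text`, `generate_audio`, `title`, `footer`, and `whisper_speech_demo` are defined as above.

```
# Illustrative sketch only -- not part of this commit.
import gradio as gr

# Language tags switch languages mid-text, as described in the title markdown.
prompt = "<en> Hello from WhisperSpeech. <pl> A to jest test po polsku."

# whisper_speech_demo returns (sample_rate, samples); that tuple is a valid value
# for a gr.Audio output component. Whether empty speaker arguments fall back to a
# default voice depends on generate_audio, which is outside this diff.
sample_rate, samples = whisper_speech_demo(prompt, speaker_audio=None, speaker_url="", cps=14)

# A minimal interface around the same function (hypothetical layout):
demo = gr.Interface(
    fn=whisper_speech_demo,
    inputs=[
        gr.Textbox(label="Multilingual text"),
        gr.Audio(label="Speaker sample to clone", type="filepath"),
        gr.Textbox(label="Speaker file URL"),
        gr.Slider(10, 15, value=14, label="Characters per second"),
    ],
    outputs=gr.Audio(label="Generated speech"),
    description=title,
    article=footer,
)

if __name__ == "__main__":
    demo.launch()
```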
 
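The diff calls `parse_multilingual_text` but does not include it. As a rough guess at the segmentation implied by the `<en>`/`<pl>` tag format described in the title text (the actual implementation in app.py may differ), a parser could look like this:

```
import re

# Hypothetical sketch: split "<en> hello <pl> cześć" into (language, text) segments.
# This is NOT the parse_multilingual_text from app.py, only an illustration of the
# tag format described in the title markdown.
def parse_multilingual_text_sketch(text, default_lang="en"):
    parts = re.split(r"<(\w\w)>", text)
    # parts alternates: [text before first tag, lang1, text1, lang2, text2, ...]
    segments = []
    if parts[0].strip():
        segments.append((default_lang, parts[0]))
    for lang, segment in zip(parts[1::2], parts[2::2]):
        if segment.strip():
            segments.append((lang, segment))
    return segments

print(parse_multilingual_text_sketch("<en> Hello! <pl> Cześć!"))
# [('en', ' Hello! '), ('pl', ' Cześć!')]
```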