thealphamerc committed on
Commit
7d0bff2
·
1 Parent(s): ea9036c

Add voice assets

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
  tts_output.wav filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
  tts_output.wav filter=lfs diff=lfs merge=lfs -text
36
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
37
+ *.wav filter=lfs diff=lfs merge=lfs -text
__pycache__/app.cpython-311.pyc ADDED
Binary file (1.83 kB). View file
 
app.py CHANGED
@@ -1,6 +1,21 @@
1
  from subprocess import call
2
  import gradio as gr
3
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
 
6
  def run_cmd(command):
@@ -12,19 +27,43 @@ def run_cmd(command):
12
  sys.exit(1)
13
 
14
 
15
- def inference(text):
16
- cmd = ['tts', '--text', text]
17
- run_cmd(cmd)
18
- return 'tts_output.wav'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
 
21
- inputs = gr.inputs.Textbox(lines=5, label="Input Text")
 
 
 
22
  outputs = gr.outputs.Audio(type="filepath", label="Output Audio")
23
  title = "Text To Speech"
24
  description = "An example of using TTS to generate speech from text."
25
  article = ""
26
  examples = [
27
- ["This is an open-source library that generates synthetic speech!=1"]
28
  ]
29
  gr.Interface(
30
  inference,
 
1
  from subprocess import call
2
  import gradio as gr
3
  import os
4
+ from TTS.api import TTS
5
+
6
+ # List available 🐸TTS models and choose the first one
7
+ all_models = TTS.list_models()
8
+ # for model in all_models:
9
+ # print(model)
10
+
11
+ # print("Using model: ", all_models[0])
12
+ model_name = all_models[0]
13
+ # Init TTS
14
+
15
+
16
+ print("Downloading model...", '')
17
+
18
+ voiceCloneModel = TTS('tts_models/multilingual/multi-dataset/your_tts')
19
 
20
 
21
  def run_cmd(command):
 
27
  sys.exit(1)
28
 
29
 
30
+ def inference(text, speaker):
31
+ if (speaker == 'Speaker-1'):
32
+ speaker = 'input/amitabh.mp3'
33
+ elif (speaker == 'Speaker-2'):
34
+ speaker = 'input/amrish.mp3'
35
+ elif (speaker == 'Speaker-3'):
36
+ speaker = 'input/obama.mp3'
37
+ elif (speaker == 'Speaker-4'):
38
+ speaker = 'input/trump.wav'
39
+ else:
40
+ speaker = 'input/z-default.wav'
41
+ # print("speaker: ", speaker)
42
+ # cmd = ['tts', '--text', text, '--out_path', 'output/tts_output.wav']
43
+ # run_cmd(cmd)
44
+ # Text to speech to a file
45
+ # tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts",
46
+ # progress_bar=False, gpu=True)
47
+ voiceCloneModel.tts_to_file(text, speaker_wav=speaker,
48
+ language="en", file_path="output/output.wav")
49
+
50
+ # for i in range(len(tts.languages)):
51
+ # tts.tts_to_file(text=text,
52
+ # speaker=tts.speakers[i], language=tts.languages[0], file_path='output/output-'+str(i)+'.wav')
53
+
54
+ return 'output/output.wav'
55
 
56
 
57
+ inputs = [gr.inputs.Textbox(lines=5, label="Input Text"),
58
+ gr.inputs.Dropdown(['Speaker-1', 'Speaker-2', 'Speaker-3',
59
+ 'Speaker-4'], label="Model")
60
+ ]
61
  outputs = gr.outputs.Audio(type="filepath", label="Output Audio")
62
  title = "Text To Speech"
63
  description = "An example of using TTS to generate speech from text."
64
  article = ""
65
  examples = [
66
+ ["This is an open-source library that generates synthetic speech"]
67
  ]
68
  gr.Interface(
69
  inference,
gradio_queue.db ADDED
Binary file (16.4 kB). View file
 
input/amitabh.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a0c5aa3e3d7e9a26187bca7f1c7cec1be1df918e697ae921b9348adb69d15a0
3
+ size 12735771
input/amrish.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:915e1b84a482dac3d497cb3d44e9db1669e8c580499400a771b9b80ad363113a
3
+ size 1321518
input/obama.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a371205b2067c1b6e05cb5befec4fbeaedb97a3367065f74ff655c6a4d170e47
3
+ size 28035585
input/trump.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f245a5ffc7adb79ef3a43c64a713472955c681a3f5c8c34f73f994c0fdf29d8
3
+ size 30961742
input/z-default.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2b5a06bca26a00b069a455cff44a977cca57fbfc5078e64edbdfb764ccb5c07
3
+ size 1504332
output/output.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddd247a0b637133326079fa149c85c8544671c92c0c7a78ec93952db3cc0dfb2
3
+ size 132172
tts_output.wav → output/tts_output.wav RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b5ca414fb01823c7e62c09eb4d0dccfe9775023a658239e698d368881a25f7a
3
- size 5128268
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93d9cb433afdef64e85d65c1594202dc7fd784c14db651c8374f96f5ffaf2f63
3
+ size 204364