rushic24 committed on
Commit
c852ee2
1 Parent(s): 4204f70

some api changes for tts

Browse files
Files changed (6) hide show
  1. .gitignore +1 -1
  2. api_app.py +38 -12
  3. app.py +1 -2
  4. requirements.txt +2 -2
  5. templates/details.html +131 -0
  6. templates/index.html +143 -0
.gitignore CHANGED
@@ -1,2 +1,2 @@
1
- audio.wav
2
  *.png
 
1
+ *.wav
2
  *.png
api_app.py CHANGED
@@ -1,4 +1,5 @@
1
- from flask import Flask, redirect, url_for, request
 
2
  import gradio as gr
3
  from synthesize import synthesize, load_model
4
  from synthesis.vocoders import Hifigan
@@ -13,22 +14,47 @@ def inference(text: str):
13
  model=model,
14
  text=text,
15
  graph_path="graph.png",
16
- audio_path="audio.wav",
17
  vocoder=vocoder,
18
  )
19
- return "audio.wav"
20
 
21
  app = Flask(__name__)
22
 
23
- @app.route('/process',methods = ['POST'])
24
- def login():
25
- if request.method == 'POST':
26
- text = request.json['text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  inference(text)
28
- data, fs = sf.read("audio.wav", dtype='float32')
29
- sd.play(data, fs)
30
- status = sd.wait() # Wait until file is done playing
31
- return {'success': True}
32
 
33
  if __name__ == '__main__':
34
- app.run(debug = True)
 
1
+ from flask import Flask, redirect, url_for, request, send_from_directory
2
+ from flask import Flask, render_template, request, send_file
3
  import gradio as gr
4
  from synthesize import synthesize, load_model
5
  from synthesis.vocoders import Hifigan
 
14
  model=model,
15
  text=text,
16
  graph_path="graph.png",
17
+ audio_path="tts.wav",
18
  vocoder=vocoder,
19
  )
20
+ return True
21
 
22
  app = Flask(__name__)
23
 
24
+
25
@app.route("/")
def index():
    """Render the landing page (templates/index.html).

    All optional controls of the template -- the style-wav input, the
    speaker picker and the model-details button -- are switched off.
    """
    # Flags read by the Jinja conditionals in index.html.
    template_flags = {
        "show_details": False,
        "use_multi_speaker": False,
        "speaker_ids": None,
        "use_gst": False,
    }
    return render_template("index.html", **template_flags)
33
+
34
@app.route("/details")
def details():
    """Render the model-details page (templates/details.html).

    ``show_details`` is passed as ``False`` and every config value as
    ``None``, so the template takes its ``else`` branch and only shows the
    hint about starting the server with ``--show_details=true``.
    """
    return render_template(
        "details.html",
        show_details=False,
        model_config=None,
        vocoder_config=None,
        args=None,
    )
45
+
46
+
47
# This format is similar to coqui-ai TTS api
@app.route('/api/tts', methods = ['GET']) #?text=this%20is%20a%20tses&speaker_id=&style_wav=
def get_file():
    """Synthesize the ``text`` query parameter and return it as a WAV download.

    Returns:
        The generated ``tts.wav`` as an attachment, or a JSON error body with
        HTTP status 400 when ``text`` is missing or blank.
    """
    text = request.args.get('text')
    print(request.args)

    # Reject missing/blank text with a real error status. Answering 200 here
    # would make the web page (which only checks ``res.ok``) try to play the
    # JSON error body as audio.
    if not text or not text.strip():
        print('empty text')
        return {'message': 'textempty'}, 400

    # inference() hands audio_path="tts.wav" to synthesize(); the file is then
    # served from the './' directory.
    # NOTE(review): the output name is fixed, so concurrent requests can
    # overwrite each other's audio -- consider a per-request temp file.
    inference(text)
    return send_from_directory('./', 'tts.wav', as_attachment=True)
 
 
 
58
 
59
  if __name__ == '__main__':
60
+ app.run(debug = True, host='0.0.0.0', port=5002)
app.py CHANGED
@@ -15,8 +15,7 @@ article = """<p style='text-align: center'>
15
  class='footer'>Blog</a> |
16
  <a href='https://github.com/eugenesiow/practical-ml' target='_blank'
17
  class='footer'>Github Repo</a></p>"""
18
- examples = ["Generate english speech from text using a Tacotron2 model.",
19
- ""]
20
 
21
  def inference(text: str):
22
  synthesize(
 
15
  class='footer'>Blog</a> |
16
  <a href='https://github.com/eugenesiow/practical-ml' target='_blank'
17
  class='footer'>Github Repo</a></p>"""
18
+ examples = ["Generate english speech from text using a Tacotron2 model."]
 
19
 
20
  def inference(text: str):
21
  synthesize(
requirements.txt CHANGED
@@ -9,8 +9,8 @@ omegaconf==2.0.6
9
  numba==0.47
10
  webrtcvad==2.0.10
11
  requests==2.25.1
12
- torch==1.9.0+cu111
13
- torchvision==0.10.0+cu111
14
  torchaudio===0.9.0
15
  deepspeech==0.9.3
16
  llvmlite==0.32.1
 
9
  numba==0.47
10
  webrtcvad==2.0.10
11
  requests==2.25.1
12
+ torch==1.9.0
13
+ torchvision==0.10.0
14
  torchaudio===0.9.0
15
  deepspeech==0.9.3
16
  llvmlite==0.32.1
templates/details.html ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+
6
+ <meta charset="utf-8">
7
+ <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
8
+ <meta name="description" content="">
9
+ <meta name="author" content="">
10
+
11
+ <title>TTS engine</title>
12
+
13
+ <!-- Bootstrap core CSS -->
14
+ <link href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.1/css/bootstrap.min.css"
15
+ integrity="sha384-WskhaSGFgHYWDcbwN70/dfYBj47jz9qbsMId/iRN3ewGhXQFZCSftd1LZCfmhktB" crossorigin="anonymous"
16
+ rel="stylesheet">
17
+
18
+ <!-- Custom styles for this template -->
19
+ <style>
20
+ body {
21
+ padding-top: 54px;
22
+ }
23
+
24
+ @media (min-width: 992px) {
25
+ body {
26
+ padding-top: 56px;
27
+ }
28
+ }
29
+ </style>
30
+ </head>
31
+
32
+ <body>
33
+ <a href="https://github.com/mozilla/TTS"><img style="position: absolute; z-index:1000; top: 0; left: 0; border: 0;"
34
+ src="https://s3.amazonaws.com/github/ribbons/forkme_left_darkblue_121621.png" alt="Fork me on GitHub"></a>
35
+
36
+ {% if show_details == true %}
37
+
38
+ <div class="container">
39
+ <b>Model details</b>
40
+ </div>
41
+
42
+ <div class="container">
43
+ <details>
44
+ <summary>CLI arguments:</summary>
45
+ <table border="1" align="center" width="75%">
46
+ <tr>
47
+ <td> CLI key </td>
48
+ <td> Value </td>
49
+ </tr>
50
+
51
+ {% for key, value in args.items() %}
52
+
53
+ <tr>
54
+ <td>{{ key }}</td>
55
+ <td>{{ value }}</td>
56
+ </tr>
57
+
58
+ {% endfor %}
59
+ </table>
60
+ </details>
61
+ </div><br>
62
+
63
+ <div class="container">
64
+
65
+ {% if model_config != None %}
66
+
67
+ <details>
68
+ <summary>Model config:</summary>
69
+
70
+ <table border="1" align="center" width="75%">
71
+ <tr>
72
+ <td> Key </td>
73
+ <td> Value </td>
74
+ </tr>
75
+
76
+
77
+ {% for key, value in model_config.items() %}
78
+
79
+ <tr>
80
+ <td>{{ key }}</td>
81
+ <td>{{ value }}</td>
82
+ </tr>
83
+
84
+ {% endfor %}
85
+
86
+ </table>
87
+ </details>
88
+
89
+ {% endif %}
90
+
91
+ </div><br>
92
+
93
+
94
+
95
+ <div class="container">
96
+ {% if vocoder_config != None %}
97
+ <details>
98
+ <summary>Vocoder model config:</summary>
99
+
100
+ <table border="1" align="center" width="75%">
101
+ <tr>
102
+ <td> Key </td>
103
+ <td> Value </td>
104
+ </tr>
105
+
106
+
107
+ {% for key, value in vocoder_config.items() %}
108
+
109
+ <tr>
110
+ <td>{{ key }}</td>
111
+ <td>{{ value }}</td>
112
+ </tr>
113
+
114
+ {% endfor %}
115
+
116
+
117
+ </table>
118
+ </details>
119
+ {% endif %}
120
+ </div><br>
121
+
122
+ {% else %}
123
+ <div class="container">
124
+ <b>Please start server with --show_details=true to see details.</b>
125
+ </div>
126
+
127
+ {% endif %}
128
+
129
+ </body>
130
+
131
+ </html>
templates/index.html ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+
6
+ <meta charset="utf-8">
7
+ <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
8
+ <meta name="description" content="🐸Coqui AI TTS demo server.">
9
+ <meta name="author" content="🐸Coqui AI TTS">
10
+
11
+ <title>TTS engine</title>
12
+
13
+ <!-- Bootstrap core CSS -->
14
+ <link href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.1/css/bootstrap.min.css"
15
+ integrity="sha384-WskhaSGFgHYWDcbwN70/dfYBj47jz9qbsMId/iRN3ewGhXQFZCSftd1LZCfmhktB" crossorigin="anonymous"
16
+ rel="stylesheet">
17
+
18
+ <!-- Custom styles for this template -->
19
+ <style>
20
+ body {
21
+ padding-top: 54px;
22
+ }
23
+
24
+ @media (min-width: 992px) {
25
+ body {
26
+ padding-top: 56px;
27
+ }
28
+ }
29
+ </style>
30
+ </head>
31
+
32
+ <body>
33
+ <a href="https://github.com/coqui-ai/TTS"><img style="position: absolute; z-index:1000; top: 0; left: 0; border: 0;"
34
+ src="https://s3.amazonaws.com/github/ribbons/forkme_left_darkblue_121621.png" alt="Fork me on GitHub"></a>
35
+
36
+ <!-- Navigation -->
37
+ <!--
38
+ <nav class="navbar navbar-expand-lg navbar-dark bg-dark fixed-top">
39
+ <div class="container">
40
+ <a class="navbar-brand" href="#">Coqui TTS</a>
41
+ <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarResponsive" aria-controls="navbarResponsive" aria-expanded="false" aria-label="Toggle navigation">
42
+ <span class="navbar-toggler-icon"></span>
43
+ </button>
44
+ <div class="collapse navbar-collapse" id="navbarResponsive">
45
+ <ul class="navbar-nav ml-auto">
46
+ <li class="nav-item active">
47
+ <a class="nav-link" href="#">Home
48
+ <span class="sr-only">(current)</span>
49
+ </a>
50
+ </li>
51
+ </ul>
52
+ </div>
53
+ </div>
54
+ </nav>
55
+ -->
56
+
57
+ <!-- Page Content -->
58
+ <div class="container">
59
+ <div class="row">
60
+ <div class="col-lg-12 text-center">
61
+ <img class="mt-5" src="{{url_for('static', filename='coqui-log-green-TTS.png')}}" align="middle"
62
+ width="512" />
63
+
64
+ <ul class="list-unstyled">
65
+ </ul>
66
+
67
+ {%if use_gst%}
68
+ <input value='{"0": 0.1}' id="style_wav" placeholder="style wav (dict or path to wav).." size=45
69
+ type="text" name="style_wav">
70
+ {%endif%}
71
+
72
+ <input id="text" placeholder="Type here..." size=45 type="text" name="text">
73
+ <button id="speak-button" name="speak">Speak</button><br /><br />
74
+
75
+ {%if use_multi_speaker%}
76
+ Choose a speaker:
77
+ <select id="speaker_id" name=speaker_id method="GET" action="/">
78
+ {% for speaker_id in speaker_ids %}
79
+ <option value="{{speaker_id}}" SELECTED>{{speaker_id}}</option>
80
+ {% endfor %}
81
+ </select><br /><br />
82
+ {%endif%}
83
+
84
+ {%if show_details%}
85
+ <button id="details-button" onclick="location.href = 'details'" name="model-details">Model
86
+ Details</button><br /><br />
87
+ {%endif%}
88
+ <audio id="audio" controls autoplay hidden></audio>
89
+ <p id="message"></p>
90
+ </div>
91
+ </div>
92
+ </div>
93
+
94
+ <!-- Bootstrap core JavaScript -->
95
+ <script>
96
/**
 * Read the current value of a form control that may not be on the page.
 *
 * @param {string} textId CSS selector of the control, e.g. '#speaker_id'.
 * @returns {string} The control's value, or "" when nothing matches.
 */
function getTextValue(textId) {
  const field = q(textId)
  return field ? field.value : ""
}
103
/** Shorthand for document.querySelector: first element matching `selector`, or null. */
function q(selector) {
  return document.querySelector(selector)
}
104
+ q('#text').focus()
105
/**
 * Event handler that starts a synthesis request for the text box content.
 *
 * When the text box is non-empty it shows a progress message, disables the
 * Speak button, hides the audio player and calls synthesize(). The event's
 * default action is always cancelled.
 *
 * @param {Event} e The triggering click / key event.
 * @returns {boolean} Always false.
 */
function do_tts(e) {
  const inputText = q('#text').value
  const speakerId = getTextValue('#speaker_id')
  const styleWav = getTextValue('#style_wav')

  if (inputText) {
    q('#message').textContent = 'Synthesizing...'
    q('#speak-button').disabled = true
    q('#audio').hidden = true
    synthesize(inputText, speakerId, styleWav)
  }

  e.preventDefault()
  return false
}
118
// Start synthesis when the Speak button is clicked ...
q('#speak-button').addEventListener('click', do_tts)

// ... or when Enter is released inside the text box.
q('#text').addEventListener('keyup', function (e) {
  // KeyboardEvent.keyCode is deprecated; prefer `key` and keep the numeric
  // code (13 = Enter) only as a fallback for old browsers.
  if (e.key === 'Enter' || e.keyCode === 13) {
    do_tts(e)
  }
})
124
/**
 * Request speech for `text` from /api/tts and load it into the audio player.
 *
 * On success the status message is cleared, the Speak button is re-enabled
 * and the player is shown with the returned audio. On failure the error is
 * written to the status message and the button is re-enabled.
 *
 * @param {string} text Text to synthesize.
 * @param {string} [speaker_id=""] Sent as the `speaker_id` query parameter.
 * @param {string} [style_wav=""] Sent as the `style_wav` query parameter.
 */
function synthesize(text, speaker_id = "", style_wav = "") {
  fetch(`/api/tts?text=${encodeURIComponent(text)}&speaker_id=${encodeURIComponent(speaker_id)}&style_wav=${encodeURIComponent(style_wav)}`, { cache: 'no-cache' })
    .then(function (res) {
      if (!res.ok) throw Error(res.statusText)
      return res.blob()
    }).then(function (blob) {
      const audio = q('#audio')
      const previousUrl = audio.src

      q('#message').textContent = ''
      q('#speak-button').disabled = false
      audio.src = URL.createObjectURL(blob)
      audio.hidden = false

      // Each createObjectURL() keeps its blob alive until it is revoked;
      // release the clip that was just replaced so repeated requests do not
      // accumulate audio data in memory.
      if (previousUrl && previousUrl.startsWith('blob:')) {
        URL.revokeObjectURL(previousUrl)
      }
    }).catch(function (err) {
      q('#message').textContent = 'Error: ' + err.message
      q('#speak-button').disabled = false
    })
}
139
+ </script>
140
+
141
+ </body>
142
+
143
+ </html>