Spaces:
Running
Running
DeFactOfficial
committed on
Commit
•
996c544
1
Parent(s):
477624d
Update api.js
Browse files
api.js
CHANGED
@@ -202,97 +202,11 @@ app.post('api/generate/speech', async (req, res) =>{
|
|
202 |
})
|
203 |
|
204 |
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
const {voice, text, model} = req.query
|
209 |
-
console.log("Utterance Params", {voice, text, model})
|
210 |
-
//const outputFilename= await generateAudio(voice, text, model || "tts-1")
|
211 |
-
|
212 |
-
// We want the browser to cache this response, because there's no reason to TTS the same text-voice-model combination more than once
|
213 |
-
//await res.sendFile(path.resolve(outputFilename), { headers: { 'Content-Type': 'audio/mpeg', 'Cache-Control':'Max-Age=8640000' } });
|
214 |
})
|
215 |
-
|
216 |
-
app.post('api/generate/utterance', async (req, res) =>{
|
217 |
-
const {voice, text, model} = req.body
|
218 |
-
const outputFilename= await generateAudio(voice, text, model || "tts-1")
|
219 |
-
|
220 |
-
// We want the browser to cache this response, because there's no reason to TTS the same text-voice-model combination more than once
|
221 |
-
res.sendFile(path.resolve(outputFilename), { headers: { 'Content-Type': 'audio/mpeg', 'Cache-Control':'Max-Age=8640000' } });
|
222 |
-
})
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
// This returns a stream of SSE (application/event-stream) similar to a streaming response from an LLM
|
227 |
-
// See example in public/client for how to consume the stream
|
228 |
-
app.post('/api/generate/speech/stream', async (req, res) => {
|
229 |
-
try {
|
230 |
-
const apiKey = req.query.api_key || 'their_api_key';
|
231 |
-
if (apiKey !== 'their_api_key') {
|
232 |
-
// Replace "their_api_key" with your actual method of managing API keys
|
233 |
-
res.status(401).send('Unauthorized');
|
234 |
-
return;
|
235 |
-
}
|
236 |
-
|
237 |
-
const script = req.body.payload;
|
238 |
-
if (!script) {
|
239 |
-
res.status(400).send('Bad Request: Missing payload');
|
240 |
-
return;
|
241 |
-
}
|
242 |
-
|
243 |
-
// Set headers for SSE
|
244 |
-
res.setHeader('Content-Type', 'text/event-stream');
|
245 |
-
res.setHeader('Cache-Control', 'no-cache');
|
246 |
-
res.setHeader('Connection', 'keep-alive');
|
247 |
-
|
248 |
-
const hash = crypto.createHash('sha1');
|
249 |
-
hash.update(script);
|
250 |
-
const scriptHash = hash.digest('hex');
|
251 |
-
|
252 |
-
if (audioCache[scriptHash]) {
|
253 |
-
// If audio is cached, send the final SSE with the combined audio URL
|
254 |
-
const filePath = audioCache[scriptHash];
|
255 |
-
console.log(filePath)
|
256 |
-
|
257 |
-
|
258 |
-
res.write(`event: audio_complete\ndata: ${req.protocol}://${req.get('host')}/${filePath}\n\n`);
|
259 |
-
res.end();
|
260 |
-
return;
|
261 |
-
}
|
262 |
-
|
263 |
-
const parsedSegments = parseScript(script);
|
264 |
-
const audioSegments = [];
|
265 |
-
|
266 |
-
for (const segment of parsedSegments) {
|
267 |
-
const audioPath = await generateAudio(segment.speaker_name, segment.content);
|
268 |
-
audioSegments.push(audioPath);
|
269 |
-
|
270 |
-
// Send SSE with the URL of the generated audio segment
|
271 |
-
res.write(`event: audio_segment\ndata: ${req.protocol}://${req.get('host')}/${audioPath}\n\n`);
|
272 |
-
}
|
273 |
-
|
274 |
-
if (audioSegments.length === 0) {
|
275 |
-
res.write(`event: error\ndata: No audio generated\n\n`);
|
276 |
-
res.end();
|
277 |
-
return;
|
278 |
-
}
|
279 |
-
|
280 |
-
// Concatenate audio files into one using FFmpeg
|
281 |
-
const combinedAudioPath = path.join(MEDIA_FOLDER, `combined_${uuidv4()}.mp3`);
|
282 |
-
await concatenateAudioFiles(audioSegments, combinedAudioPath);
|
283 |
-
|
284 |
-
audioCache[scriptHash] = combinedAudioPath;
|
285 |
-
console.log(combinedAudioPath)
|
286 |
-
// Send SSE with the URL of the combined audio
|
287 |
-
res.write(`event: audio_complete\ndata: ${req.protocol}://${req.get('host')}/${combinedAudioPath}\n\n`);
|
288 |
-
res.end();
|
289 |
-
} catch (error) {
|
290 |
-
console.error('Error generating speech:', error);
|
291 |
-
res.write(`event: error\ndata: Internal Server Error\n\n`);
|
292 |
-
res.end();
|
293 |
-
}
|
294 |
-
});
|
295 |
-
|
296 |
|
297 |
//Image generation parameters
|
298 |
//response_format: image | url
|
|
|
202 |
})
|
203 |
|
204 |
|
205 |
+
app.get('/api/hello', async(req, res) => {
|
206 |
+
await res.status(200).send({"hello": "world"}, {headers: {"Content-Type":"application/json"}})
|
207 |
+
res.end()
|
|
|
|
|
|
|
|
|
|
|
|
|
208 |
})
|
209 |
+
// This is normal TTS: specify voice, text, model. Voices are from openai, use those names or the aliases in lookup table
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
210 |
|
211 |
//Image generation parameters
|
212 |
//response_format: image | url
|