Mbonea committed on
Commit
d8bf43a
1 Parent(s): 200d685

added transcription and music search

Browse files
App/TTS/Schemas.py CHANGED
@@ -19,19 +19,25 @@ class Speak(BaseModel):
19
  )
20
  super().__init__(**data)
21
 
 
22
  class DescriptSfxRequest(BaseModel):
23
- query:str
24
 
25
- class DescriptRequest(BaseModel):
 
 
26
  text: str
27
- speaker: Optional[str]=Field(default="Lawrance")
28
- _voice_id: Optional[str]
29
 
30
- class DescriptStatusRequest(BaseModel):
31
- id:str
32
 
 
 
 
 
33
 
34
 
 
 
 
35
 
36
  class HeyGenTTSRequest(BaseModel):
37
  voice_id: str = Field(default="d7bbcdd6964c47bdaae26decade4a933")
 
19
  )
20
  super().__init__(**data)
21
 
22
+
23
  class DescriptSfxRequest(BaseModel):
24
+ query: str
25
 
26
+
27
# Request body for the transcript endpoint: the URLs of the audio clips to
# download and join, plus the text submitted alongside the audio.
class DescriptTranscript(BaseModel):
    # NOTE(review): relies on `List` being imported from `typing` at the top
    # of this module — the import block is not visible here; confirm.
    audio_url: List[str]
    text: str
 
 
30
 
 
 
31
 
32
# Request body for an overdub (text-to-speech) job.
class DescriptRequest(BaseModel):
    text: str
    speaker: Optional[str] = Field(default="Lawrance")
    # NOTE(review): pydantic does not treat underscore-prefixed names as
    # regular fields, so this is presumably never populated from the request
    # payload — confirm against the route that reads it.
    _voice_id: Optional[str]
36
 
37
 
38
# Request body for polling a job; `id` is forwarded to
# DescriptTTS.request_status() by the status route.
class DescriptStatusRequest(BaseModel):
    id: str
40
+
41
 
42
  class HeyGenTTSRequest(BaseModel):
43
  voice_id: str = Field(default="d7bbcdd6964c47bdaae26decade4a933")
App/TTS/TTSRoutes.py CHANGED
@@ -8,6 +8,7 @@ from .Schemas import (
8
  DescriptRequest,
9
  DescriptStatusRequest,
10
  DescriptSfxRequest,
 
11
  )
12
  from .utils.Podcastle import PodcastleAPI
13
  from .utils.HeyGen import HeygenAPI
@@ -50,11 +51,22 @@ async def status_descript(req: DescriptStatusRequest):
50
  return await descript_tts.request_status(req.id)
51
 
52
 
 
 
 
 
 
53
  @tts_router.post("/descript_sfx")
54
  async def descript_sfx(req: DescriptSfxRequest):
55
  return await descript_tts.search_sound_effects(req.query)
56
 
57
 
 
 
 
 
 
 
58
  @tts_router.post("/descript_unsplash")
59
  async def descript_unsplash(req: DescriptSfxRequest):
60
  return await descript_tts.search_unsplash_images(req.query)
 
8
  DescriptRequest,
9
  DescriptStatusRequest,
10
  DescriptSfxRequest,
11
+ DescriptTranscript,
12
  )
13
  from .utils.Podcastle import PodcastleAPI
14
  from .utils.HeyGen import HeygenAPI
 
51
  return await descript_tts.request_status(req.id)
52
 
53
 
54
# POST /descript_music — forward the query terms to the stock-music search
# and hand the result back unchanged.
@tts_router.post("/descript_music")
async def descript_music(req: DescriptSfxRequest):
    matches = await descript_tts.search_music(req.query)
    return matches
57
+
58
+
59
  @tts_router.post("/descript_sfx")
60
  async def descript_sfx(req: DescriptSfxRequest):
61
  return await descript_tts.search_sound_effects(req.query)
62
 
63
 
64
# POST /descript_transcript — pass the whole request model (audio URLs and
# text) to DescriptTTS.get_transcription() and return its result.
@tts_router.post("/descript_transcript")
async def descript_transcript(req: DescriptTranscript):
    transcript = await descript_tts.get_transcription(req)
    return transcript
68
+
69
+
70
  @tts_router.post("/descript_unsplash")
71
  async def descript_unsplash(req: DescriptSfxRequest):
72
  return await descript_tts.search_unsplash_images(req.query)
App/TTS/utils/Descript.py CHANGED
@@ -1,10 +1,12 @@
1
  import aiohttp
2
- import asyncio
3
  import json, pprint, uuid, os, datetime
4
  import tempfile, shutil
5
  from typing import List, Optional
6
  from datetime import datetime, timedelta
7
  from pydantic import BaseModel, HttpUrl
 
 
8
 
9
 
10
  class Metadata(BaseModel):
@@ -52,6 +54,55 @@ class DescriptTTS:
52
  self.refresh_token = refresh_token
53
  self.tau_id = "90f9e0ad-594e-4203-9297-d4c7cc691e5x"
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  async def login_and_get_bearer_token(self):
56
  # Step 1: Use refresh token to get a new access token
57
  new_bearer_token, new_refresh_token = await self.refresh_access_token()
@@ -135,60 +186,80 @@ class DescriptTTS:
135
 
136
  return file_path
137
 
 
 
 
 
 
138
  async def search_unsplash_images(self, query_terms):
139
  url = "https://api.descript.com/v2/cloud_libraries/providers/unsplash/image/search"
140
  data = {
141
- 'tracking_info': {'project_id': self.project_id},
142
- 'pagination_info': {'page': 2, 'page_size': 25},
143
- 'query': {'terms': query_terms}
144
  }
145
 
146
  try:
147
- response = await self.make_authenticated_request(url, method="POST", data=data)
 
 
148
  return response
149
  except Exception as e:
150
  print(f"Failed to search Unsplash images: {e}")
151
  return None
152
 
 
 
 
 
 
 
 
153
 
154
-
 
 
 
 
 
 
 
155
 
156
  async def search_sound_effects(self, query_terms):
157
  url = "https://api.descript.com/v2/cloud_libraries/providers/stock-sfx/audio/search"
158
  headers = {
159
- 'accept': 'application/json, text/plain, */*',
160
- 'accept-language': 'en-US,en;q=0.9',
161
- 'content-type': 'application/json',
162
-
163
- 'authorization': f'Bearer {self.bearer_token}', # Use the valid bearer token
164
  }
165
  data = {
166
- 'tracking_info': {'project_id': self.project_id},
167
- 'pagination_info': {'page': 1, 'page_size': 25},
168
- 'query': {'terms': query_terms}
169
  }
170
 
171
  try:
172
- response = await self.make_authenticated_request(url, method="POST", data=data)
 
 
173
  return response
174
  except Exception as e:
175
  print(f"Failed to search sound effects: {e}")
176
- return {'status':str(e)}
177
-
178
 
179
  async def get_voices(self):
180
  url = "https://api.descript.com/v2/users/me/voices"
181
  try:
182
  response = await self.make_authenticated_request(url)
183
  voices = response
184
- self.voice_ids = {voice['name']: voice['id'] for voice in voices}
185
 
186
  return voices
187
  except Exception as e:
188
  print(f"Failed to fetch voices: {e}")
189
  return None
190
 
191
-
192
  async def start_token_refresh_schedule(self):
193
  while True:
194
  try:
@@ -206,7 +277,6 @@ class DescriptTTS:
206
  # Wait for 24 hours before the next refresh
207
  await asyncio.sleep(24 * 60 * 60)
208
 
209
-
210
  async def update_refresh_token(self, new_refresh_token):
211
  # Update the new refresh token to Firebase
212
  data = {"refresh_token": new_refresh_token}
@@ -220,7 +290,30 @@ class DescriptTTS:
220
  f"Failed to update refresh token. Status code: {response.status}, Error: {await response.text()}"
221
  )
222
 
223
- async def make_authenticated_request(self, url, method="GET", data=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  if not self.bearer_token:
225
  await self.login_and_get_bearer_token() # Make sure we have a valid bearer token
226
 
@@ -231,7 +324,6 @@ class DescriptTTS:
231
  "accept-version": "v1",
232
  "authorization": f"Bearer {self.bearer_token}",
233
  "cache-control": "no-cache",
234
- "content-type": "application/json",
235
  "origin": "https://web.descript.com",
236
  "pragma": "no-cache",
237
  "referer": "https://web.descript.com/",
@@ -251,29 +343,41 @@ class DescriptTTS:
251
  }
252
 
253
  async with aiohttp.ClientSession() as session:
254
- async with session.request(
255
- method, url, headers=headers, json=data
256
- ) as response:
257
- if response.status < 300:
258
- return await response.json()
259
- elif response.status == 401:
260
- self.refresh_token =None
261
- # Retry the request after refreshing the token
262
  await self.login_and_get_bearer_token()
263
  headers["authorization"] = f"Bearer {self.bearer_token}"
264
- async with session.request(
265
- method, url, headers=headers, json=data
266
- ) as retry_response:
267
- if retry_response.status == 200:
268
- return await retry_response.json()
269
- else:
270
- raise Exception(
271
- f"Request failed even after refreshing token. Status code: {retry_response.status}, Error: {await retry_response.text()}"
272
- )
273
- else:
274
- raise Exception(
275
- f"Request failed. Status code: {response.status}, Error: {await response.text()}"
276
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
 
278
  async def get_assets(self):
279
  url = "https://api.descript.com/v2/projects/f734c6d7-e39d-4c1d-8f41-417f94cd37ce/media_assets?include_artifacts=true&cursor=1702016922390&include_placeholder=true"
@@ -283,7 +387,7 @@ class DescriptTTS:
283
  except Exception as e:
284
  print(f"Failed to get assets: {str(e)}")
285
 
286
- async def overdub_text(self, text, speaker="Lawrance",_voice_id=None):
287
  url = "https://api.descript.com/v2/projects/f734c6d7-e39d-4c1d-8f41-417f94cd37ce/overdub"
288
  voice_id = _voice_id or self.voice_ids[speaker]
289
  data = {
@@ -334,20 +438,18 @@ class DescriptTTS:
334
  async def request_status(self, id):
335
  status = await self.overdub_staus(id)
336
  if status["state"] == "done":
337
- asset_id=status["result"]["imputation_audio_asset_id"]
338
  overdub = await self.get_assets()
339
  for asset in overdub["data"]:
340
  if asset["id"] == asset_id:
341
  data = TTSResponse(**asset)
342
  url = data.artifacts[0].read_url
343
- return {'url':url,'status':'done'}
344
  return status
345
 
346
-
347
-
348
  async def say(self, text, speaker="Henry"):
349
  overdub = await self.overdub_text(text, speaker=speaker)
350
-
351
  asset_id = None
352
  while True:
353
  status = await self.overdub_staus(overdub["id"])
@@ -366,5 +468,3 @@ class DescriptTTS:
366
  print(url)
367
  path = await self.download_and_store_file(str(url))
368
  return path, url
369
-
370
-
 
1
  import aiohttp
2
+ import asyncio, wave
3
  import json, pprint, uuid, os, datetime
4
  import tempfile, shutil
5
  from typing import List, Optional
6
  from datetime import datetime, timedelta
7
  from pydantic import BaseModel, HttpUrl
8
+ from App.TTS.Schemas import DescriptTranscript
9
+ from pydub import AudioSegment
10
 
11
 
12
  class Metadata(BaseModel):
 
54
  self.refresh_token = refresh_token
55
  self.tau_id = "90f9e0ad-594e-4203-9297-d4c7cc691e5x"
56
 
57
def concatenate_wave_files(self, input_file_paths):
    """
    Concatenate several WAV files into one new temporary WAV file.

    The output takes its channel count, sample width, frame rate and
    compression settings from the first input. Frames from every input are
    then appended in order without any conversion, so all inputs are assumed
    to share the format of the first one.

    :param input_file_paths: Non-empty list of paths to the input wave files.
    :return: Path of the concatenated file, stored under a fresh directory
        created with ``tempfile.mkdtemp()``. The directory is not removed
        here; cleaning it up is left to the caller.
    :raises ValueError: If the list is empty or contains an empty path.
    """
    # Validate before touching the filesystem so that a bad request does not
    # leave an empty temporary directory behind.
    if not input_file_paths:
        raise ValueError("No input file paths provided.")
    if not all(input_file_paths):
        raise ValueError("Empty input file path found.")

    # Read the format from the first file; this also fails early (before any
    # output is created) when the first input is not a readable WAV file.
    with wave.open(input_file_paths[0], "rb") as first_file:
        n_channels = first_file.getnchannels()
        sampwidth = first_file.getsampwidth()
        framerate = first_file.getframerate()
        comptype = first_file.getcomptype()
        compname = first_file.getcompname()

    output_file_path = os.path.join(tempfile.mkdtemp(), str(uuid.uuid4()) + ".wav")

    # The context manager guarantees the output handle is closed (and the
    # WAV header finalised) even if reading one of the inputs raises.
    with wave.open(output_file_path, "wb") as output_file:
        output_file.setnchannels(n_channels)
        output_file.setsampwidth(sampwidth)
        output_file.setframerate(framerate)
        output_file.setcomptype(comptype, compname)

        for input_file_path in input_file_paths:
            with wave.open(input_file_path, "rb") as input_file:
                output_file.writeframes(
                    input_file.readframes(input_file.getnframes())
                )

    return output_file_path
105
+
106
  async def login_and_get_bearer_token(self):
107
  # Step 1: Use refresh token to get a new access token
108
  new_bearer_token, new_refresh_token = await self.refresh_access_token()
 
186
 
187
  return file_path
188
 
189
def calculate_audio_duration(self, audio_file):
    """
    Return the length of a WAV file in seconds, formatted as a string.

    :param audio_file: Path (or file object) of the WAV audio to measure.
    :return: The duration in seconds as text, e.g. ``"1.5"``.
    """
    segment = AudioSegment.from_file(audio_file, format="wav")
    # len() of a pydub AudioSegment is its duration in milliseconds.
    milliseconds = len(segment)
    return str(float(milliseconds / 1000))
193
+
194
  async def search_unsplash_images(self, query_terms):
195
  url = "https://api.descript.com/v2/cloud_libraries/providers/unsplash/image/search"
196
  data = {
197
+ "tracking_info": {"project_id": self.project_id},
198
+ "pagination_info": {"page": 2, "page_size": 25},
199
+ "query": {"terms": query_terms},
200
  }
201
 
202
  try:
203
+ response = await self.make_authenticated_request(
204
+ url, method="POST", data=data
205
+ )
206
  return response
207
  except Exception as e:
208
  print(f"Failed to search Unsplash images: {e}")
209
  return None
210
 
211
async def search_music(self, query_terms, page=2, page_size=25):
    """
    Search the stock-music provider through Descript.

    :param query_terms: Search terms sent as ``query.terms``.
    :param page: Result page to request. Defaults to 2, the value that was
        previously hard-coded, so existing callers are unaffected.
    :param page_size: Number of results per page (default 25).
    :return: The decoded response, or ``None`` if the request failed.
    """
    # NOTE(review): this endpoint is on web.descript.com while the other
    # cloud-library searches in this class use api.descript.com — confirm
    # the host is intentional.
    url = "https://web.descript.com/v2/cloud_libraries/providers/stock-music/audio/search"
    data = {
        "tracking_info": {"project_id": self.project_id},
        "pagination_info": {"page": page, "page_size": page_size},
        "query": {"terms": query_terms},
    }

    try:
        return await self.make_authenticated_request(
            url, method="POST", data=data
        )
    except Exception as e:
        # Best-effort search: report the failure and signal it with None.
        print(f"Failed to search music: {e}")
        return None
227
 
228
  async def search_sound_effects(self, query_terms):
229
  url = "https://api.descript.com/v2/cloud_libraries/providers/stock-sfx/audio/search"
230
  headers = {
231
+ "accept": "application/json, text/plain, */*",
232
+ "accept-language": "en-US,en;q=0.9",
233
+ "content-type": "application/json",
234
+ "authorization": f"Bearer {self.bearer_token}", # Use the valid bearer token
 
235
  }
236
  data = {
237
+ "tracking_info": {"project_id": self.project_id},
238
+ "pagination_info": {"page": 1, "page_size": 25},
239
+ "query": {"terms": query_terms},
240
  }
241
 
242
  try:
243
+ response = await self.make_authenticated_request(
244
+ url, method="POST", data=data
245
+ )
246
  return response
247
  except Exception as e:
248
  print(f"Failed to search sound effects: {e}")
249
+ return {"status": str(e)}
 
250
 
251
  async def get_voices(self):
252
  url = "https://api.descript.com/v2/users/me/voices"
253
  try:
254
  response = await self.make_authenticated_request(url)
255
  voices = response
256
+ self.voice_ids = {voice["name"]: voice["id"] for voice in voices}
257
 
258
  return voices
259
  except Exception as e:
260
  print(f"Failed to fetch voices: {e}")
261
  return None
262
 
 
263
  async def start_token_refresh_schedule(self):
264
  while True:
265
  try:
 
277
  # Wait for 24 hours before the next refresh
278
  await asyncio.sleep(24 * 60 * 60)
279
 
 
280
  async def update_refresh_token(self, new_refresh_token):
281
  # Update the new refresh token to Firebase
282
  data = {"refresh_token": new_refresh_token}
 
290
  f"Failed to update refresh token. Status code: {response.status}, Error: {await response.text()}"
291
  )
292
 
293
async def make_request_with_retry(self, session, method, url, headers, data):
    """
    Send one HTTP request and return the decoded JSON body.

    Despite its name this method performs a single attempt; the retry after
    a 401 is driven by ``make_authenticated_request``, which catches the
    error raised here, refreshes the token and calls this method again.

    :param session: An open ``aiohttp.ClientSession``.
    :param method: HTTP method name, e.g. ``"GET"`` or ``"POST"``.
    :param url: Target URL.
    :param headers: Request headers.
    :param data: Request payload. A ``dict`` is sent as a JSON body; anything
        else (for example ``aiohttp.FormData`` or ``None``) is passed through
        as ``data``.
    :return: The response body parsed as JSON, for any status below 300.
    :raises aiohttp.ClientResponseError: For any status of 300 or above. The
        error carries the status code (callers check for 401) together with
        the response body as its message.
    """
    # isinstance() also accepts dict subclasses, unlike an exact type match.
    payload_kwarg = "json" if isinstance(data, dict) else "data"
    async with session.request(
        method, url, headers=headers, **{payload_kwarg: data}
    ) as response:
        if response.status < 300:
            return await response.json()
        # 401 and every other failure raise the same error type; the caller
        # tells them apart through the status attribute.
        raise aiohttp.ClientResponseError(
            response.request_info,
            response.history,
            status=response.status,
            message=await response.text(),
            headers=response.headers,
        )
310
+
311
+ async def make_authenticated_request(
312
+ self,
313
+ url,
314
+ method="GET",
315
+ data=None,
316
+ ):
317
  if not self.bearer_token:
318
  await self.login_and_get_bearer_token() # Make sure we have a valid bearer token
319
 
 
324
  "accept-version": "v1",
325
  "authorization": f"Bearer {self.bearer_token}",
326
  "cache-control": "no-cache",
 
327
  "origin": "https://web.descript.com",
328
  "pragma": "no-cache",
329
  "referer": "https://web.descript.com/",
 
343
  }
344
 
345
  async with aiohttp.ClientSession() as session:
346
+ try:
347
+ return await self.make_request_with_retry(
348
+ session, method, url, headers, data
349
+ )
350
+ except aiohttp.ClientResponseError as e:
351
+ if e.status == 401:
352
+ self.refresh_token = None
 
353
  await self.login_and_get_bearer_token()
354
  headers["authorization"] = f"Bearer {self.bearer_token}"
355
+ return await self.make_request_with_retry(
356
+ session, method, url, headers, data
 
 
 
 
 
 
 
 
 
 
357
  )
358
+ else:
359
+ raise e
360
+
361
async def get_transcription(self, query: DescriptTranscript):
    """
    Submit audio and its text to the Descript aligner service.

    Every URL in ``query.audio_url`` is downloaded, the clips are joined
    into one WAV file, and that file is posted as multipart form data with
    the text and the audio duration.

    :param query: Request model holding the audio URLs and the text.
    :return: The decoded aligner response, or ``None`` if the request failed
        (the error is printed).
    :raises ValueError: From ``concatenate_wave_files`` when ``audio_url`` is
        empty; download and concatenation errors are not caught here.
    """
    audio_paths = []
    for audio_url in query.audio_url:
        audio_paths.append(await self.download_and_store_file(audio_url))

    audio_path = self.concatenate_wave_files(audio_paths)
    duration = self.calculate_audio_duration(audio_path)

    # Keep the upload handle inside a context manager so it is closed even
    # when the request is never sent (for example if obtaining a token fails).
    with open(audio_path, "rb") as audio_file:
        data = aiohttp.FormData()
        data.add_field("audio", audio_file)
        data.add_field("text", query.text)
        data.add_field("duration", duration)

        # NOTE(review): if the first attempt gets a 401,
        # make_authenticated_request re-sends this same FormData object;
        # confirm that aiohttp allows a form to be sent twice.
        try:
            return await self.make_authenticated_request(
                url="https://aligner.descript.com/", method="POST", data=data
            )
        except Exception as e:
            print(f"Failed transcript {str(e)}")
            return None
381
 
382
  async def get_assets(self):
383
  url = "https://api.descript.com/v2/projects/f734c6d7-e39d-4c1d-8f41-417f94cd37ce/media_assets?include_artifacts=true&cursor=1702016922390&include_placeholder=true"
 
387
  except Exception as e:
388
  print(f"Failed to get assets: {str(e)}")
389
 
390
+ async def overdub_text(self, text, speaker="Lawrance", _voice_id=None):
391
  url = "https://api.descript.com/v2/projects/f734c6d7-e39d-4c1d-8f41-417f94cd37ce/overdub"
392
  voice_id = _voice_id or self.voice_ids[speaker]
393
  data = {
 
438
  async def request_status(self, id):
439
  status = await self.overdub_staus(id)
440
  if status["state"] == "done":
441
+ asset_id = status["result"]["imputation_audio_asset_id"]
442
  overdub = await self.get_assets()
443
  for asset in overdub["data"]:
444
  if asset["id"] == asset_id:
445
  data = TTSResponse(**asset)
446
  url = data.artifacts[0].read_url
447
+ return {"url": url, "status": "done"}
448
  return status
449
 
 
 
450
  async def say(self, text, speaker="Henry"):
451
  overdub = await self.overdub_text(text, speaker=speaker)
452
+
453
  asset_id = None
454
  while True:
455
  status = await self.overdub_staus(overdub["id"])
 
468
  print(url)
469
  path = await self.download_and_store_file(str(url))
470
  return path, url
 
 
requirements.txt CHANGED
@@ -22,6 +22,7 @@ html5lib
22
  requests
23
  bs4
24
  strenum
 
25
 
26
 
27
  git+https://github.com/snowby666/poe-api-wrapper.git
 
22
  requests
23
  bs4
24
  strenum
25
+ pydub
26
 
27
 
28
  git+https://github.com/snowby666/poe-api-wrapper.git