Spaces:

Yakova
/

Embedding

Running

App Files Files Community

Mbonea commited on May 10

Commit

d8bf43a

•

1 Parent(s): 200d685

added transcription and music search

Browse files

Files changed (4) hide show

App/TTS/Schemas.py +12 -6
App/TTS/TTSRoutes.py +12 -0
App/TTS/utils/Descript.py +150 -50
requirements.txt +1 -0

App/TTS/Schemas.py CHANGED Viewed

@@ -19,19 +19,25 @@ class Speak(BaseModel):
         )
         super().__init__(**data)
 class DescriptSfxRequest(BaseModel):
-    query:str
-class DescriptRequest(BaseModel):
     text: str
-    speaker: Optional[str]=Field(default="Lawrance")
-    _voice_id: Optional[str]
-class DescriptStatusRequest(BaseModel):
-    id:str
 class HeyGenTTSRequest(BaseModel):
     voice_id: str = Field(default="d7bbcdd6964c47bdaae26decade4a933")

         )
         super().__init__(**data)
 class DescriptSfxRequest(BaseModel):
+    query: str
+class DescriptTranscript(BaseModel):
+    audio_url: List[str]
     text: str
+class DescriptRequest(BaseModel):
+    text: str
+    speaker: Optional[str] = Field(default="Lawrance")
+    _voice_id: Optional[str]
+class DescriptStatusRequest(BaseModel):
+    id: str
 class HeyGenTTSRequest(BaseModel):
     voice_id: str = Field(default="d7bbcdd6964c47bdaae26decade4a933")

App/TTS/TTSRoutes.py CHANGED Viewed

@@ -8,6 +8,7 @@ from .Schemas import (
     DescriptRequest,
     DescriptStatusRequest,
     DescriptSfxRequest,
 )
 from .utils.Podcastle import PodcastleAPI
 from .utils.HeyGen import HeygenAPI
@@ -50,11 +51,22 @@ async def status_descript(req: DescriptStatusRequest):
     return await descript_tts.request_status(req.id)
 @tts_router.post("/descript_sfx")
 async def descript_sfx(req: DescriptSfxRequest):
     return await descript_tts.search_sound_effects(req.query)
 @tts_router.post("/descript_unsplash")
 async def descript_unsplash(req: DescriptSfxRequest):
     return await descript_tts.search_unsplash_images(req.query)

     DescriptRequest,
     DescriptStatusRequest,
     DescriptSfxRequest,
+    DescriptTranscript,
 )
 from .utils.Podcastle import PodcastleAPI
 from .utils.HeyGen import HeygenAPI
     return await descript_tts.request_status(req.id)
+@tts_router.post("/descript_music")
+async def descript_music(req: DescriptSfxRequest):
+    return await descript_tts.search_music(req.query)
 @tts_router.post("/descript_sfx")
 async def descript_sfx(req: DescriptSfxRequest):
     return await descript_tts.search_sound_effects(req.query)
+@tts_router.post("/descript_transcript")
+async def descript_transcript(req: DescriptTranscript):
+    return await descript_tts.get_transcription(req)
+    # return await descript_tts.search_sound_effects(req.query)
 @tts_router.post("/descript_unsplash")
 async def descript_unsplash(req: DescriptSfxRequest):
     return await descript_tts.search_unsplash_images(req.query)

App/TTS/utils/Descript.py CHANGED Viewed

@@ -1,10 +1,12 @@
 import aiohttp
-import asyncio
 import json, pprint, uuid, os, datetime
 import tempfile, shutil
 from typing import List, Optional
 from datetime import datetime, timedelta
 from pydantic import BaseModel, HttpUrl
 class Metadata(BaseModel):
@@ -52,6 +54,55 @@ class DescriptTTS:
         self.refresh_token = refresh_token
         self.tau_id = "90f9e0ad-594e-4203-9297-d4c7cc691e5x"
     async def login_and_get_bearer_token(self):
         # Step 1: Use refresh token to get a new access token
         new_bearer_token, new_refresh_token = await self.refresh_access_token()
@@ -135,60 +186,80 @@ class DescriptTTS:
         return file_path
     async def search_unsplash_images(self, query_terms):
         url = "https://api.descript.com/v2/cloud_libraries/providers/unsplash/image/search"
         data = {
-            'tracking_info': {'project_id': self.project_id},
-            'pagination_info': {'page': 2, 'page_size': 25},
-            'query': {'terms': query_terms}
         }
         try:
-            response = await self.make_authenticated_request(url, method="POST", data=data)
             return response
         except Exception as e:
             print(f"Failed to search Unsplash images: {e}")
             return None
     async def search_sound_effects(self, query_terms):
         url = "https://api.descript.com/v2/cloud_libraries/providers/stock-sfx/audio/search"
         headers = {
-            'accept': 'application/json, text/plain, */*',
-            'accept-language': 'en-US,en;q=0.9',
-            'content-type': 'application/json',
-            'authorization': f'Bearer {self.bearer_token}',  # Use the valid bearer token
         }
         data = {
-            'tracking_info': {'project_id': self.project_id},
-            'pagination_info': {'page': 1, 'page_size': 25},
-            'query': {'terms': query_terms}
         }
         try:
-            response = await self.make_authenticated_request(url, method="POST", data=data)
             return response
         except Exception as e:
             print(f"Failed to search sound effects: {e}")
-            return {'status':str(e)}
     async def get_voices(self):
         url = "https://api.descript.com/v2/users/me/voices"
         try:
             response = await self.make_authenticated_request(url)
             voices = response
-            self.voice_ids = {voice['name']: voice['id'] for voice in voices}
             return voices
         except Exception as e:
             print(f"Failed to fetch voices: {e}")
             return None
     async def start_token_refresh_schedule(self):
         while True:
             try:
@@ -206,7 +277,6 @@ class DescriptTTS:
             # Wait for 24 hours before the next refresh
             await asyncio.sleep(24 * 60 * 60)
     async def update_refresh_token(self, new_refresh_token):
         # Update the new refresh token to Firebase
         data = {"refresh_token": new_refresh_token}
@@ -220,7 +290,30 @@ class DescriptTTS:
                         f"Failed to update refresh token. Status code: {response.status}, Error: {await response.text()}"
                     )
-    async def make_authenticated_request(self, url, method="GET", data=None):
         if not self.bearer_token:
             await self.login_and_get_bearer_token()  # Make sure we have a valid bearer token
@@ -231,7 +324,6 @@ class DescriptTTS:
             "accept-version": "v1",
             "authorization": f"Bearer {self.bearer_token}",
             "cache-control": "no-cache",
-            "content-type": "application/json",
             "origin": "https://web.descript.com",
             "pragma": "no-cache",
             "referer": "https://web.descript.com/",
@@ -251,29 +343,41 @@ class DescriptTTS:
         }
         async with aiohttp.ClientSession() as session:
-            async with session.request(
-                method, url, headers=headers, json=data
-            ) as response:
-                if response.status < 300:
-                    return await response.json()
-                elif response.status == 401:
-                    self.refresh_token =None
-                    # Retry the request after refreshing the token
                     await self.login_and_get_bearer_token()
                     headers["authorization"] = f"Bearer {self.bearer_token}"
-                    async with session.request(
-                        method, url, headers=headers, json=data
-                    ) as retry_response:
-                        if retry_response.status == 200:
-                            return await retry_response.json()
-                        else:
-                            raise Exception(
-                                f"Request failed even after refreshing token. Status code: {retry_response.status}, Error: {await retry_response.text()}"
-                            )
-                else:
-                    raise Exception(
-                        f"Request failed. Status code: {response.status}, Error: {await response.text()}"
                     )
     async def get_assets(self):
         url = "https://api.descript.com/v2/projects/f734c6d7-e39d-4c1d-8f41-417f94cd37ce/media_assets?include_artifacts=true&cursor=1702016922390&include_placeholder=true"
@@ -283,7 +387,7 @@ class DescriptTTS:
         except Exception as e:
             print(f"Failed to get assets: {str(e)}")
-    async def overdub_text(self, text, speaker="Lawrance",_voice_id=None):
         url = "https://api.descript.com/v2/projects/f734c6d7-e39d-4c1d-8f41-417f94cd37ce/overdub"
         voice_id = _voice_id or self.voice_ids[speaker]
         data = {
@@ -334,20 +438,18 @@ class DescriptTTS:
     async def request_status(self, id):
         status = await self.overdub_staus(id)
         if status["state"] == "done":
-            asset_id=status["result"]["imputation_audio_asset_id"]
             overdub = await self.get_assets()
             for asset in overdub["data"]:
                 if asset["id"] == asset_id:
                     data = TTSResponse(**asset)
                     url = data.artifacts[0].read_url
-                    return {'url':url,'status':'done'}
         return status
     async def say(self, text, speaker="Henry"):
         overdub = await self.overdub_text(text, speaker=speaker)
         asset_id = None
         while True:
             status = await self.overdub_staus(overdub["id"])
@@ -366,5 +468,3 @@ class DescriptTTS:
                 print(url)
                 path = await self.download_and_store_file(str(url))
                 return path, url

 import aiohttp
+import asyncio, wave
 import json, pprint, uuid, os, datetime
 import tempfile, shutil
 from typing import List, Optional
 from datetime import datetime, timedelta
 from pydantic import BaseModel, HttpUrl
+from App.TTS.Schemas import DescriptTranscript
+from pydub import AudioSegment
 class Metadata(BaseModel):
         self.refresh_token = refresh_token
         self.tau_id = "90f9e0ad-594e-4203-9297-d4c7cc691e5x"
+    def concatenate_wave_files(self, input_file_paths):
+        """
+        Concatenates multiple wave files and saves the result to a new file.
+        :param input_file_paths: A list of paths to the input wave files.
+        """
+        temp_dir = tempfile.mkdtemp()
+        # Generate a unique random filename
+        random_filename = str(uuid.uuid4()) + ".wav"
+        output_file_path = os.path.join(temp_dir, random_filename)
+        # Check if input file paths are provided
+        if not input_file_paths:
+            raise ValueError("No input file paths provided.")
+        # Validate output file path
+        if not output_file_path:
+            raise ValueError("Output file path is empty.")
+        # Validate input file paths
+        for input_file_path in input_file_paths:
+            if not input_file_path:
+                raise ValueError("Empty input file path found.")
+        # Validate and get parameters from the first input file
+        with wave.open(input_file_paths[0], "rb") as input_file:
+            n_channels = input_file.getnchannels()
+            sampwidth = input_file.getsampwidth()
+            framerate = input_file.getframerate()
+            comptype = input_file.getcomptype()
+            compname = input_file.getcompname()
+        # Open the output file for writing
+        output_file = wave.open(output_file_path, "wb")
+        output_file.setnchannels(n_channels)
+        output_file.setsampwidth(sampwidth)
+        output_file.setframerate(framerate)
+        output_file.setcomptype(comptype, compname)
+        # Concatenate and write data from all input files to the output file
+        for input_file_path in input_file_paths:
+            with wave.open(input_file_path, "rb") as input_file:
+                output_file.writeframes(input_file.readframes(input_file.getnframes()))
+        # Close the output file
+        output_file.close()
+        return output_file_path
     async def login_and_get_bearer_token(self):
         # Step 1: Use refresh token to get a new access token
         new_bearer_token, new_refresh_token = await self.refresh_access_token()
         return file_path
+    def calculate_audio_duration(self, audio_file):
+        wav_file = AudioSegment.from_file(audio_file, format="wav")
+        duration_in_seconds = str(float(len(wav_file) / 1000))
+        return duration_in_seconds
     async def search_unsplash_images(self, query_terms):
         url = "https://api.descript.com/v2/cloud_libraries/providers/unsplash/image/search"
         data = {
+            "tracking_info": {"project_id": self.project_id},
+            "pagination_info": {"page": 2, "page_size": 25},
+            "query": {"terms": query_terms},
         }
         try:
+            response = await self.make_authenticated_request(
+                url, method="POST", data=data
+            )
             return response
         except Exception as e:
             print(f"Failed to search Unsplash images: {e}")
             return None
+    async def search_music(self, query_terms):
+        url = "https://web.descript.com/v2/cloud_libraries/providers/stock-music/audio/search"
+        data = {
+            "tracking_info": {"project_id": self.project_id},
+            "pagination_info": {"page": 2, "page_size": 25},
+            "query": {"terms": query_terms},
+        }
+        try:
+            response = await self.make_authenticated_request(
+                url, method="POST", data=data
+            )
+            return response
+        except Exception as e:
+            print(f"Failed to search music: {e}")
+            return None
     async def search_sound_effects(self, query_terms):
         url = "https://api.descript.com/v2/cloud_libraries/providers/stock-sfx/audio/search"
         headers = {
+            "accept": "application/json, text/plain, */*",
+            "accept-language": "en-US,en;q=0.9",
+            "content-type": "application/json",
+            "authorization": f"Bearer {self.bearer_token}",  # Use the valid bearer token
         }
         data = {
+            "tracking_info": {"project_id": self.project_id},
+            "pagination_info": {"page": 1, "page_size": 25},
+            "query": {"terms": query_terms},
         }
         try:
+            response = await self.make_authenticated_request(
+                url, method="POST", data=data
+            )
             return response
         except Exception as e:
             print(f"Failed to search sound effects: {e}")
+            return {"status": str(e)}
     async def get_voices(self):
         url = "https://api.descript.com/v2/users/me/voices"
         try:
             response = await self.make_authenticated_request(url)
             voices = response
+            self.voice_ids = {voice["name"]: voice["id"] for voice in voices}
             return voices
         except Exception as e:
             print(f"Failed to fetch voices: {e}")
             return None
     async def start_token_refresh_schedule(self):
         while True:
             try:
             # Wait for 24 hours before the next refresh
             await asyncio.sleep(24 * 60 * 60)
     async def update_refresh_token(self, new_refresh_token):
         # Update the new refresh token to Firebase
         data = {"refresh_token": new_refresh_token}
                         f"Failed to update refresh token. Status code: {response.status}, Error: {await response.text()}"
                     )
+    async def make_request_with_retry(self, session, method, url, headers, data):
+        if type(data) == dict:
+            args = {"json": data}
+        else:
+            args = {"data": data}
+        # print(**args)
+        async with session.request(method, url, headers=headers, **args) as response:
+            if response.status < 300:
+                return await response.json()
+            elif response.status == 401:
+                raise aiohttp.ClientResponseError(
+                    response.request_info, response.history, status=response.status
+                )
+            else:
+                raise aiohttp.ClientResponseError(
+                    response.request_info, response.history, status=response.status
+                )
+    async def make_authenticated_request(
+        self,
+        url,
+        method="GET",
+        data=None,
+    ):
         if not self.bearer_token:
             await self.login_and_get_bearer_token()  # Make sure we have a valid bearer token
             "accept-version": "v1",
             "authorization": f"Bearer {self.bearer_token}",
             "cache-control": "no-cache",
             "origin": "https://web.descript.com",
             "pragma": "no-cache",
             "referer": "https://web.descript.com/",
         }
         async with aiohttp.ClientSession() as session:
+            try:
+                return await self.make_request_with_retry(
+                    session, method, url, headers, data
+                )
+            except aiohttp.ClientResponseError as e:
+                if e.status == 401:
+                    self.refresh_token = None
                     await self.login_and_get_bearer_token()
                     headers["authorization"] = f"Bearer {self.bearer_token}"
+                    return await self.make_request_with_retry(
+                        session, method, url, headers, data
                     )
+                else:
+                    raise e
+    async def get_transcription(self, query: DescriptTranscript):
+        data = aiohttp.FormData()
+        audio_paths = []
+        audio_path = ""
+        for url in query.audio_url:
+            temp = await self.download_and_store_file(url)
+            audio_paths.append(temp)
+        audio_path = self.concatenate_wave_files(audio_paths)
+        data.add_field("audio", open(audio_path, "rb"))
+        data.add_field("text", query.text)
+        data.add_field("duration", self.calculate_audio_duration(audio_path))
+        try:
+            result = await self.make_authenticated_request(
+                url="https://aligner.descript.com/", method="POST", data=data
+            )
+            return result
+        except Exception as e:
+            print(f"Failed transcript {str(e)}")
     async def get_assets(self):
         url = "https://api.descript.com/v2/projects/f734c6d7-e39d-4c1d-8f41-417f94cd37ce/media_assets?include_artifacts=true&cursor=1702016922390&include_placeholder=true"
         except Exception as e:
             print(f"Failed to get assets: {str(e)}")
+    async def overdub_text(self, text, speaker="Lawrance", _voice_id=None):
         url = "https://api.descript.com/v2/projects/f734c6d7-e39d-4c1d-8f41-417f94cd37ce/overdub"
         voice_id = _voice_id or self.voice_ids[speaker]
         data = {
     async def request_status(self, id):
         status = await self.overdub_staus(id)
         if status["state"] == "done":
+            asset_id = status["result"]["imputation_audio_asset_id"]
             overdub = await self.get_assets()
             for asset in overdub["data"]:
                 if asset["id"] == asset_id:
                     data = TTSResponse(**asset)
                     url = data.artifacts[0].read_url
+                    return {"url": url, "status": "done"}
         return status
     async def say(self, text, speaker="Henry"):
         overdub = await self.overdub_text(text, speaker=speaker)
         asset_id = None
         while True:
             status = await self.overdub_staus(overdub["id"])
                 print(url)
                 path = await self.download_and_store_file(str(url))
                 return path, url

requirements.txt CHANGED Viewed

@@ -22,6 +22,7 @@ html5lib
 requests
 bs4
 strenum
 git+https://github.com/snowby666/poe-api-wrapper.git

 requests
 bs4
 strenum
+pydub
 git+https://github.com/snowby666/poe-api-wrapper.git