Spaces:

ariankhalfani
/

RobertaSpeak

Runtime error

App Files Community

ariankhalfani committed on Jun 16, 2024

Commit

03fe636

verified ·

1 Parent(s): 1789b0c

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -14

app.py CHANGED Viewed

@@ -10,16 +10,13 @@ API_URL_ROBERTA = "https://api-inference.huggingface.co/models/deepset/roberta-b
 API_URL_TTS = "https://api-inference.huggingface.co/models/espnet/english_male_ryanspeech_tacotron"
 API_URL_WHISPER = "https://api-inference.huggingface.co/models/openai/whisper-large-v2"
-# Hugging Face API Token
-API_TOKEN = os.getenv("HF_API_KEY")  # Ensure you have set this environment variable
-HEADERS = {"Authorization": f"Bearer {API_TOKEN}"}
 # Retry settings
 MAX_RETRIES = 5
 RETRY_DELAY = 1  # seconds
 # Function to query the Whisper model for audio transcription
-def query_whisper(audio_path):
     for attempt in range(MAX_RETRIES):
         try:
             if not audio_path:
@@ -30,7 +27,7 @@ def query_whisper(audio_path):
             with open(audio_path, "rb") as f:
                 data = f.read()
-            response = requests.post(API_URL_WHISPER, headers=HEADERS, files={"file": data})
             response.raise_for_status()
             return response.json()
@@ -43,12 +40,13 @@ def query_whisper(audio_path):
                 return {"error": str(e)}
 # Function to query the RoBERTa model
-def query_roberta(prompt, context):
     payload = {"inputs": {"question": prompt, "context": context}}
     for attempt in range(MAX_RETRIES):
         try:
-            response = requests.post(API_URL_ROBERTA, headers=HEADERS, json=payload)
             response.raise_for_status()
             return response.json()
         except Exception as e:
@@ -60,12 +58,13 @@ def query_roberta(prompt, context):
                 return {"error": str(e)}
 # Function to generate speech from text using ESPnet TTS
-def generate_speech(answer):
     payload = {"inputs": answer}
     for attempt in range(MAX_RETRIES):
         try:
-            response = requests.post(API_URL_TTS, headers=HEADERS, json=payload)
             response.raise_for_status()
             audio = response.content
@@ -82,25 +81,25 @@ def generate_speech(answer):
                 return {"error": str(e)}
 # Function to handle the entire process
-def handle_all(context, audio):
     for attempt in range(MAX_RETRIES):
         try:
             # Step 1: Transcribe audio
-            transcription = query_whisper(audio)
             if 'error' in transcription:
                 raise Exception(transcription['error'])
             question = transcription.get("text", "No transcription found")
             # Step 2: Get answer from RoBERTa
-            answer = query_roberta(question, context)
             if 'error' in answer:
                 raise Exception(answer['error'])
             answer_text = answer.get('answer', 'No answer found')
             # Step 3: Generate speech from answer
-            audio_file_path = generate_speech(answer_text)
             if 'error' in audio_file_path:
                 raise Exception(audio_file_path['error'])
@@ -118,6 +117,7 @@ def handle_all(context, audio):
 iface = gr.Interface(
     fn=handle_all,
     inputs=[
         gr.Textbox(lines=2, label="Context", placeholder="Enter the context here..."),
         gr.Audio(type="filepath", label="Record your voice")
     ],

 API_URL_TTS = "https://api-inference.huggingface.co/models/espnet/english_male_ryanspeech_tacotron"
 API_URL_WHISPER = "https://api-inference.huggingface.co/models/openai/whisper-large-v2"
 # Retry settings
 MAX_RETRIES = 5
 RETRY_DELAY = 1  # seconds
 # Function to query the Whisper model for audio transcription
+def query_whisper(api_token, audio_path):
+    headers = {"Authorization": f"Bearer {api_token}"}
     for attempt in range(MAX_RETRIES):
         try:
             if not audio_path:
             with open(audio_path, "rb") as f:
                 data = f.read()
+            response = requests.post(API_URL_WHISPER, headers=headers, files={"file": data})
             response.raise_for_status()
             return response.json()
                 return {"error": str(e)}
 # Function to query the RoBERTa model
+def query_roberta(api_token, prompt, context):
+    headers = {"Authorization": f"Bearer {api_token}"}
     payload = {"inputs": {"question": prompt, "context": context}}
     for attempt in range(MAX_RETRIES):
         try:
+            response = requests.post(API_URL_ROBERTA, headers=headers, json=payload)
             response.raise_for_status()
             return response.json()
         except Exception as e:
                 return {"error": str(e)}
 # Function to generate speech from text using ESPnet TTS
+def generate_speech(api_token, answer):
+    headers = {"Authorization": f"Bearer {api_token}"}
     payload = {"inputs": answer}
     for attempt in range(MAX_RETRIES):
         try:
+            response = requests.post(API_URL_TTS, headers=headers, json=payload)
             response.raise_for_status()
             audio = response.content
                 return {"error": str(e)}
 # Function to handle the entire process
+def handle_all(api_token, context, audio):
     for attempt in range(MAX_RETRIES):
         try:
             # Step 1: Transcribe audio
+            transcription = query_whisper(api_token, audio)
             if 'error' in transcription:
                 raise Exception(transcription['error'])
             question = transcription.get("text", "No transcription found")
             # Step 2: Get answer from RoBERTa
+            answer = query_roberta(api_token, question, context)
             if 'error' in answer:
                 raise Exception(answer['error'])
             answer_text = answer.get('answer', 'No answer found')
             # Step 3: Generate speech from answer
+            audio_file_path = generate_speech(api_token, answer_text)
             if 'error' in audio_file_path:
                 raise Exception(audio_file_path['error'])
 iface = gr.Interface(
     fn=handle_all,
     inputs=[
+        gr.Textbox(lines=1, label="Hugging Face API Token", type="password", placeholder="Enter your Hugging Face API token..."),
         gr.Textbox(lines=2, label="Context", placeholder="Enter the context here..."),
         gr.Audio(type="filepath", label="Record your voice")
     ],