Spaces:

cotxetj
/

swedish-to-speech-or-text

Runtime error

App Files Files Community

cotxetj committed on Dec 2, 2023

Commit

879c526

•

1 Parent(s): 4c84112

Update app.py

Browse files

Files changed (1) hide show

app.py +102 -8

app.py CHANGED Viewed

@@ -5,6 +5,12 @@ import numpy as np
 os.system("pip install git+https://github.com/openai/whisper.git")
 import gradio as gr
 import whisper
 model = whisper.load_model("small")
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -26,12 +32,12 @@ def inference(audio):
     return result.text
-# Load Whisper-small
-# pipe = pipeline("automatic-speech-recognition",
-#                 model="openai/whisper-small",
-#                 device=device
-# )
-pipe = pipeline(model="Sleepyp00/whisper-small-Swedish")
 model2 = VitsModel.from_pretrained("facebook/mms-tts-eng")
 tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")
@@ -56,6 +62,91 @@ def synthesise(text):
     return outputs.audio[0]
 # Define the pipeline
 def speech_to_speech_translation(audio):
     translated_text = translate(audio)
@@ -71,6 +162,9 @@ def predict(transType, language, audio, audio_mic = None):
         if transType == "Text":
             return translate(audio), None
         if transType == "Audio":
             return speech_to_speech_translation(audio)
@@ -80,7 +174,7 @@ description="Use Whisper pretrained model to convert swedish audio to english (t
 supportLangs = ["Swedish", "French (in training)"]
-transTypes = ["Text", "Audio"]
 #examples = [
 #    ["Text", "Swedish", "./ex1.wav", None],
@@ -98,7 +192,7 @@ demo = gr.Interface(
         gr.Audio(label="Record an audio", sources="microphone", type="filepath"),
     ],
     outputs=[
-        gr.Text(label="Text translation"),gr.Audio(label="Audio translation",type = "numpy")
     ],
     title=title,
     description=description,

 os.system("pip install git+https://github.com/openai/whisper.git")
 import gradio as gr
 import whisper
+import requests
+# Chat-completion settings; the endpoint URL and API key are read from the environment.
+MODEL = "gpt-3.5-turbo"
+API_URL = os.getenv("API_URL")
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+# Default to "1" when NUM_THREADS is unset: int(None) would raise TypeError at import time.
+NUM_THREADS = int(os.getenv("NUM_THREADS", "1"))
 model = whisper.load_model("small")
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
     return result.text
+# Load Whisper-small
+# (the comment marker above is required: a bare "Load Whisper-small" line is a SyntaxError)
+pipe = pipeline("automatic-speech-recognition",
+                model="openai/whisper-small",
+                device=device
+)
+#pipe = pipeline(model="Sleepyp00/whisper-small-Swedish")
 model2 = VitsModel.from_pretrained("facebook/mms-tts-eng")
 tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")
     return outputs.audio[0]
+def gpt_predict(inputs, top_p=1, temperature=1, chat_counter=0, history=None, request: gr.Request = None):
+    """Stream a chat completion for ``inputs`` from the endpoint at ``API_URL``.
+
+    This is a generator. While the reply streams in it yields 6-tuples of
+    ``(chat_pairs, history, chat_counter, response, gr.update(interactive=False),
+    gr.update(interactive=False))``; once streaming ends (or fails) it yields
+    one last tuple whose two updates are ``gr.update(interactive=True)``.
+
+    Args:
+        inputs: The user message to send.
+        top_p: Nucleus-sampling value; only sent when ``chat_counter != 0``
+            (the first turn sends a fixed 1.0).
+        temperature: Sampling temperature; only sent when ``chat_counter != 0``
+            (the first turn sends a fixed 1.0).
+        chat_counter: Number of previous turns. When non-zero, ``history`` is
+            replayed to the API before ``inputs``.
+        history: Flat list of earlier messages; even indexes are sent as
+            ``user``, odd indexes as ``assistant``. It is mutated in place.
+            ``None`` (the default) starts a fresh list.
+        request: Optional Gradio request. When given, its
+            ``kwargs['headers']`` entry is forwarded in a ``Headers`` header.
+
+    Yields:
+        The 6-tuples described above. ``response`` is ``None`` in the final
+        tuple if the POST request itself raised.
+    """
+    # Use a fresh list per call; a mutable ``history=[]`` default would be shared across calls.
+    if history is None:
+        history = []
+
+    def chat_pairs():
+        # Pair consecutive history entries as (user, assistant) tuples for display.
+        return [(parse_codeblock(history[i]), parse_codeblock(history[i + 1])) for i in range(0, len(history) - 1, 2)]
+
+    if chat_counter != 0:
+        # Follow-up turn: replay the stored conversation, then add the new user message.
+        messages = [
+            {"role": "user" if i % 2 == 0 else "assistant", "content": data}
+            for i, data in enumerate(history)
+        ]
+        messages.append({"role": "user", "content": inputs})
+        payload = {
+            "model": MODEL,
+            "messages": messages,
+            "temperature": temperature,
+            "top_p": top_p,
+            "n": 1,
+            "stream": True,
+            "presence_penalty": 0,
+            "frequency_penalty": 0,
+        }
+    else:
+        # First turn: send only the new message, with fixed sampling settings.
+        payload = {
+            "model": MODEL,
+            "messages": [{"role": "user", "content": f"{inputs}"}],
+            "temperature": 1.0,
+            "top_p": 1.0,
+            "n": 1,
+            "stream": True,
+            "presence_penalty": 0,
+            "frequency_penalty": 0,
+        }
+
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {OPENAI_API_KEY}",
+    }
+    # ``request`` is absent when this function is called directly rather than by Gradio.
+    if request is not None:
+        headers["Headers"] = f"{request.kwargs['headers']}"
+
+    chat_counter += 1
+    history.append(inputs)
+    token_counter = 0
+    partial_words = ""
+    counter = 0
+    # Bound up front so the final yield cannot hit an unbound name if the POST raises.
+    response = None
+    try:
+        # make a POST request to the API endpoint using the requests.post method, passing in stream=True
+        response = requests.post(API_URL, headers=headers, json=payload, stream=True)
+        for chunk in response.iter_lines():
+            # Skipping first chunk
+            if counter == 0:
+                counter += 1
+                continue
+            # decode each line as response data is in bytes
+            line = chunk.decode()
+            # Empty lines and lines of 12 characters or fewer are not parsed.
+            if len(line) <= 12:
+                continue
+            # Drop the first 6 characters before parsing (presumably the "data: " prefix -- confirm).
+            delta = json.loads(line[6:])['choices'][0]['delta']
+            if "content" not in delta:
+                continue
+            partial_words = partial_words + delta["content"]
+            if token_counter == 0:
+                history.append(" " + partial_words)
+            else:
+                history[-1] = partial_words
+            token_counter += 1
+            yield chat_pairs(), history, chat_counter, response, gr.update(interactive=False), gr.update(interactive=False)  # resembles {chatbot: chat, state: history}
+    except Exception as e:
+        # Best-effort streaming: report the failure, then fall through to the final yield.
+        print (f'error found: {e}')
+    yield chat_pairs(), history, chat_counter, response, gr.update(interactive=True), gr.update(interactive=True)
+    print(json.dumps({"chat_counter": chat_counter, "payload": payload, "partial_words": partial_words, "token_counter": token_counter, "counter": counter}))
 # Define the pipeline
 def speech_to_speech_translation(audio):
     translated_text = translate(audio)
         if transType == "Text":
             return translate(audio), None
+        if transType == "GPT answer":
+            req = translate(audio)
+            return gpt_predict(req)
         if transType == "Audio":
             return speech_to_speech_translation(audio)
 supportLangs = ["Swedish", "French (in training)"]
+transTypes = ["Text", "Audio", "GPT answer"]
 #examples = [
 #    ["Text", "Swedish", "./ex1.wav", None],
         gr.Audio(label="Record an audio", sources="microphone", type="filepath"),
     ],
     outputs=[
+        gr.Text(label="Text translation or gpt answer"),gr.Audio(label="Audio translation",type = "numpy")
     ],
     title=title,
     description=description,