Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,12 @@ import numpy as np
|
|
5 |
os.system("pip install git+https://github.com/openai/whisper.git")
|
6 |
import gradio as gr
|
7 |
import whisper
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
model = whisper.load_model("small")
|
10 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
@@ -26,12 +32,12 @@ def inference(audio):
|
|
26 |
return result.text
|
27 |
|
28 |
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
pipe = pipeline(model="Sleepyp00/whisper-small-Swedish")
|
35 |
|
36 |
model2 = VitsModel.from_pretrained("facebook/mms-tts-eng")
|
37 |
tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")
|
@@ -56,6 +62,91 @@ def synthesise(text):
|
|
56 |
return outputs.audio[0]
|
57 |
|
58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
# Define the pipeline
|
60 |
def speech_to_speech_translation(audio):
|
61 |
translated_text = translate(audio)
|
@@ -71,6 +162,9 @@ def predict(transType, language, audio, audio_mic = None):
|
|
71 |
|
72 |
if transType == "Text":
|
73 |
return translate(audio), None
|
|
|
|
|
|
|
74 |
if transType == "Audio":
|
75 |
return speech_to_speech_translation(audio)
|
76 |
|
@@ -80,7 +174,7 @@ description="Use Whisper pretrained model to convert swedish audio to english (t
|
|
80 |
|
81 |
|
82 |
supportLangs = ["Swedish", "French (in training)"]
|
83 |
-
transTypes = ["Text", "Audio"]
|
84 |
|
85 |
#examples = [
|
86 |
# ["Text", "Swedish", "./ex1.wav", None],
|
@@ -98,7 +192,7 @@ demo = gr.Interface(
|
|
98 |
gr.Audio(label="Record an audio", sources="microphone", type="filepath"),
|
99 |
],
|
100 |
outputs=[
|
101 |
-
gr.Text(label="Text translation"),gr.Audio(label="Audio translation",type = "numpy")
|
102 |
],
|
103 |
title=title,
|
104 |
description=description,
|
|
|
5 |
os.system("pip install git+https://github.com/openai/whisper.git")
|
6 |
import gradio as gr
|
7 |
import whisper
|
8 |
+
import requests
|
9 |
+
|
10 |
+
# Chat-completion model name placed in every request payload built by gpt_predict.
MODEL = "gpt-3.5-turbo"
# Endpoint that gpt_predict POSTs to; None when the variable is not set.
API_URL = os.getenv("API_URL")
# Bearer token sent in the Authorization header; None when the variable is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# int(None) raises TypeError at import time, so an unset or empty NUM_THREADS
# falls back to a single thread instead of crashing the app on startup.
# NOTE(review): the fallback of 1 is a conservative choice — confirm the intended default.
NUM_THREADS = int(os.getenv("NUM_THREADS") or "1")
|
14 |
|
15 |
model = whisper.load_model("small")
|
16 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
|
|
32 |
return result.text
|
33 |
|
34 |
|
35 |
+
# Load Whisper-small
# (This heading was a bare prose line without "#", which is a SyntaxError
# at module load; it is restored as a comment.)
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    device=device,  # "cuda:0" when torch reports CUDA is available, else "cpu"
)
|
40 |
+
#pipe = pipeline(model="Sleepyp00/whisper-small-Swedish")
|
41 |
|
42 |
model2 = VitsModel.from_pretrained("facebook/mms-tts-eng")
|
43 |
tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")
|
|
|
62 |
return outputs.audio[0]
|
63 |
|
64 |
|
65 |
+
def gpt_predict(inputs, top_p=1, temperature=1, chat_counter=0, history=None, request: gr.Request = None):
    """Stream a chat-completion answer for ``inputs`` from the endpoint at ``API_URL``.

    This is a generator. Each time a streamed line carries new content it
    yields a 6-tuple: the list of (user, assistant) pairs rendered through
    ``parse_codeblock``, the flat ``history`` list, ``chat_counter``, the
    ``requests`` response object, and two ``gr.update(interactive=False)``
    values. After the stream ends, or after an error, it yields the same
    tuple once more with ``interactive=True``.

    Args:
        inputs: The new user message.
        top_p: Nucleus-sampling value; only applied when ``chat_counter != 0``.
        temperature: Sampling temperature; only applied when ``chat_counter != 0``.
        chat_counter: Number of turns already completed. ``0`` means "first
            turn": ``history`` is not replayed and sampling is pinned to 1.0.
        history: Flat list of previous messages, alternating user/assistant by
            position. It is extended in place. ``None`` starts a fresh list.
        request: Optional ``gr.Request``. When it carries ``kwargs["headers"]``,
            those are forwarded in a ``Headers`` field.

    NOTE(review): statement nesting was reconstructed from a flattened
    listing — confirm that the final yield/print belong after the try/except.
    """
    # Function-scope import: no module-level ``import json`` is visible here.
    import json

    # A shared ``[]`` default would leak turns from one call into the next.
    if history is None:
        history = []

    if chat_counter != 0:
        # Later turns: replay the conversation, then append the new prompt.
        messages = [
            {"role": "user" if i % 2 == 0 else "assistant", "content": data}
            for i, data in enumerate(history)
        ]
        messages.append({"role": "user", "content": inputs})
    else:
        # First turn: only the new prompt, with fixed sampling values
        # (the caller-supplied temperature/top_p are deliberately not used).
        messages = [{"role": "user", "content": f"{inputs}"}]
        temperature = 1.0
        top_p = 1.0

    payload = {
        "model": MODEL,
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "n": 1,
        "stream": True,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}",
    }
    # The request is optional (predict() calls this function without one), so
    # only forward the caller's headers when they are actually present.
    request_kwargs = getattr(request, "kwargs", None) or {}
    if "headers" in request_kwargs:
        headers["Headers"] = f"{request_kwargs['headers']}"

    chat_counter += 1

    history.append(inputs)
    token_counter = 0
    partial_words = ""
    counter = 0
    # Bound before the try so the final yield works even if the POST fails.
    response = None

    def chat_pairs():
        # Pair consecutive history entries as (user, assistant) for display.
        return [
            (parse_codeblock(history[i]), parse_codeblock(history[i + 1]))
            for i in range(0, len(history) - 1, 2)
        ]

    try:
        # make a POST request to the API endpoint using the requests.post method, passing in stream=True
        response = requests.post(API_URL, headers=headers, json=payload, stream=True)

        for chunk in response.iter_lines():
            # Skipping first chunk
            if counter == 0:
                counter += 1
                continue
            # Response data is in bytes; decode each line once.
            line = chunk.decode()
            # Lines of 12 characters or fewer (including empty ones) are skipped.
            if len(line) <= 12:
                continue
            # Drop the 6-character line prefix and parse the JSON body once.
            delta = json.loads(line[6:])['choices'][0]['delta']
            if "content" not in delta:
                continue
            partial_words = partial_words + delta["content"]
            if token_counter == 0:
                history.append(" " + partial_words)
            else:
                history[-1] = partial_words
            token_counter += 1
            yield chat_pairs(), history, chat_counter, response, gr.update(interactive=False), gr.update(interactive=False)  # resembles {chatbot: chat, state: history}
    except Exception as e:
        # Best-effort boundary: report the failure and still re-enable the UI below.
        print(f'error found: {e}')
    yield chat_pairs(), history, chat_counter, response, gr.update(interactive=True), gr.update(interactive=True)
    print(json.dumps({"chat_counter": chat_counter, "payload": payload, "partial_words": partial_words, "token_counter": token_counter, "counter": counter}))
|
148 |
+
|
149 |
+
|
150 |
# Define the pipeline
|
151 |
def speech_to_speech_translation(audio):
|
152 |
translated_text = translate(audio)
|
|
|
162 |
|
163 |
if transType == "Text":
|
164 |
return translate(audio), None
|
165 |
+
if transType == "GPT answer":
|
166 |
+
req = translate(audio)
|
167 |
+
return gpt_predict(req)
|
168 |
if transType == "Audio":
|
169 |
return speech_to_speech_translation(audio)
|
170 |
|
|
|
174 |
|
175 |
|
176 |
supportLangs = ["Swedish", "French (in training)"]
|
177 |
+
transTypes = ["Text", "Audio", "GPT answer"]
|
178 |
|
179 |
#examples = [
|
180 |
# ["Text", "Swedish", "./ex1.wav", None],
|
|
|
192 |
gr.Audio(label="Record an audio", sources="microphone", type="filepath"),
|
193 |
],
|
194 |
outputs=[
|
195 |
+
gr.Text(label="Text translation or gpt answer"),gr.Audio(label="Audio translation",type = "numpy")
|
196 |
],
|
197 |
title=title,
|
198 |
description=description,
|