cotxetj committed on
Commit
879c526
1 Parent(s): 4c84112

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -8
app.py CHANGED
@@ -5,6 +5,12 @@ import numpy as np
5
  os.system("pip install git+https://github.com/openai/whisper.git")
6
  import gradio as gr
7
  import whisper
 
 
 
 
 
 
8
 
9
  model = whisper.load_model("small")
10
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -26,12 +32,12 @@ def inference(audio):
26
  return result.text
27
 
28
 
29
- # Load Whisper-small
30
- # pipe = pipeline("automatic-speech-recognition",
31
- # model="openai/whisper-small",
32
- # device=device
33
- # )
34
- pipe = pipeline(model="Sleepyp00/whisper-small-Swedish")
35
 
36
  model2 = VitsModel.from_pretrained("facebook/mms-tts-eng")
37
  tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")
@@ -56,6 +62,91 @@ def synthesise(text):
56
  return outputs.audio[0]
57
 
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  # Define the pipeline
60
  def speech_to_speech_translation(audio):
61
  translated_text = translate(audio)
@@ -71,6 +162,9 @@ def predict(transType, language, audio, audio_mic = None):
71
 
72
  if transType == "Text":
73
  return translate(audio), None
 
 
 
74
  if transType == "Audio":
75
  return speech_to_speech_translation(audio)
76
 
@@ -80,7 +174,7 @@ description="Use Whisper pretrained model to convert swedish audio to english (t
80
 
81
 
82
  supportLangs = ["Swedish", "French (in training)"]
83
- transTypes = ["Text", "Audio"]
84
 
85
  #examples = [
86
  # ["Text", "Swedish", "./ex1.wav", None],
@@ -98,7 +192,7 @@ demo = gr.Interface(
98
  gr.Audio(label="Record an audio", sources="microphone", type="filepath"),
99
  ],
100
  outputs=[
101
- gr.Text(label="Text translation"),gr.Audio(label="Audio translation",type = "numpy")
102
  ],
103
  title=title,
104
  description=description,
 
5
os.system("pip install git+https://github.com/openai/whisper.git")
import gradio as gr
import whisper
import requests
import json  # used by gpt_predict to parse streamed chunks; was never imported

MODEL = "gpt-3.5-turbo"
API_URL = os.getenv("API_URL")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Default to "1" so the app still starts when NUM_THREADS is unset —
# int(os.getenv("NUM_THREADS")) would raise TypeError on None.
NUM_THREADS = int(os.getenv("NUM_THREADS", "1"))

model = whisper.load_model("small")
device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
32
  return result.text
33
 
34
 
35
# Load Whisper-small
# NOTE(review): the commit dropped the leading "#" here, leaving the bare
# tokens `Load Whisper-small` — a SyntaxError. Restored as a comment.
pipe = pipeline("automatic-speech-recognition",
                model="openai/whisper-small",
                device=device,
                )
#pipe = pipeline(model="Sleepyp00/whisper-small-Swedish")

model2 = VitsModel.from_pretrained("facebook/mms-tts-eng")
tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")
 
62
  return outputs.audio[0]
63
 
64
 
65
def gpt_predict(inputs, top_p = 1, temperature = 1, chat_counter = 0, history = None, request: gr.Request = None):
    """Stream a chat completion for *inputs* from the OpenAI-compatible API.

    Generator: for each received token it yields a 6-tuple of
    (chat_pairs, history, chat_counter, response, update, update),
    mirroring the gradio streaming-chat pattern.

    Parameters
    ----------
    inputs : str
        The user prompt to send.
    top_p, temperature : float
        Sampling parameters; only applied when history is non-empty.
    chat_counter : int
        Number of prior exchanges; 0 means a fresh conversation.
    history : list[str] | None
        Alternating user/assistant turns. Defaults to None instead of []
        to avoid the shared-mutable-default bug.
    request : gr.Request | None
        Optional gradio request; its headers are forwarded when present.
        (The original signature put this non-default parameter after
        defaulted ones — a SyntaxError — and dereferenced it
        unconditionally, crashing when called as gpt_predict(req).)
    """
    if history is None:
        history = []

    payload = {
        "model": MODEL,
        "messages": [{"role": "user", "content": f"{inputs}"}],
        "temperature": 1.0,
        "top_p": 1.0,
        "n": 1,
        "stream": True,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}",
    }
    # Only forward gradio request headers when a request object was given.
    if request is not None:
        headers["Headers"] = f"{request.kwargs['headers']}"

    # print(f"chat_counter - {chat_counter}")
    if chat_counter != 0:
        # Rebuild the full conversation from the alternating
        # user/assistant turns stored in history.
        messages = []
        for i, data in enumerate(history):
            role = 'user' if i % 2 == 0 else 'assistant'
            messages.append({"role": role, "content": data})
        messages.append({"role": "user", "content": inputs})
        payload = {
            "model": MODEL,
            "messages": messages,
            "temperature": temperature,
            "top_p": top_p,
            "n": 1,
            "stream": True,
            "presence_penalty": 0,
            "frequency_penalty": 0,
        }

    chat_counter += 1

    history.append(inputs)
    token_counter = 0
    partial_words = ""
    counter = 0
    # Bind response before the try so the except-path yield below cannot
    # raise NameError when requests.post itself fails.
    response = None

    try:
        # make a POST request to the API endpoint using the requests.post method, passing in stream=True
        response = requests.post(API_URL, headers=headers, json=payload, stream=True)
        response_code = f"{response}"
        #if response_code.strip() != "<Response [200]>":
        #    #print(f"response code - {response}")
        #    raise Exception(f"Sorry, hitting rate limit. Please try again later. {response}")

        for chunk in response.iter_lines():
            #Skipping first chunk
            if counter == 0:
                counter += 1
                continue
            #counter+=1
            # check whether each line is non-empty
            if chunk.decode():
                chunk = chunk.decode()
                # decode each line as response data is in bytes;
                # SSE lines look like "data: {...}", JSON starts at offset 6
                if len(chunk) > 12 and "content" in json.loads(chunk[6:])['choices'][0]['delta']:
                    partial_words = partial_words + json.loads(chunk[6:])['choices'][0]["delta"]["content"]
                    if token_counter == 0:
                        history.append(" " + partial_words)
                    else:
                        history[-1] = partial_words
                    token_counter += 1
                    yield [(parse_codeblock(history[i]), parse_codeblock(history[i + 1])) for i in range(0, len(history) - 1, 2) ], history, chat_counter, response, gr.update(interactive=False), gr.update(interactive=False)  # resembles {chatbot: chat, state: history}
    except Exception as e:
        print (f'error found: {e}')
        yield [(parse_codeblock(history[i]), parse_codeblock(history[i + 1])) for i in range(0, len(history) - 1, 2) ], history, chat_counter, response, gr.update(interactive=True), gr.update(interactive=True)
    print(json.dumps({"chat_counter": chat_counter, "payload": payload, "partial_words": partial_words, "token_counter": token_counter, "counter": counter}))
148
+
149
+
150
  # Define the pipeline
151
  def speech_to_speech_translation(audio):
152
  translated_text = translate(audio)
 
162
 
163
  if transType == "Text":
164
  return translate(audio), None
165
+ if transType == "GPT answer":
166
+ req = translate(audio)
167
+ return gpt_predict(req)
168
  if transType == "Audio":
169
  return speech_to_speech_translation(audio)
170
 
 
174
 
175
 
176
  supportLangs = ["Swedish", "French (in training)"]
177
+ transTypes = ["Text", "Audio", "GPT answer"]
178
 
179
  #examples = [
180
  # ["Text", "Swedish", "./ex1.wav", None],
 
192
  gr.Audio(label="Record an audio", sources="microphone", type="filepath"),
193
  ],
194
  outputs=[
195
+ gr.Text(label="Text translation or gpt answer"),gr.Audio(label="Audio translation",type = "numpy")
196
  ],
197
  title=title,
198
  description=description,