Spaces:
Runtime error
Runtime error
File size: 6,204 Bytes
0668b67 725ee09 6c8885f c6fcce4 0668b67 2914c65 0668b67 879c526 dd3abd9 0668b67 261c279 0668b67 65c21fd 37b0e3a 6eaf87c 0668b67 58cb1a8 0668b67 1567e3c 58cb1a8 c6fcce4 58cb1a8 0668b67 1eac7b5 0668b67 75194a0 879c526 d65e856 879c526 4166423 879c526 9a63ffb eccdcbf 879c526 0668b67 ec1277a 0668b67 55070d4 0668b67 1567e3c 879c526 1567e3c 0668b67 a7a78fa 0668b67 879c526 517eb68 6eaf87c 5e0755e 0668b67 c6fcce4 36b2c0f 75194a0 0668b67 1567e3c 0668b67 517eb68 49f5187 0668b67 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
import torch
import json
import os
from transformers import pipeline, VitsModel, VitsTokenizer, SpeechT5ForTextToSpeech, SpeechT5HifiGan, SpeechT5Processor
import numpy as np
os.system("pip install git+https://github.com/openai/whisper.git")
import gradio as gr
import requests
MODEL = "gpt-3.5-turbo"
API_URL = os.getenv("API_URL")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
NUM_THREADS = 2
device = "cuda:0" if torch.cuda.is_available() else "cpu"
def parse_codeblock(text):
lines = text.split("\n")
for i, line in enumerate(lines):
if "```" in line:
if line != "```":
lines[i] = f'<pre><code class="{lines[i][3:]}">'
else:
lines[i] = '</code></pre>'
else:
if i > 0:
lines[i] = "<br/>" + line.replace("<", "<").replace(">", ">")
return "".join(lines)
pipe = pipeline(model="Sleepyp00/whisper-small-Swedish")
model2 = VitsModel.from_pretrained("facebook/mms-tts-eng")
tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")
# Define a function to translate an audio, in english here
def translate(audio):
outputs = pipe(audio, max_new_tokens=256,
generate_kwargs={"task": "translate"})
return outputs["text"]
# Define function to generate the waveform output
def synthesise(text):
inputs = tokenizer(text, return_tensors="pt")
input_ids = inputs["input_ids"]
with torch.no_grad():
outputs = model2(input_ids)
return outputs.audio[0]
def gpt_predict(inputs, request:gr.Request=gr.State([]), top_p = 1, temperature = 1, chat_counter = 0,history =[]):
payload = {
"model": MODEL,
"messages": [{"role": "user", "content": f"{inputs}"}],
"temperature" : 1.0,
"top_p":1.0,
"n" : 1,
"stream": True,
"presence_penalty":0,
"frequency_penalty":0,
}
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {OPENAI_API_KEY}",
}
# print(f"chat_counter - {chat_counter}")
if chat_counter != 0 :
messages = []
for i, data in enumerate(history):
if i % 2 == 0:
role = 'user'
else:
role = 'assistant'
message = {}
message["role"] = role
message["content"] = data
messages.append(message)
message = {}
message["role"] = "user"
message["content"] = inputs
messages.append(message)
payload = {
"model": MODEL,
"messages": messages,
"temperature" : temperature,
"top_p": top_p,
"n" : 1,
"stream": True,
"presence_penalty":0,
"frequency_penalty":0,
}
chat_counter += 1
history.append(inputs)
token_counter = 0
partial_words = ""
counter = 0
try:
# make a POST request to the API endpoint using the requests.post method, passing in stream=True
response = requests.post(API_URL, headers=headers, json=payload, stream=True)
response_code = f"{response}"
#if response_code.strip() != "<Response [200]>":
# #print(f"response code - {response}")
# raise Exception(f"Sorry, hitting rate limit. Please try again later. {response}")
out = []
for chunk in response.iter_lines():
#Skipping first chunk
if counter == 0:
counter += 1
continue
#counter+=1
# check whether each line is non-empty
if chunk.decode() :
chunk = chunk.decode()
# decode each line as response data is in bytes
if len(chunk) > 12 and "content" in json.loads(chunk[6:])['choices'][0]['delta']:
partial_words = partial_words + json.loads(chunk[6:])['choices'][0]["delta"]["content"]
if token_counter == 0:
history.append(" " + partial_words)
else:
history[-1] = partial_words
token_counter += 1
except Exception as e:
print (f'error found: {e}')
return partial_words
# Define the pipeline
def speech_to_speech_translation(audio):
translated_text = translate(audio)
synthesised_speech = synthesise(translated_text)
synthesised_speech = (
synthesised_speech.numpy() * 32767).astype(np.int16)
return [translated_text, None, (16000, synthesised_speech)]
def predict(transType, language, audio, audio_mic = None):
print("debug1:", audio,"debug2", audio_mic)
if not audio and audio_mic:
audio = audio_mic
if transType == "Text":
return translate(audio), None, None
if transType == "GPT answer":
req = translate(audio)
st = gr.State([])
return req, gpt_predict(req,st), None
if transType == "Audio":
return speech_to_speech_translation(audio)
# Define the title etc
title = "Swedish STSOT (Speech To Speech Or Text)"
description="Use Whisper pretrained model to convert swedish audio to english (text or audio)"
supportLangs = ["Swedish", "French (in training)"]
transTypes = ["Text", "Audio", "GPT answer"]
examples = [
["Text", "Swedish", "./ex1.wav", None],
["Audio", "Swedish", "./ex2.wav", None]
]
demo = gr.Interface(
fn=predict,
inputs=[
gr.Radio(label="Choose your output format", choices=transTypes),
gr.Radio(label="Choose a source language", choices=supportLangs, value="Swedish"),
gr.Audio(label="Import an audio", sources="upload", type="filepath"),
#gr.Audio(label="Import an audio", sources="upload", type="numpy"),
gr.Audio(label="Record an audio", sources="microphone", type="filepath"),
],
outputs=[
gr.Text(label="Text translation"),gr.Text(label="GPT answer"),gr.Audio(label="Audio translation",type = "numpy")
],
title=title,
description=description,
article="",
examples=examples,
)
demo.launch() |