import gradio as gr
import time

import librosa
import numpy as np
import torch
from transformers import pipeline
######################## Llama model ###############################
# from transformers import AutoModelForCausalLM, AutoTokenizer
# model_name_or_path = "TheBloke/llama2_7b_chat_uncensored-GPTQ"
# # To use a different branch, change revision
# # For example: revision="main"
# model = AutoModelForCausalLM.from_pretrained(
#     model_name_or_path,
#     device_map="auto",
#     trust_remote_code=True,
#     revision="main",
#     # quantization_config=QuantizationConfig(disable_exllama=True)
# )
# tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
# Llama_pipe = pipeline(
#     "text-generation",
#     model=model,
#     tokenizer=tokenizer,
#     max_new_tokens=40,
#     do_sample=True,
#     temperature=0.7,
#     top_p=0.95,
#     top_k=40,
#     repetition_penalty=1.1,
# )
# history = """User: Hello, Rally?
# Rally: I'm happy to see you again. What do you want to talk about today?
# User: Let's talk about food
# Rally: Sure.
# User: I'm hungry right now. Do you know any Vietnamese food?"""
# prompt_template = f"""<|im_start|>system
# Write one sentence to continue the conversation<|im_end|>
# {history}
# Rally:"""
# print(Llama_pipe(prompt_template)[0]['generated_text'])
# def RallyRespone(chat_history, message):
#     chat_history += "User: " + message + "\n"
#     t_chat = Llama_pipe(prompt_template)[0]['generated_text']
#     res = t_chat[t_chat.rfind("Rally: "):]
#     return res
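# respond() further down calls RallyRespone, which is only defined inside the
# commented-out Llama section above. This minimal placeholder is an assumption
# that keeps the demo runnable; swap it out once the Llama pipeline is re-enabled.
def RallyRespone(chat_history, message):
    return "Rally: That sounds interesting. Tell me more."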
########################ASR model###############################
from transformers import Speech2TextForConditionalGeneration, Speech2TextProcessor

# Run on GPU when available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-librispeech-asr").to(device)
processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr", do_upper_case=True)
def RallyListen(audio):
    # Transcribe a 16 kHz waveform (numpy array) with the Speech2Text model.
    features = processor(audio, sampling_rate=16000, padding=True, return_tensors="pt")
    input_features = features.input_features.to(device)
    attention_mask = features.attention_mask.to(device)
    gen_tokens = model.generate(input_features=input_features, attention_mask=attention_mask)
    return processor.batch_decode(gen_tokens, skip_special_tokens=True)
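# Quick sanity check for RallyListen, left commented out so the Space does not
# need a sample file at startup; "sample.wav" is a hypothetical path.
# y, _ = librosa.load("sample.wav", sr=16000)
# print(RallyListen(y))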
########################Gradio UI###############################
# Gradio UI: record or upload audio, fill the two option buttons with the transcription,
# then click an option to send that text to the Rally chatbot.
def add_file(files):
    # Store the uploaded file's path in the File component.
    return files.name

def print_like_dislike(x: gr.LikeData):
    # Log like/dislike feedback on chatbot messages.
    print(x.index, x.value, x.liked)
def upfile(files):
    # Load the uploaded audio at 16 kHz and transcribe it with RallyListen.
    y, _ = librosa.load(files, sr=16000)
    text = RallyListen(y)
    # Fill both option buttons with the same transcription.
    return [text[0], text[0]]
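# transcribe() below relies on a `transcriber` pipeline that is never defined in
# this file; the definition here is an assumption, reusing the same Speech2Text
# checkpoint through the transformers ASR pipeline.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="facebook/s2t-small-librispeech-asr",
    device=0 if torch.cuda.is_available() else -1,
)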
def transcribe(audio):
    # Transcribe microphone input given as (sample_rate, waveform).
    sr, y = audio
    y = y.astype(np.float32)
    y /= np.max(np.abs(y))
    text = transcriber({"sampling_rate": sr, "raw": y})["text"]
    # Fill both option buttons with the same transcription.
    return text, text
# def recommand(text):
#     ret = "answer for"
#     return ret + text
def add_text(history, text):
    # Append the user message to the chat history and clear the textbox.
    history = history + [(text, None)]
    return history, gr.Textbox(value="", interactive=False)
# def bot(history):
#     response = "**That's cool!**"
#     history[-1][1] = ""
#     for character in response:
#         history[-1][1] += character
#         time.sleep(0.05)
#         yield history
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        bubble_full_width=False,
    )
    file_output = gr.File()

    def respond(message, chat_history):
        # Ask Rally for a reply, append the exchange to the history, and return
        # the reply (shown on the clicked option button) plus the updated history.
        bot_message = RallyRespone(chat_history, message)
        chat_history.append((message, bot_message))
        time.sleep(2)
        print(chat_history[-1])
        return chat_history[-1][-1], chat_history
    with gr.Row():
        with gr.Column():
            audio_speech = gr.Audio(sources=["microphone"])
            submit = gr.Button("Submit")
            send = gr.Button("Send")
            btn = gr.UploadButton("📁", file_types=["audio"])
        with gr.Column():
            opt1 = gr.Button("1: ")
            opt2 = gr.Button("2: ")
            # submit.click(translate, inputs=audio_speech, outputs=[opt1, opt2])
            # Each transcription handler returns two values: the text for opt1 and the text for opt2.

    file_msg = btn.upload(add_file, btn, file_output)
    submit.click(upfile, inputs=file_output, outputs=[opt1, opt2])
    send.click(transcribe, inputs=audio_speech, outputs=[opt1, opt2])
    opt1.click(respond, [opt1, chatbot], [opt1, chatbot])
    opt2.click(respond, [opt2, chatbot], [opt2, chatbot])
    # opt2.click(recommand, inputs=opt2)
    # Clicking an option button sends its current text to Rally and appends the exchange to the chat history.
    chatbot.like(print_like_dislike, None, None)
if __name__ == "__main__":
    demo.queue()
    demo.launch(debug=True)