Spaces:
Runtime error
Runtime error
File size: 5,202 Bytes
2e528e6 1de6169 20917cf 2e528e6 374442f f6d6c77 2e528e6 20917cf 2e528e6 a69ae8e 1de6169 374442f 1de6169 2e528e6 623d74a 1de6169 a69ae8e 8afab49 a69ae8e 8afab49 a69ae8e 8afab49 a69ae8e 1de6169 2e528e6 1de6169 2e528e6 1de6169 2e528e6 623d74a 374442f f3ce39f 374442f 6e3fd02 374442f 088ea2e 623d74a 2e528e6 a69ae8e 1de6169 374442f 1de6169 2e528e6 623d74a a977576 623d74a 1de6169 623d74a 374442f 2e528e6 62e7d87 2e528e6 957670c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
# Module-level setup for a speech-driven chat demo: Whisper transcribes the
# recorded audio and an RWKV "Raven" checkpoint generates the reply.
# Everything in this section runs once, at import time.
import gradio as gr
import os, gc, torch
from datetime import datetime
from huggingface_hub import hf_hub_download
from pynvml import *
# Initialise NVML and keep a handle to GPU index 0; evaluate() uses it to
# print memory statistics before generating.
nvmlInit()
gpu_h = nvmlDeviceGetHandleByIndex(0)
# Prompt budget: evaluate() keeps only the last `ctx_limit` tokens of the
# encoded prompt.
# NOTE(review): the checkpoint name below says ctx4096 while this is 1024 --
# presumably a deliberate memory/latency trade-off; confirm.
ctx_limit = 1024
import whisper
# Whisper "small" checkpoint, used for language detection and transcription.
model1 = whisper.load_model("small")
# Checkpoint name; also the stem of the .pth file downloaded below.
title1 = "RWKV-4-Raven-7B-v8-Eng-20230408-ctx4096"
# NOTE(review): these flags are set before `rwkv.model` is imported below --
# presumably the package reads them at import time, so keep this ordering.
os.environ["RWKV_JIT_ON"] = '1'
os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
# Disabled text-to-speech stage (kept for reference):
#from TTS.api import TTS
#tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
from rwkv.model import RWKV
# Fetch the checkpoint from the Hugging Face Hub and load it with the given
# RWKV placement/precision strategy string.
model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-raven", filename=f"{title1}.pth")
model = RWKV(model=model_path, strategy='cuda fp16i8 *8 -> cuda fp16')
from rwkv.utils import PIPELINE, PIPELINE_ARGS
# Tokenizer + sampling helper wrapped around the loaded model; the tokenizer
# file path is relative to the working directory.
pipeline = PIPELINE(model, "20B_tokenizer.json")
def generate_prompt(instruction, input=None):
    """Build the instruction-following prompt sent to the language model.

    Args:
        instruction: The task description (here, the transcribed speech).
        input: Optional extra context. ``None``, an empty string, or a
            whitespace-only string are all treated as "no context", so the
            prompt never contains an ``# Input:`` section with blank content.

    Returns:
        The prompt text ending in the ``# Response:`` header, ready for the
        model to continue.
    """
    # Decide on the stripped value so "   " does not count as real context,
    # but interpolate the caller's original text unchanged.
    has_context = bool(input.strip()) if isinstance(input, str) else bool(input)
    if has_context:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
# Instruction:
{instruction}
# Input:
{input}
# Response:
"""
    return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
# Instruction:
{instruction}
# Response:
"""
def evaluate(
    audio,
    input=None,
    token_count=200,
    temperature=1.0,
    top_p=0.7,
    presencePenalty=0.1,
    countPenalty=0.1,
):
    """Transcribe a recording with Whisper and stream an RWKV reply to it.

    This is a generator: each item is a ``(transcript, reply_so_far)`` pair,
    one value per output component of the Gradio interface, so the UI can
    show the transcription and the growing reply together.

    Args:
        audio: Path to the recorded audio file.
        input: Optional extra context for the prompt; ``None`` or blank means
            no context.
        token_count: Maximum number of tokens to generate.
        temperature: Sampling temperature; values below 0.2 are raised to 0.2.
        top_p: Nucleus-sampling threshold.
        presencePenalty: Flat logit penalty for any token already generated.
        countPenalty: Additional logit penalty per previous occurrence.

    Yields:
        tuple[str, str]: The Whisper transcript and the reply generated so far.

    Raises:
        ValueError: If no audio path was supplied.
    """
    if not audio:
        # Fail with a readable message instead of an opaque error inside Whisper.
        raise ValueError("No audio was provided; please record a message first.")

    # Load audio and pad/trim it to fit 30 seconds.
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # Make the log-Mel spectrogram and move it to the same device as model1.
    mel = whisper.log_mel_spectrogram(audio).to(model1.device)

    # Detect the spoken language (logged only; it does not affect decoding).
    _, probs = model1.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # Decode the audio into text.
    options = whisper.DecodingOptions()
    result = whisper.decode(model1, mel, options)

    args = PIPELINE_ARGS(
        temperature=max(0.2, float(temperature)),
        top_p=float(top_p),
        alpha_frequency=countPenalty,
        alpha_presence=presencePenalty,
        token_ban=[],  # ban the generation of some tokens
        token_stop=[0],  # stop generation whenever you see any token here
    )

    instruction = result.text
    # `input` may be None (its default), so normalise before stripping.
    input = (input or "").strip()
    ctx = generate_prompt(instruction, input)

    gpu_info = nvmlDeviceGetMemoryInfo(gpu_h)
    print(f'vram {gpu_info.total} used {gpu_info.used} free {gpu_info.free}')

    all_tokens = []
    out_last = 0  # index of the first token not yet flushed into out_str
    out_str = ''
    occurrence = {}  # token id -> number of times it has been generated
    state = None
    try:
        for i in range(int(token_count)):
            # First step feeds the (truncated) prompt; later steps feed only
            # the token just sampled, reusing the recurrent state.
            tokens = pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token]
            out, state = model.forward(tokens, state)

            # Apply presence/frequency penalties to already generated tokens.
            for n in occurrence:
                out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)

            token = pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p)
            if token in args.token_stop:
                break
            all_tokens += [token]
            occurrence[token] = occurrence.get(token, 0) + 1

            # Only flush once the pending tokens decode without U+FFFD, i.e.
            # without a partially decoded character.
            tmp = pipeline.decode(all_tokens[out_last:])
            if '\ufffd' not in tmp:
                out_str += tmp
                yield instruction, out_str.strip()
                out_last = i + 1
    finally:
        # Release memory even if the consumer stops iterating early.
        gc.collect()
        torch.cuda.empty_cache()

    # Final update; also covers the case where nothing was yielded in the loop.
    yield instruction, out_str.strip()
# Gradio UI: microphone recording plus sampling controls in; the Whisper
# transcript and the model reply out. Inputs are listed in the same order as
# evaluate()'s parameters.
g = gr.Interface(
    fn=evaluate,
    inputs=[
        gr.Audio(source="microphone", label = "请开始对话吧!", type="filepath"),
        gr.components.Textbox(lines=2, label="Input", placeholder="none"),
        gr.components.Slider(minimum=10, maximum=200, step=10, value=150),  # token_count
        gr.components.Slider(minimum=0.2, maximum=2.0, step=0.1, value=1.0),  # temperature
        gr.components.Slider(minimum=0, maximum=1, step=0.05, value=0.5),  # top_p
        gr.components.Slider(0.0, 1.0, step=0.1, value=0.4),  # presencePenalty
        gr.components.Slider(0.0, 1.0, step=0.1, value=0.4),  # countPenalty
    ],
    outputs=[
        # Use the same component namespace as the inputs rather than the
        # deprecated `gr.inputs` one.
        gr.components.Textbox(
            lines=1,
            label="Speech to Text",
        ),
        gr.components.Textbox(
            lines=5,
            label="Raven Output",
        ),
    ],
    title="🥳💬💕 - TalktoAI,随时随地,谈天说地!",
    description="🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!",
    article = "Powered by the RWKV Language Model"
)
# Serve one request at a time, with at most 10 waiting in the queue.
g.queue(concurrency_count=1, max_size=10)
g.launch(show_error=True)