Kevin676 committed
Commit: 62e7d87
Parent: 06083ff

Update app.py

Files changed (1): app.py (+20 -31)
app.py CHANGED
@@ -6,7 +6,6 @@ from pynvml import *
 nvmlInit()
 gpu_h = nvmlDeviceGetHandleByIndex(0)
 ctx_limit = 1024
-title = "RWKV-4-Raven-7B-v8-Eng-20230408-ctx4096"
 
 os.environ["RWKV_JIT_ON"] = '1'
 os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
@@ -40,20 +39,21 @@ def generate_prompt(instruction, input=None):
 
 def evaluate(
     instruction,
-    input=None,
-    token_count=200,
-    temperature=1.0,
-    top_p=0.7,
-    presencePenalty = 0.1,
-    countPenalty = 0.1,
+    # input=None,
+    # token_count=200,
+    # temperature=1.0,
+    # top_p=0.7,
+    # presencePenalty = 0.1,
+    # countPenalty = 0.1,
 ):
-    args = PIPELINE_ARGS(temperature = max(0.2, float(temperature)), top_p = float(top_p),
-                     alpha_frequency = countPenalty,
-                     alpha_presence = presencePenalty,
+    args = PIPELINE_ARGS(temperature = max(0.2, float(1)), top_p = float(0.5),
+                     alpha_frequency = 0.4,
+                     alpha_presence = 0.4,
                      token_ban = [], # ban the generation of some tokens
                      token_stop = [0]) # stop generation whenever you see any token here
 
     instruction = instruction.strip()
+    input=None
     input = input.strip()
     ctx = generate_prompt(instruction, input)
 
@@ -65,7 +65,7 @@ def evaluate(
     out_str = ''
     occurrence = {}
     state = None
-    for i in range(int(token_count)):
+    for i in range(int(150)):
         out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
         for n in occurrence:
             out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
@@ -88,26 +88,16 @@ def evaluate(
     torch.cuda.empty_cache()
     yield out_str.strip()
 
-examples = [
-    ["Tell me about ravens.", "", 150, 1.0, 0.5, 0.4, 0.4],
-    ["Write a python function to mine 1 BTC, with details and comments.", "", 150, 1.0, 0.5, 0.2, 0.2],
-    ["Write a song about ravens.", "", 150, 1.0, 0.5, 0.4, 0.4],
-    ["Explain the following metaphor: Life is like cats.", "", 150, 1.0, 0.5, 0.4, 0.4],
-    ["Write a story using the following information", "A man named Alex chops a tree down", 150, 1.0, 0.5, 0.4, 0.4],
-    ["Generate a list of adjectives that describe a person as brave.", "", 150, 1.0, 0.5, 0.4, 0.4],
-    ["You have $100, and your goal is to turn that into as much money as possible with AI and Machine Learning. Please respond with detailed plan.", "", 150, 1.0, 0.5, 0.4, 0.4],
-]
-
 g = gr.Interface(
     fn=evaluate,
     inputs=[
         gr.components.Textbox(lines=2, label="Instruction", value="Tell me about ravens."),
-        gr.components.Textbox(lines=2, label="Input", placeholder="none"),
-        gr.components.Slider(minimum=10, maximum=200, step=10, value=150), # token_count
-        gr.components.Slider(minimum=0.2, maximum=2.0, step=0.1, value=1.0), # temperature
-        gr.components.Slider(minimum=0, maximum=1, step=0.05, value=0.5), # top_p
-        gr.components.Slider(0.0, 1.0, step=0.1, value=0.4), # presencePenalty
-        gr.components.Slider(0.0, 1.0, step=0.1, value=0.4), # countPenalty
+        # gr.components.Textbox(lines=2, label="Input", placeholder="none"),
+        # gr.components.Slider(minimum=10, maximum=200, step=10, value=150), # token_count
+        # gr.components.Slider(minimum=0.2, maximum=2.0, step=0.1, value=1.0), # temperature
+        # gr.components.Slider(minimum=0, maximum=1, step=0.05, value=0.5), # top_p
+        # gr.components.Slider(0.0, 1.0, step=0.1, value=0.4), # presencePenalty
+        # gr.components.Slider(0.0, 1.0, step=0.1, value=0.4), # countPenalty
     ],
     outputs=[
         gr.inputs.Textbox(
@@ -115,10 +105,9 @@ g = gr.Interface(
             label="Output",
        )
     ],
-    title=f"🐦Raven - {title}",
-    description="Raven is [RWKV 7B](https://github.com/BlinkDL/ChatRWKV) 100% RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM) finetuned to follow instructions. *** Please try examples first (bottom of page) *** (edit them to use your question). Demo limited to ctxlen 1024. It is finetuned on [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca), codealpaca and more. For best results, *** keep you prompt short and clear ***.",
-    examples=examples,
-    cache_examples=False,
+    title="🥳💬💕 - TalktoAI,随时随地,谈天说地!",
+    description="🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!",
+    article = "Powered by the RWKV Language Model"
 )
 g.queue(concurrency_count=1, max_size=10)
 g.launch(share=False)
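Two notes on the rewritten evaluate(). The commit swaps the Raven branding for TalktoAI strings; the new title reads, roughly, "TalktoAI, chat anytime, anywhere, about anything", and the description "Let humane AI benefit everyone! AI for good, a brilliant civilization! TalktoAI - Enable the future!". It also adds input=None directly above the pre-existing input = input.strip(), and None has no strip() method, so every call will now raise AttributeError before any tokens are generated. Below is a minimal, self-contained sketch of the likely intent, assuming generate_prompt treats an empty string like a missing input; the return and print lines are illustrative only, since the real function goes on to build the prompt and stream tokens.

def evaluate(instruction):
    instruction = instruction.strip()
    # The commit writes `input = None` and then calls `input.strip()`, which
    # raises AttributeError: 'NoneType' object has no attribute 'strip'.
    # Defaulting to an empty string keeps the "no input" path working:
    input = ""
    input = input.strip()
    return instruction, input

print(evaluate("  Tell me about ravens.  "))  # ('Tell me about ravens.', '')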
 
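For readers following the third hunk: the context line out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency) applies a flat presence penalty plus a per-repetition frequency penalty to every token already generated. A self-contained sketch with the values this commit hard-codes (both penalties 0.4); the token id, count, and logit below are made up for illustration:

alpha_presence = 0.4   # hard-coded in this commit (was the presencePenalty slider)
alpha_frequency = 0.4  # hard-coded in this commit (was the countPenalty slider)
occurrence = {42: 3}   # hypothetical: token 42 has been sampled 3 times so far
out = {42: 5.0}        # hypothetical raw logit for token 42
for n in occurrence:
    out[n] -= (alpha_presence + occurrence[n] * alpha_frequency)
print(out[42])  # 5.0 - (0.4 + 3 * 0.4) ≈ 3.4: repeated tokens become less likely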