FredZhang7 committed
Commit f41c6d3
1 Parent(s): bbffb85

Update app.py

Files changed (1)
  1. app.py +60 -13
app.py CHANGED
@@ -11,6 +11,7 @@ model_path = hf_hub_download(repo_id="BlinkDL/rwkv-5-world", filename=f"{title}.
 model = RWKV(model=model_path, strategy='cpu bf16')
 pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
 
+
 def generate_prompt(instruction, input=None, history=None):
     # parse the chat history into a string of user and assistant messages
     history_str = ""
@@ -32,6 +33,7 @@ Response:"""
 
 Assistant:"""
 
+
 examples = [
     ["東京で訪れるべき素晴らしい場所とその紹介をいくつか挙げてください。", "", 300, 1.2, 0.5, 0.5, 0.5],
     ["Écrivez un programme Python pour miner 1 Bitcoin, avec des commentaires.", "", 300, 1.2, 0.5, 0.5, 0.5],
@@ -42,7 +44,7 @@ examples = [
     ["You have $100, and your goal is to turn that into as much money as possible with AI and Machine Learning. Please respond with detailed plan.", "", 300, 1.2, 0.5, 0.5, 0.5],
 ]
 
-def evaluate(
+def respond(
     instruction,
     input=None,
     token_count=333,
@@ -59,9 +61,6 @@ def evaluate(
                          token_stop = [0]) # stop generation whenever you see any token here
 
     instruction = re.sub(r'\n{2,}', '\n', instruction).strip().replace('\r\n','\n')
-    no_history = (history is None)
-    if no_history:
-        input = re.sub(r'\n{2,}', '\n', input).strip().replace('\r\n','\n')
     ctx = generate_prompt(instruction, input, history)
     print(ctx + "\n")
 
@@ -89,8 +88,6 @@ def evaluate(
         tmp = pipeline.decode(all_tokens[out_last:])
         if '\ufffd' not in tmp:
             out_str += tmp
-            if no_history:
-                yield out_str.strip()
             out_last = i + 1
         if '\n\n' in out_str:
             break
@@ -98,11 +95,61 @@ def evaluate(
     del out
     del state
     gc.collect()
-    if no_history:
-        yield out_str.strip()
-    else:
-        history.append((instruction, out_str.strip()))
-        return history
+    return out_str.strip()
+
+def generator(
+    instruction,
+    input=None,
+    token_count=333,
+    temperature=1.0,
+    top_p=0.5,
+    presencePenalty = 0.5,
+    countPenalty = 0.5
+):
+    args = PIPELINE_ARGS(temperature = max(0.2, float(temperature)), top_p = float(top_p),
+                         alpha_frequency = countPenalty,
+                         alpha_presence = presencePenalty,
+                         token_ban = [], # ban the generation of some tokens
+                         token_stop = [0]) # stop generation whenever you see any token here
+
+    instruction = re.sub(r'\n{2,}', '\n', instruction).strip().replace('\r\n','\n')
+    input = re.sub(r'\n{2,}', '\n', input).strip().replace('\r\n','\n')
+    ctx = generate_prompt(instruction, input)
+    print(ctx + "\n")
+
+    all_tokens = []
+    out_last = 0
+    out_str = ''
+    occurrence = {}
+    state = None
+    for i in range(int(token_count)):
+        out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
+        for n in occurrence:
+            out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
+
+        token = pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p)
+        if token in args.token_stop:
+            break
+        all_tokens += [token]
+        for xxx in occurrence:
+            occurrence[xxx] *= 0.996
+        if token not in occurrence:
+            occurrence[token] = 1
+        else:
+            occurrence[token] += 1
+
+        tmp = pipeline.decode(all_tokens[out_last:])
+        if '\ufffd' not in tmp:
+            out_str += tmp
+            yield out_str.strip()
+            out_last = i + 1
+        if '\n\n' in out_str:
+            break
+
+    del out
+    del state
+    gc.collect()
+    yield out_str.strip()
 
 def user(message, chatbot):
     chatbot = chatbot or []
@@ -153,7 +200,7 @@ with gr.Blocks(title=title) as demo:
         presence_penalty = presence_penalty_chat.value
         count_penalty = count_penalty_chat.value
 
-        response = evaluate(instruction, None, token_count, temperature, top_p, presence_penalty, count_penalty, history)
+        response = respond(instruction, None, token_count, temperature, top_p, presence_penalty, count_penalty, history)
 
         history[-1][1] = response
         return history
@@ -179,7 +226,7 @@ with gr.Blocks(title=title) as demo:
         clear = gr.Button("Clear", variant="secondary")
        output = gr.Textbox(label="Output", lines=5)
        data = gr.Dataset(components=[instruction, input_instruct, token_count_instruct, temperature_instruct, top_p_instruct, presence_penalty_instruct, count_penalty_instruct], samples=examples, label="Example Instructions", headers=["Instruction", "Input", "Max Tokens", "Temperature", "Top P", "Presence Penalty", "Count Penalty"])
-        submit.click(evaluate, [instruction, input_instruct, token_count_instruct, temperature_instruct, top_p_instruct, presence_penalty_instruct, count_penalty_instruct], [output])
+        submit.click(generator, [instruction, input_instruct, token_count_instruct, temperature_instruct, top_p_instruct, presence_penalty_instruct, count_penalty_instruct], [output])
        clear.click(lambda: None, [], [output])
        data.click(lambda x: x, [data], [instruction, input_instruct, token_count_instruct, temperature_instruct, top_p_instruct, presence_penalty_instruct, count_penalty_instruct])
 
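Note: evaluate() previously served both tabs, yielding partial output when called without chat history and appending to the history list otherwise. The commit splits it in two: respond() returns one finished string for the chat tab, while generator() yields partial strings so the instruct tab can stream tokens as they decode. A minimal sketch of the streaming pattern, assuming a recent Gradio version; echo_stream and its wiring are illustrative stand-ins, not part of app.py:

import time
import gradio as gr

def echo_stream(prompt):
    # Gradio treats a generator function as a stream: each yield
    # repaints the bound output component with the partial value,
    # which is how generator() above feeds its Textbox.
    out = ""
    for ch in prompt:
        out += ch
        time.sleep(0.05)  # stand-in for one model.forward step
        yield out

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    output = gr.Textbox(label="Output")
    gr.Button("Submit").click(echo_stream, [prompt], [output])

demo.launch()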
 
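Both respond() and generator() keep the same decayed repetition penalty: every token already generated loses alpha_presence + count * alpha_frequency from its logit, and all counts shrink by a factor of 0.996 each step so old repetitions fade. A standalone sketch of that bookkeeping with illustrative values; penalize and record are hypothetical helper names:

alpha_presence, alpha_frequency, decay = 0.5, 0.5, 0.996

def penalize(logits, occurrence):
    # flat penalty for having appeared at all, plus a per-count term
    for tok, count in occurrence.items():
        logits[tok] -= alpha_presence + count * alpha_frequency
    return logits

def record(token, occurrence):
    # geometric decay: a repeat long ago costs less than a recent one
    for tok in occurrence:
        occurrence[tok] *= decay
    occurrence[token] = occurrence.get(token, 0) + 1

# toy run: repeating token 7 drives its logit down step by step
logits = {7: 1.0, 8: 0.9}
occ = {}
record(7, occ)
print(penalize(dict(logits), occ))  # {7: 0.0, 8: 0.9}
record(7, occ)                      # count: 1 * 0.996 + 1 = 1.996
print(penalize(dict(logits), occ))  # 7: 1.0 - (0.5 + 1.996 * 0.5) = -0.498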
 
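The '\ufffd' not in tmp guard in both functions exists because a sampled token can stop partway through a multi-byte UTF-8 character; decoding the incomplete tail yields the U+FFFD replacement character, so the loop withholds output until the bytes decode cleanly. An illustrative snippet:

text = "東京".encode("utf-8")  # 6 bytes, 3 per character
print(text[:4].decode("utf-8", errors="replace"))  # 東� -- second character cut short
print(text.decode("utf-8"))                        # 東京 -- complete bytes decode cleanly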