Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -6,7 +6,6 @@ from pynvml import *
|
|
6 |
nvmlInit()
|
7 |
gpu_h = nvmlDeviceGetHandleByIndex(0)
|
8 |
ctx_limit = 1024
|
9 |
-
title = "RWKV-4-Raven-7B-v8-Eng-20230408-ctx4096"
|
10 |
|
11 |
os.environ["RWKV_JIT_ON"] = '1'
|
12 |
os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
|
@@ -40,20 +39,21 @@ def generate_prompt(instruction, input=None):
|
|
40 |
|
41 |
def evaluate(
|
42 |
instruction,
|
43 |
-
input=None,
|
44 |
-
token_count=200,
|
45 |
-
temperature=1.0,
|
46 |
-
top_p=0.7,
|
47 |
-
presencePenalty = 0.1,
|
48 |
-
countPenalty = 0.1,
|
49 |
):
|
50 |
-
args = PIPELINE_ARGS(temperature = max(0.2, float(
|
51 |
-
alpha_frequency =
|
52 |
-
alpha_presence =
|
53 |
token_ban = [], # ban the generation of some tokens
|
54 |
token_stop = [0]) # stop generation whenever you see any token here
|
55 |
|
56 |
instruction = instruction.strip()
|
|
|
57 |
input = input.strip()
|
58 |
ctx = generate_prompt(instruction, input)
|
59 |
|
@@ -65,7 +65,7 @@ def evaluate(
|
|
65 |
out_str = ''
|
66 |
occurrence = {}
|
67 |
state = None
|
68 |
-
for i in range(int(
|
69 |
out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
|
70 |
for n in occurrence:
|
71 |
out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
|
@@ -88,26 +88,16 @@ def evaluate(
|
|
88 |
torch.cuda.empty_cache()
|
89 |
yield out_str.strip()
|
90 |
|
91 |
-
examples = [
|
92 |
-
["Tell me about ravens.", "", 150, 1.0, 0.5, 0.4, 0.4],
|
93 |
-
["Write a python function to mine 1 BTC, with details and comments.", "", 150, 1.0, 0.5, 0.2, 0.2],
|
94 |
-
["Write a song about ravens.", "", 150, 1.0, 0.5, 0.4, 0.4],
|
95 |
-
["Explain the following metaphor: Life is like cats.", "", 150, 1.0, 0.5, 0.4, 0.4],
|
96 |
-
["Write a story using the following information", "A man named Alex chops a tree down", 150, 1.0, 0.5, 0.4, 0.4],
|
97 |
-
["Generate a list of adjectives that describe a person as brave.", "", 150, 1.0, 0.5, 0.4, 0.4],
|
98 |
-
["You have $100, and your goal is to turn that into as much money as possible with AI and Machine Learning. Please respond with detailed plan.", "", 150, 1.0, 0.5, 0.4, 0.4],
|
99 |
-
]
|
100 |
-
|
101 |
g = gr.Interface(
|
102 |
fn=evaluate,
|
103 |
inputs=[
|
104 |
gr.components.Textbox(lines=2, label="Instruction", value="Tell me about ravens."),
|
105 |
-
gr.components.Textbox(lines=2, label="Input", placeholder="none"),
|
106 |
-
gr.components.Slider(minimum=10, maximum=200, step=10, value=150), # token_count
|
107 |
-
gr.components.Slider(minimum=0.2, maximum=2.0, step=0.1, value=1.0), # temperature
|
108 |
-
gr.components.Slider(minimum=0, maximum=1, step=0.05, value=0.5), # top_p
|
109 |
-
gr.components.Slider(0.0, 1.0, step=0.1, value=0.4), # presencePenalty
|
110 |
-
gr.components.Slider(0.0, 1.0, step=0.1, value=0.4), # countPenalty
|
111 |
],
|
112 |
outputs=[
|
113 |
gr.inputs.Textbox(
|
@@ -115,10 +105,9 @@ g = gr.Interface(
|
|
115 |
label="Output",
|
116 |
)
|
117 |
],
|
118 |
-
title=
|
119 |
-
description="
|
120 |
-
|
121 |
-
cache_examples=False,
|
122 |
)
|
123 |
g.queue(concurrency_count=1, max_size=10)
|
124 |
g.launch(share=False)
|
|
|
6 |
nvmlInit()
|
7 |
gpu_h = nvmlDeviceGetHandleByIndex(0)
|
8 |
ctx_limit = 1024
|
|
|
9 |
|
10 |
os.environ["RWKV_JIT_ON"] = '1'
|
11 |
os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
|
|
|
39 |
|
40 |
def evaluate(
|
41 |
instruction,
|
42 |
+
# input=None,
|
43 |
+
# token_count=200,
|
44 |
+
# temperature=1.0,
|
45 |
+
# top_p=0.7,
|
46 |
+
# presencePenalty = 0.1,
|
47 |
+
# countPenalty = 0.1,
|
48 |
):
|
49 |
+
args = PIPELINE_ARGS(temperature = max(0.2, float(1)), top_p = float(0.5),
|
50 |
+
alpha_frequency = 0.4,
|
51 |
+
alpha_presence = 0.4,
|
52 |
token_ban = [], # ban the generation of some tokens
|
53 |
token_stop = [0]) # stop generation whenever you see any token here
|
54 |
|
55 |
instruction = instruction.strip()
|
56 |
+
input=None
|
57 |
input = input.strip()
|
58 |
ctx = generate_prompt(instruction, input)
|
59 |
|
|
|
65 |
out_str = ''
|
66 |
occurrence = {}
|
67 |
state = None
|
68 |
+
for i in range(int(150)):
|
69 |
out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
|
70 |
for n in occurrence:
|
71 |
out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
|
|
|
88 |
torch.cuda.empty_cache()
|
89 |
yield out_str.strip()
|
90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
g = gr.Interface(
|
92 |
fn=evaluate,
|
93 |
inputs=[
|
94 |
gr.components.Textbox(lines=2, label="Instruction", value="Tell me about ravens."),
|
95 |
+
# gr.components.Textbox(lines=2, label="Input", placeholder="none"),
|
96 |
+
# gr.components.Slider(minimum=10, maximum=200, step=10, value=150), # token_count
|
97 |
+
# gr.components.Slider(minimum=0.2, maximum=2.0, step=0.1, value=1.0), # temperature
|
98 |
+
# gr.components.Slider(minimum=0, maximum=1, step=0.05, value=0.5), # top_p
|
99 |
+
# gr.components.Slider(0.0, 1.0, step=0.1, value=0.4), # presencePenalty
|
100 |
+
# gr.components.Slider(0.0, 1.0, step=0.1, value=0.4), # countPenalty
|
101 |
],
|
102 |
outputs=[
|
103 |
gr.inputs.Textbox(
|
|
|
105 |
label="Output",
|
106 |
)
|
107 |
],
|
108 |
+
title="🥳💬💕 - TalktoAI,随时随地,谈天说地!",
|
109 |
+
description="🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!",
|
110 |
+
article = "Powered by the RWKV Language Model"
|
|
|
111 |
)
|
112 |
g.queue(concurrency_count=1, max_size=10)
|
113 |
g.launch(share=False)
|