Kevin676 committed
Commit: 62e7d87
Parent: 06083ff

Update app.py

Files changed (1): app.py (+20 -31)
app.py CHANGED
@@ -6,7 +6,6 @@ from pynvml import *
 nvmlInit()
 gpu_h = nvmlDeviceGetHandleByIndex(0)
 ctx_limit = 1024
-title = "RWKV-4-Raven-7B-v8-Eng-20230408-ctx4096"
 
 os.environ["RWKV_JIT_ON"] = '1'
 os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
@@ -40,20 +39,21 @@ def generate_prompt(instruction, input=None):
 
 def evaluate(
     instruction,
-    input=None,
-    token_count=200,
-    temperature=1.0,
-    top_p=0.7,
-    presencePenalty = 0.1,
-    countPenalty = 0.1,
+    # input=None,
+    # token_count=200,
+    # temperature=1.0,
+    # top_p=0.7,
+    # presencePenalty = 0.1,
+    # countPenalty = 0.1,
 ):
-    args = PIPELINE_ARGS(temperature = max(0.2, float(temperature)), top_p = float(top_p),
-                     alpha_frequency = countPenalty,
-                     alpha_presence = presencePenalty,
+    args = PIPELINE_ARGS(temperature = max(0.2, float(1)), top_p = float(0.5),
+                     alpha_frequency = 0.4,
+                     alpha_presence = 0.4,
                      token_ban = [], # ban the generation of some tokens
                      token_stop = [0]) # stop generation whenever you see any token here
 
     instruction = instruction.strip()
+    input=None
     input = input.strip()
     ctx = generate_prompt(instruction, input)
 
@@ -65,7 +65,7 @@ def evaluate(
     out_str = ''
     occurrence = {}
     state = None
-    for i in range(int(token_count)):
+    for i in range(int(150)):
         out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
         for n in occurrence:
             out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
@@ -88,26 +88,16 @@ def evaluate(
     torch.cuda.empty_cache()
     yield out_str.strip()
 
-examples = [
-    ["Tell me about ravens.", "", 150, 1.0, 0.5, 0.4, 0.4],
-    ["Write a python function to mine 1 BTC, with details and comments.", "", 150, 1.0, 0.5, 0.2, 0.2],
-    ["Write a song about ravens.", "", 150, 1.0, 0.5, 0.4, 0.4],
-    ["Explain the following metaphor: Life is like cats.", "", 150, 1.0, 0.5, 0.4, 0.4],
-    ["Write a story using the following information", "A man named Alex chops a tree down", 150, 1.0, 0.5, 0.4, 0.4],
-    ["Generate a list of adjectives that describe a person as brave.", "", 150, 1.0, 0.5, 0.4, 0.4],
-    ["You have $100, and your goal is to turn that into as much money as possible with AI and Machine Learning. Please respond with detailed plan.", "", 150, 1.0, 0.5, 0.4, 0.4],
-]
-
 g = gr.Interface(
     fn=evaluate,
     inputs=[
         gr.components.Textbox(lines=2, label="Instruction", value="Tell me about ravens."),
-        gr.components.Textbox(lines=2, label="Input", placeholder="none"),
-        gr.components.Slider(minimum=10, maximum=200, step=10, value=150), # token_count
-        gr.components.Slider(minimum=0.2, maximum=2.0, step=0.1, value=1.0), # temperature
-        gr.components.Slider(minimum=0, maximum=1, step=0.05, value=0.5), # top_p
-        gr.components.Slider(0.0, 1.0, step=0.1, value=0.4), # presencePenalty
-        gr.components.Slider(0.0, 1.0, step=0.1, value=0.4), # countPenalty
+        # gr.components.Textbox(lines=2, label="Input", placeholder="none"),
+        # gr.components.Slider(minimum=10, maximum=200, step=10, value=150), # token_count
+        # gr.components.Slider(minimum=0.2, maximum=2.0, step=0.1, value=1.0), # temperature
+        # gr.components.Slider(minimum=0, maximum=1, step=0.05, value=0.5), # top_p
+        # gr.components.Slider(0.0, 1.0, step=0.1, value=0.4), # presencePenalty
+        # gr.components.Slider(0.0, 1.0, step=0.1, value=0.4), # countPenalty
     ],
     outputs=[
         gr.inputs.Textbox(
@@ -115,10 +105,9 @@ g = gr.Interface(
             label="Output",
        )
     ],
-    title=f"🐦Raven - {title}",
-    description="Raven is [RWKV 7B](https://github.com/BlinkDL/ChatRWKV) 100% RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM) finetuned to follow instructions. *** Please try examples first (bottom of page) *** (edit them to use your question). Demo limited to ctxlen 1024. It is finetuned on [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca), codealpaca and more. For best results, *** keep you prompt short and clear ***.",
-    examples=examples,
-    cache_examples=False,
+    title="🥳💬💕 - TalktoAI,随时随地,谈天说地!",
+    description="🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!",
+    article = "Powered by the RWKV Language Model"
 )
 g.queue(concurrency_count=1, max_size=10)
 g.launch(share=False)
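Two notes on the rewritten evaluate(). The commit swaps the Raven branding for TalktoAI strings; the new title reads, roughly, "TalktoAI, chat anytime, anywhere, about anything", and the description "Let humane AI benefit everyone! AI for good, a brilliant civilization! TalktoAI - Enable the future!". It also adds input=None directly above the pre-existing input = input.strip(), and None has no strip() method, so every call will now raise AttributeError before any tokens are generated. Below is a minimal, self-contained sketch of the likely intent, assuming generate_prompt treats an empty string like a missing input; the return and print lines are illustrative only, since the real function goes on to build the prompt and stream tokens.

def evaluate(instruction):
    instruction = instruction.strip()
    # The commit writes `input = None` and then calls `input.strip()`, which
    # raises AttributeError: 'NoneType' object has no attribute 'strip'.
    # Defaulting to an empty string keeps the "no input" path working:
    input = ""
    input = input.strip()
    return instruction, input

print(evaluate("  Tell me about ravens.  "))  # ('Tell me about ravens.', '')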
 
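For readers following the third hunk: the context line out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency) applies a flat presence penalty plus a per-repetition frequency penalty to every token already generated. A self-contained sketch with the values this commit hard-codes (both penalties 0.4); the token id, count, and logit below are made up for illustration:

alpha_presence = 0.4   # hard-coded in this commit (was the presencePenalty slider)
alpha_frequency = 0.4  # hard-coded in this commit (was the countPenalty slider)
occurrence = {42: 3}   # hypothetical: token 42 has been sampled 3 times so far
out = {42: 5.0}        # hypothetical raw logit for token 42
for n in occurrence:
    out[n] -= (alpha_presence + occurrence[n] * alpha_frequency)
print(out[42])  # 5.0 - (0.4 + 3 * 0.4) ≈ 3.4: repeated tokens become less likely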