picocreator committed on
Commit 3cb35fd
1 Parent(s): 30f7265

Update app.py


Redo the Gradio interface for translation

Files changed (1): app.py +45 -88
app.py CHANGED
@@ -1,6 +1,5 @@
 import gradio as gr
-import os, gc, copy, torch
-from datetime import datetime
+import os, gc, torch  # torch is kept: torch.cuda.empty_cache() is still called below
 from huggingface_hub import hf_hub_download
 from pynvml import *
 
@@ -22,23 +21,22 @@ try:
 except NVMLError as error:
     print(error)
 
-
 os.environ["RWKV_JIT_ON"] = '1'
 
-# Model strat to use
-MODEL_STRAT="cpu bf16"
-os.environ["RWKV_CUDA_ON"] = '0' # if '1' then use CUDA kernel for seq mode (much faster)
+# Model strategy to use
+MODEL_STRAT = "cpu bf16"
+os.environ["RWKV_CUDA_ON"] = '0' # if '1' then use CUDA kernel for seq mode (much faster)
 
 # Switch to GPU mode
-if HAS_GPU == True :
-    os.environ["RWKV_CUDA_ON"] = '1'
+if HAS_GPU:
+    os.environ["RWKV_CUDA_ON"] = '1'
     MODEL_STRAT = "cuda bf16"
 
-# Load the model accordingly
+# Load the model
 from rwkv.model import RWKV
 model_path = hf_hub_download(repo_id="BlinkDL/rwkv-5-world", filename=f"{model_file}.pth")
 model = RWKV(model=model_path, strategy=MODEL_STRAT)
-from rwkv.utils import PIPELINE, PIPELINE_ARGS
+from rwkv.utils import PIPELINE
 pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
 
 # Prompt generation
@@ -60,109 +58,68 @@ User: {instruction}
 
 Assistant:"""
 
-# Evaluation logic
-def evaluate(
-    ctx,
-    token_count=200,
-    temperature=1.0,
-    top_p=0.7,
-    presencePenalty = 0.1,
-    countPenalty = 0.1,
-):
-    args = PIPELINE_ARGS(temperature = max(0.2, float(temperature)), top_p = float(top_p),
-                         alpha_frequency = countPenalty,
-                         alpha_presence = presencePenalty,
-                         token_ban = [], # ban the generation of some tokens
-                         token_stop = [0]) # stop generation whenever you see any token here
-    ctx = ctx.strip()
+# Translation logic
+def translate(text, target_language):
+    prompt = f"Translate the following English text to {target_language}: '{text}'"
+    ctx = prompt.strip()
     all_tokens = []
     out_last = 0
     out_str = ''
     occurrence = {}
     state = None
-    for i in range(int(token_count)):
+    for i in range(ctx_limit):
         out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
-        for n in occurrence:
-            out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
-
-        token = pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p)
-        if token in args.token_stop:
+        token = pipeline.sample_logits(out)
+        if token in [0]: # EOS token
             break
         all_tokens += [token]
-        for xxx in occurrence:
-            occurrence[xxx] *= 0.996
-        if token not in occurrence:
-            occurrence[token] = 1
-        else:
-            occurrence[token] += 1
-
         tmp = pipeline.decode(all_tokens[out_last:])
         if '\ufffd' not in tmp:
            out_str += tmp
            yield out_str.strip()
            out_last = i + 1
 
-    if HAS_GPU == True :
-        gpu_info = nvmlDeviceGetMemoryInfo(gpu_h)
-        print(f'vram {gpu_info.total} used {gpu_info.used} free {gpu_info.free}')
-
     del out
     del state
-    gc.collect()
 
+    # Clear GC
+    gc.collect()
     if HAS_GPU == True :
         torch.cuda.empty_cache()
 
     yield out_str.strip()
 
-# Examples and gradio blocks
-examples = [
-    ["Assistant: Sure! Here is a very detailed plan to create flying pigs:", 333, 1, 0.3, 0, 1],
-    ["Assistant: Sure! Here are some ideas for FTL drive:", 333, 1, 0.3, 0, 1],
-    [generate_prompt("Tell me about ravens."), 333, 1, 0.3, 0, 1],
-    [generate_prompt("Écrivez un programme Python pour miner 1 Bitcoin, avec des commentaires."), 333, 1, 0.3, 0, 1],
-    [generate_prompt("東京で訪れるべき素晴らしい場所とその紹介をいくつか挙げてください。"), 333, 1, 0.3, 0, 1],
-    [generate_prompt("Write a story using the following information.", "A man named Alex chops a tree down."), 333, 1, 0.3, 0, 1],
-    ["Assistant: Here is a very detailed plan to kill all mosquitoes:", 333, 1, 0.3, 0, 1],
-    ['''Edward: I am Edward Elric from fullmetal alchemist. I am in the world of full metal alchemist and know nothing of the real world.
-
-User: Hello Edward. What have you been up to recently?
-
-Edward:''', 333, 1, 0.3, 0, 1],
-    [generate_prompt("写一篇关于水利工程的流体力学模型的论文,需要详细全面。"), 333, 1, 0.3, 0, 1],
-    ['''“当然可以,大宇宙不会因为这五公斤就不坍缩了。”关一帆说,他还有一个没说出来的想法:也许大宇宙真的会因为相差一个原子的质量而由封闭转为开放。大自然的精巧有时超出想象,比如生命的诞生,就需要各项宇宙参数在几亿亿分之一精度上的精确配合。但程心仍然可以留下她的生态球,因为在那无数文明创造的无数小宇宙中,肯定有相当一部分不响应回归运动的号召,所以,大宇宙最终被夺走的质量至少有几亿吨,甚至可能是几亿亿亿吨。
-但愿大宇宙能够忽略这个误差。
-程心和关一帆进入了飞船,智子最后也进来了。她早就不再穿那身华丽的和服了,她现在身着迷彩服,再次成为一名轻捷精悍的战士,她的身上佩带着许多武器和生存装备,最引人注目的是那把插在背后的武士刀。
-“放心,我在,你们就在!”智子对两位人类朋友说。
-聚变发动机启动了,推进器发出幽幽的蓝光,飞船缓缓地穿过了宇宙之门。
-小宇宙中只剩下漂流瓶和生态球。漂流瓶隐没于黑暗里,在一千米见方的宇宙中,只有生态球里的小太阳发出一点光芒。在这个小小的生命世界中,几只清澈的水球在零重力环境中静静地飘浮着,有一条小鱼从一只水球中蹦出,跃入另一只水球,轻盈地穿游于绿藻之间。在一小块陆地上的草丛中,有一滴露珠从一片草叶上脱离,旋转着飘起,向太空中折射出一缕晶莹的阳光。''', 333, 1, 0.3, 0, 1],
+# Example data
+EXAMPLES = [
+    ["Hello, how are you?", "French"],
+    ["Hello, how are you?", "Spanish"],
+    ["Hello, how are you?", "Chinese"],
+    ["Bonjour, comment ça va?", "English"],
+    ["Hola, ¿cómo estás?", "English"],
+    ["你好吗?", "English"],
+    ["Guten Tag, wie geht es Ihnen?", "English"],
+    ["Привет, как ты?", "English"],
+    ["مرحبًا ، كيف حالك؟", "English"],
 ]
 
-##########################################################################
-
-# Gradio blocks
+# Gradio interface
 with gr.Blocks(title=title) as demo:
-    gr.HTML(f"<div style=\"text-align: center;\">\n<h1>RWKV-5 World v2 - {title}</h1>\n</div>")
-    with gr.Tab("Raw Generation"):
-        gr.Markdown(f"This is [RWKV-5 World v2](https://huggingface.co/BlinkDL/rwkv-5-world) with 1.5B params - a 100% attention-free RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM). Supports all 100+ world languages and code. And we have [200+ Github RWKV projects](https://github.com/search?o=desc&p=1&q=rwkv&s=updated&type=Repositories). *** Please try examples first (bottom of page) *** (edit them to use your question). Demo limited to ctxlen {ctx_limit}.")
-        with gr.Row():
-            with gr.Column():
-                prompt = gr.Textbox(lines=2, label="Prompt", value="Assistant: Sure! Here is a very detailed plan to create flying pigs:")
-                token_count = gr.Slider(10, 333, label="Max Tokens", step=10, value=333)
-                temperature = gr.Slider(0.2, 2.0, label="Temperature", step=0.1, value=1.0)
-                top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.3)
-                presence_penalty = gr.Slider(0.0, 1.0, label="Presence Penalty", step=0.1, value=0)
-                count_penalty = gr.Slider(0.0, 1.0, label="Count Penalty", step=0.1, value=1)
-            with gr.Column():
-                with gr.Row():
-                    submit = gr.Button("Submit", variant="primary")
-                    clear = gr.Button("Clear", variant="secondary")
-                output = gr.Textbox(label="Output", lines=5)
-        data = gr.Dataset(components=[prompt, token_count, temperature, top_p, presence_penalty, count_penalty], samples=examples, label="Example Instructions", headers=["Prompt", "Max Tokens", "Temperature", "Top P", "Presence Penalty", "Count Penalty"])
-        submit.click(evaluate, [prompt, token_count, temperature, top_p, presence_penalty, count_penalty], [output])
-        clear.click(lambda: None, [], [output])
-        data.click(lambda x: x, [data], [prompt, token_count, temperature, top_p, presence_penalty, count_penalty])
+    gr.HTML(f"<div style=\"text-align: center;\"><h1>RWKV-5 World v2 - {title}</h1></div>")
+    gr.Markdown("This is the RWKV-5 World v2 1B5 model tailored for translation. Please provide the text and select the target language for translation.")
+
+    # Input and output components
+    text = gr.Textbox(lines=5, label="English Text", placeholder="Enter the text you want to translate...")
+    target_language = gr.Dropdown(choices=["French", "Spanish", "German", "Chinese", "Japanese", "Russian", "Arabic"], label="Target Language")
+    output = gr.Textbox(lines=5, label="Translated Text")
+    submit = gr.Button("Translate", variant="primary")
+
+    # Example data
+    data = gr.Dataset(components=[text, target_language], samples=EXAMPLES, label="Example Translations", headers=["Text", "Target Language"])
+
+    # Button action
+    submit.click(translate, [text, target_language], [output])
+    data.click(lambda x: x, [data], [text, target_language])
 
 # Gradio launch
 demo.queue(concurrency_count=1, max_size=10)
-demo.launch(share=False)
+demo.launch(share=False)
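
For reference, the new translate() is a Python generator: each yield returns the full translation decoded so far, which is what lets the Gradio output textbox stream as tokens are generated. A minimal sketch of driving it outside the UI, assuming app.py's module-level globals (model, pipeline, ctx_limit) are initialized as above and torch remains imported; the driver itself is illustrative and not part of the commit:

# Hypothetical driver, not part of the commit: consume the streaming generator.
result = ""
for partial in translate("Hello, how are you?", "French"):
    result = partial  # each yield is the cumulative decoded text so far
print(result)         # final translation once generation stops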