picocreator committed on
Commit
f18f1da
1 Parent(s): f9523d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -6,13 +6,15 @@ from pynvml import *
6
  nvmlInit()
7
  gpu_h = nvmlDeviceGetHandleByIndex(0)
8
  ctx_limit = 3000
9
- title = "RWKV-5-World-7B-v2-20240128-ctx4096"
 
 
10
 
11
  os.environ["RWKV_JIT_ON"] = '1'
12
  os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
13
 
14
  from rwkv.model import RWKV
15
- model_path = hf_hub_download(repo_id="BlinkDL/rwkv-5-world", filename=f"{title}.pth")
16
  model = RWKV(model=model_path, strategy='cuda fp16i8 *8 -> cuda fp16')
17
  from rwkv.utils import PIPELINE, PIPELINE_ARGS
18
  pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
@@ -137,7 +139,7 @@ English:''', 333, 1, 0.3, 0, 1],
137
  with gr.Blocks(title=title) as demo:
138
  gr.HTML(f"<div style=\"text-align: center;\">\n<h1>RWKV-5 World v2 - {title}</h1>\n</div>")
139
  with gr.Tab("Raw Generation"):
140
- gr.Markdown(f"This is [RWKV-5 World v2](https://huggingface.co/BlinkDL/rwkv-5-world) with 7B params - a 100% attention-free RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM). Supports all 100+ world languages and code. And we have [200+ Github RWKV projects](https://github.com/search?o=desc&p=1&q=rwkv&s=updated&type=Repositories). *** Please try examples first (bottom of page) *** (edit them to use your question). Demo limited to ctxlen {ctx_limit}.")
141
  with gr.Row():
142
  with gr.Column():
143
  prompt = gr.Textbox(lines=2, label="Prompt", value="Assistant: How can we craft an engaging story featuring vampires on Mars? Let's think step by step and provide an expert response.")
 
6
  nvmlInit()
7
  gpu_h = nvmlDeviceGetHandleByIndex(0)
8
  ctx_limit = 3000
9
+
10
+ title = "EagleX 2.25T Demo"
11
+ description = f"This is [EagleX 7B 2.25T model](https://blog.rwkv.com/p/336f47bf-d8e9-4174-ac1d-02c6c8a99bc0) - based on the RWKV architecture a 100% attention-free RNN [RWKV-LM](https://wiki.rwkv.com). Supports 100+ world languages and code. And we have [200+ Github RWKV projects](https://github.com/search?o=desc&p=1&q=rwkv&s=updated&type=Repositories). *** Please try examples first (bottom of page) *** (edit them to use your question). Demo limited to context length of {ctx_limit}, download and run locally to run past context length limit"
12
 
13
  os.environ["RWKV_JIT_ON"] = '1'
14
  os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
15
 
16
  from rwkv.model import RWKV
17
+ model_path = hf_hub_download(repo_id="RWKV/v5-EagleX-v2-7B-pth", filename=f"v5-EagleX-v2-7B.pth")
18
  model = RWKV(model=model_path, strategy='cuda fp16i8 *8 -> cuda fp16')
19
  from rwkv.utils import PIPELINE, PIPELINE_ARGS
20
  pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
 
139
  with gr.Blocks(title=title) as demo:
140
  gr.HTML(f"<div style=\"text-align: center;\">\n<h1>RWKV-5 World v2 - {title}</h1>\n</div>")
141
  with gr.Tab("Raw Generation"):
142
+ gr.Markdown(description)
143
  with gr.Row():
144
  with gr.Column():
145
  prompt = gr.Textbox(lines=2, label="Prompt", value="Assistant: How can we craft an engaging story featuring vampires on Mars? Let's think step by step and provide an expert response.")