picocreator committed on
Commit
f18f1da
1 Parent(s): f9523d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -6,13 +6,15 @@ from pynvml import *
6
  nvmlInit()
7
  gpu_h = nvmlDeviceGetHandleByIndex(0)
8
  ctx_limit = 3000
9
- title = "RWKV-5-World-7B-v2-20240128-ctx4096"
 
 
10
 
11
  os.environ["RWKV_JIT_ON"] = '1'
12
  os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
13
 
14
  from rwkv.model import RWKV
15
- model_path = hf_hub_download(repo_id="BlinkDL/rwkv-5-world", filename=f"{title}.pth")
16
  model = RWKV(model=model_path, strategy='cuda fp16i8 *8 -> cuda fp16')
17
  from rwkv.utils import PIPELINE, PIPELINE_ARGS
18
  pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
@@ -137,7 +139,7 @@ English:''', 333, 1, 0.3, 0, 1],
137
  with gr.Blocks(title=title) as demo:
138
  gr.HTML(f"<div style=\"text-align: center;\">\n<h1>RWKV-5 World v2 - {title}</h1>\n</div>")
139
  with gr.Tab("Raw Generation"):
140
- gr.Markdown(f"This is [RWKV-5 World v2](https://huggingface.co/BlinkDL/rwkv-5-world) with 7B params - a 100% attention-free RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM). Supports all 100+ world languages and code. And we have [200+ Github RWKV projects](https://github.com/search?o=desc&p=1&q=rwkv&s=updated&type=Repositories). *** Please try examples first (bottom of page) *** (edit them to use your question). Demo limited to ctxlen {ctx_limit}.")
141
  with gr.Row():
142
  with gr.Column():
143
  prompt = gr.Textbox(lines=2, label="Prompt", value="Assistant: How can we craft an engaging story featuring vampires on Mars? Let's think step by step and provide an expert response.")
 
6
  nvmlInit()
7
  gpu_h = nvmlDeviceGetHandleByIndex(0)
8
  ctx_limit = 3000
9
+
10
+ title = "EagleX 2.25T Demo"
11
+ description = f"This is [EagleX 7B 2.25T model](https://blog.rwkv.com/p/336f47bf-d8e9-4174-ac1d-02c6c8a99bc0) - based on the RWKV architecture a 100% attention-free RNN [RWKV-LM](https://wiki.rwkv.com). Supports 100+ world languages and code. And we have [200+ Github RWKV projects](https://github.com/search?o=desc&p=1&q=rwkv&s=updated&type=Repositories). *** Please try examples first (bottom of page) *** (edit them to use your question). Demo limited to context length of {ctx_limit}, download and run locally to run past context length limit"
12
 
13
  os.environ["RWKV_JIT_ON"] = '1'
14
  os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
15
 
16
  from rwkv.model import RWKV
17
+ model_path = hf_hub_download(repo_id="RWKV/v5-EagleX-v2-7B-pth", filename=f"v5-EagleX-v2-7B.pth")
18
  model = RWKV(model=model_path, strategy='cuda fp16i8 *8 -> cuda fp16')
19
  from rwkv.utils import PIPELINE, PIPELINE_ARGS
20
  pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
 
139
  with gr.Blocks(title=title) as demo:
140
  gr.HTML(f"<div style=\"text-align: center;\">\n<h1>RWKV-5 World v2 - {title}</h1>\n</div>")
141
  with gr.Tab("Raw Generation"):
142
+ gr.Markdown(description)
143
  with gr.Row():
144
  with gr.Column():
145
  prompt = gr.Textbox(lines=2, label="Prompt", value="Assistant: How can we craft an engaging story featuring vampires on Mars? Let's think step by step and provide an expert response.")