Spaces:
Sleeping
Sleeping
Commit
•
5833996
1
Parent(s):
040cd58
Update app.py
Browse files
Adding default text, instruct prefix, and more examples
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import gradio as gr
|
2 |
-
import os, gc
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
from pynvml import *
|
5 |
|
@@ -39,15 +39,70 @@ model = RWKV(model=model_path, strategy=MODEL_STRAT)
|
|
39 |
from rwkv.utils import PIPELINE
|
40 |
pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
# Translation logic
|
43 |
-
def translate(text, target_language):
|
44 |
prompt = f"Translate the following text to {target_language}\n # Input Text:\n{text}\n\n# Output Text:\n"
|
45 |
ctx = prompt.strip()
|
46 |
all_tokens = []
|
47 |
out_last = 0
|
48 |
out_str = ''
|
49 |
occurrence = {}
|
|
|
50 |
state = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
for i in range(ctx_limit):
|
52 |
out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
|
53 |
token = pipeline.sample_logits(out)
|
@@ -60,19 +115,24 @@ def translate(text, target_language):
|
|
60 |
yield out_str.strip()
|
61 |
out_last = i + 1
|
62 |
|
|
|
|
|
|
|
|
|
63 |
del out
|
64 |
del state
|
65 |
|
66 |
-
# Clear GC
|
67 |
-
gc.collect()
|
68 |
-
if HAS_GPU == True :
|
69 |
-
|
70 |
|
71 |
yield out_str.strip()
|
72 |
|
73 |
# Languages
|
74 |
LANGUAGES = [
|
75 |
"English",
|
|
|
76 |
"Chinese",
|
77 |
"Spanish",
|
78 |
"Bengali",
|
@@ -183,6 +243,8 @@ LANGUAGES = [
|
|
183 |
|
184 |
# Example data
|
185 |
EXAMPLES = [
|
|
|
|
|
186 |
["Hello, how are you?", "French"],
|
187 |
["Hello, how are you?", "Spanish"],
|
188 |
["Hello, how are you?", "Chinese"],
|
@@ -197,11 +259,11 @@ EXAMPLES = [
|
|
197 |
# Gradio interface
|
198 |
with gr.Blocks(title=title) as demo:
|
199 |
gr.HTML(f"<div style=\"text-align: center;\"><h1>RWKV-5 World v2 - {title}</h1></div>")
|
200 |
-
gr.Markdown("This is the RWKV-5 World v2 1B5 model tailored for translation.
|
201 |
|
202 |
# Input and output components
|
203 |
-
text = gr.Textbox(lines=5, label="Source Text", placeholder="Enter the text you want to translate...")
|
204 |
-
target_language = gr.Dropdown(choices=LANGUAGES, label="Target Language")
|
205 |
output = gr.Textbox(lines=5, label="Translated Text")
|
206 |
submit = gr.Button("Translate", variant="primary")
|
207 |
|
|
|
1 |
import gradio as gr
|
2 |
+
import os, gc, copy
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
from pynvml import *
|
5 |
|
|
|
39 |
from rwkv.utils import PIPELINE
|
40 |
pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
|
41 |
|
42 |
+
# Precomputation of the state
|
43 |
+
def precompute_state(text):
    """Feed `text` through the model once and return the resulting state.

    The text is tokenized with the module-level `pipeline` and passed to the
    module-level `model`, starting from an empty (``None``) state. The logits
    are discarded; only the state is kept so it can be reused as a starting
    point instead of re-processing the same prefix on every request.

    Args:
        text: Prompt prefix to encode and run through the model.

    Returns:
        The state object produced by ``model.forward`` for the encoded text.
    """
    text_encoded = pipeline.encode(text)

    # Pass the (empty) state explicitly, matching the two-argument
    # ``model.forward(tokens, state)`` call used by the generation loop.
    _, state = model.forward(text_encoded, None)

    # Return rather than yield: with ``yield`` this function is a generator,
    # so the caller would get a generator object instead of the state, and
    # ``copy.deepcopy`` on that object raises TypeError.
    return state
|
48 |
+
|
49 |
+
# Precomputing the base instruction set
|
50 |
+
INSTRUCT_PREFIX = f'''
|
51 |
+
The following is a set of instruction rules, that can translate spoken text to zombie speak. And vice visa.
|
52 |
+
|
53 |
+
# Zombie Speak Rules:
|
54 |
+
- Replace syllables with "uh" or "argh"
|
55 |
+
- Add "uh" and "argh" sounds between words
|
56 |
+
- Repeat words and letters, especially vowels
|
57 |
+
- Use broken grammar and omit small words like "the", "a", "is"
|
58 |
+
|
59 |
+
# To go from zombie speak back to English:
|
60 |
+
- Remove extra "uh" and "argh" sounds
|
61 |
+
- Replace repeated letters with one instance
|
62 |
+
- Add omitted small words like "the", "a", "is" back in
|
63 |
+
- Fix grammar and sentence structure
|
64 |
+
|
65 |
+
# Here are several examples:
|
66 |
+
|
67 |
+
## English:
|
68 |
+
"Hello my friend, how are you today?"
|
69 |
+
## Zombie:
|
70 |
+
"Hell-uh-argh myuh fruh-end, hargh-owuh argh yuh-uh toduh-ay?"
|
71 |
+
|
72 |
+
## Zombie:
|
73 |
+
"Brargh-ains argh-uh foo-duh"
|
74 |
+
## English:
|
75 |
+
"Brains are food"
|
76 |
+
|
77 |
+
## English:
|
78 |
+
"Good morning! How are you today? I hope you are having a nice day. The weather is supposed to be sunny and warm this afternoon. Maybe we could go for a nice walk together and stop to get ice cream. That would be very enjoyable. Well, I will talk to you soon!"
|
79 |
+
## Zombie:
|
80 |
+
"Guh-ood morngh-ing! Hargh-owuh argh yuh-uh toduh-ay? Iuh hargh-ope yuh-uh argh havi-uh-nguh nuh-ice duh-ay. Thuh-uh weath-uh-eruh izzuh suh-pose-duh tuh-uh beh sunn-eh an-duh war-muh thizuh aft-erng-oon. May-buh-uh weh coulduh gargh-oh fargh-oruh nuh-ice wal-guh-kuh toge-the-ruh an-duh stargh-op tuh-uh geh-etuh izz-creem. Tha-at wou-duh beh ve-reh uhn-joy-ab-buhl. Well, I wih-ll targh-alk tuh-uh yuh-oo soo-oon!"
|
81 |
+
|
82 |
+
'''
|
83 |
+
|
84 |
+
# Get the prefix state
|
85 |
+
PREFIX_STATE = precompute_state(INSTRUCT_PREFIX)
|
86 |
+
|
87 |
# Translation logic
|
88 |
+
def translate(text, target_language, inState=PREFIX_STATE):
|
89 |
prompt = f"Translate the following text to {target_language}\n # Input Text:\n{text}\n\n# Output Text:\n"
|
90 |
ctx = prompt.strip()
|
91 |
all_tokens = []
|
92 |
out_last = 0
|
93 |
out_str = ''
|
94 |
occurrence = {}
|
95 |
+
|
96 |
state = None
|
97 |
+
if inState != None:
|
98 |
+
state = copy.deepcopy(inState)
|
99 |
+
|
100 |
+
# Clear GC
|
101 |
+
gc.collect()
|
102 |
+
if HAS_GPU == True :
|
103 |
+
torch.cuda.empty_cache()
|
104 |
+
|
105 |
+
# Generate things token by token
|
106 |
for i in range(ctx_limit):
|
107 |
out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
|
108 |
token = pipeline.sample_logits(out)
|
|
|
115 |
yield out_str.strip()
|
116 |
out_last = i + 1
|
117 |
|
118 |
+
if "# " in out_str and "\n#" in out_str
|
119 |
+
out_str = out_str.split("\n## ")[0].split("\n# ")[0]
|
120 |
+
yield out_str.strip()
|
121 |
+
|
122 |
del out
|
123 |
del state
|
124 |
|
125 |
+
# # Clear GC
|
126 |
+
# gc.collect()
|
127 |
+
# if HAS_GPU == True :
|
128 |
+
# torch.cuda.empty_cache()
|
129 |
|
130 |
yield out_str.strip()
|
131 |
|
132 |
# Languages
|
133 |
LANGUAGES = [
|
134 |
"English",
|
135 |
+
"Zombie Speak",
|
136 |
"Chinese",
|
137 |
"Spanish",
|
138 |
"Bengali",
|
|
|
243 |
|
244 |
# Example data
|
245 |
EXAMPLES = [
|
246 |
+
["Brargh-ains argh-uh foo-duh", "English"],
|
247 |
+
["I Want to eat your brains", "Zombie Speak"],
|
248 |
["Hello, how are you?", "French"],
|
249 |
["Hello, how are you?", "Spanish"],
|
250 |
["Hello, how are you?", "Chinese"],
|
|
|
259 |
# Gradio interface
|
260 |
with gr.Blocks(title=title) as demo:
|
261 |
gr.HTML(f"<div style=\"text-align: center;\"><h1>RWKV-5 World v2 - {title}</h1></div>")
|
262 |
+
gr.Markdown("This is the RWKV-5 World v2 1B5 model tailored for translation. With a halloween zombie speak twist")
|
263 |
|
264 |
# Input and output components
|
265 |
+
text = gr.Textbox(lines=5, label="Source Text", placeholder="Enter the text you want to translate...", default=EXAMPLES[0][0])
|
266 |
+
target_language = gr.Dropdown(choices=LANGUAGES, label="Target Language", default=EXAMPLES[0][1])
|
267 |
output = gr.Textbox(lines=5, label="Translated Text")
|
268 |
submit = gr.Button("Translate", variant="primary")
|
269 |
|