Commit 5709a62
Parent(s): 4080581
Update app.py

app.py CHANGED
@@ -94,7 +94,14 @@ Death does not concern us, because as long as we exist, death is not here. And w
 PREFIX_STATE = precompute_state(INSTRUCT_PREFIX)
 
 # Translation logic
-def translate(text, source_language, target_language, inState=PREFIX_STATE):
+def translate(
+    text, source_language, target_language,
+    inState=PREFIX_STATE,
+    temperature=0.2,
+    top_p=0.5,
+    presencePenalty = 0.1,
+    countPenalty = 0.1,
+):
     prompt = f"## From {source_language}:\n{text}\n\n## To {target_language}:\n"
     ctx = prompt.strip()
     all_tokens = []
@@ -102,6 +109,9 @@ def translate(text, source_language, target_language, inState=PREFIX_STATE):
     out_str = ''
     occurrence = {}
 
+    alpha_frequency = countPenalty
+    alpha_presence = presencePenalty
+
     state = None
     if inState != None:
         state = universal_deepcopy(inState)
@@ -114,10 +124,21 @@ def translate(text, source_language, target_language, inState=PREFIX_STATE):
     # Generate things token by token
     for i in range(ctx_limit):
         out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
-        token = pipeline.sample_logits(out)
+        for n in occurrence:
+            out[n] -= (alpha_presence + occurrence[n] * alpha_frequency)
+        token = pipeline.sample_logits(out, temperature=temperature, top_p=top_p)
+
         if token in [0]: # EOS token
             break
+
         all_tokens += [token]
+        for xxx in occurrence:
+            occurrence[xxx] *= 0.996
+        if token not in occurrence:
+            occurrence[token] = 1
+        else:
+            occurrence[token] += 1
+
         tmp = pipeline.decode(all_tokens[out_last:])
         if '\ufffd' not in tmp:
             out_str += tmp
@@ -166,7 +187,6 @@ def translate(text, source_language, target_language, inState=PREFIX_STATE):
 # Languages
 LANGUAGES = [
     "English",
-    "Zombie Speak",
     "Chinese",
     "Spanish",
     "Bengali",
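For readers skimming the hunk above: the new sampling step applies a decaying presence/frequency penalty before each draw, so tokens the model has already emitted become progressively less likely, and the 0.996 decay lets the penalty fade once a token stops recurring. Below is a minimal self-contained sketch of that mechanism; the NumPy softmax/top-p sampler stands in for pipeline.sample_logits, whose internals this commit does not show.

import numpy as np

def sample_with_penalty(logits, occurrence,
                        alpha_presence=0.1, alpha_frequency=0.1,
                        temperature=0.2, top_p=0.5):
    # Penalize every token seen so far: a flat presence term plus a
    # count-scaled frequency term, as in the hunk above.
    logits = logits.copy()
    for tok, count in occurrence.items():
        logits[tok] -= alpha_presence + count * alpha_frequency
    # Temperature + nucleus (top-p) sampling; a stand-in for
    # pipeline.sample_logits(out, temperature=..., top_p=...).
    probs = np.exp((logits - logits.max()) / max(temperature, 1e-8))
    probs /= probs.sum()
    order = np.argsort(-probs)
    cutoff = int(np.searchsorted(np.cumsum(probs[order]), top_p)) + 1
    keep = order[:cutoff]
    return int(np.random.choice(keep, p=probs[keep] / probs[keep].sum()))

def update_occurrence(occurrence, token, decay=0.996):
    # After emitting a token, decay all counts and bump the new one,
    # mirroring the loop the commit adds after all_tokens += [token].
    for tok in occurrence:
        occurrence[tok] *= decay
    occurrence[token] = occurrence.get(token, 0) + 1

With alpha_presence = alpha_frequency = 0.1 (the new defaults), a token whose decayed count is n loses roughly 0.1 + 0.1 * n logits on the next step.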
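For reference, a hypothetical call against the new signature, spelling out the defaults the commit introduces (translate, model, and pipeline all live in app.py):

# Hypothetical usage; the keyword arguments match the new parameter names.
result = translate(
    "Death does not concern us, because as long as we exist, death is not here.",
    "English",
    "Chinese",
    temperature=0.2,
    top_p=0.5,
    presencePenalty=0.1,
    countPenalty=0.1,
)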