picocreator committed on
Commit
5709a62
1 Parent(s): 4080581

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -3
app.py CHANGED
@@ -94,7 +94,14 @@ Death does not concern us, because as long as we exist, death is not here. And w
94
  PREFIX_STATE = precompute_state(INSTRUCT_PREFIX)
95
 
96
  # Translation logic
97
- def translate(text, source_language, target_language, inState=PREFIX_STATE):
 
 
 
 
 
 
 
98
  prompt = f"## From {source_language}:\n{text}\n\n## To {target_language}:\n"
99
  ctx = prompt.strip()
100
  all_tokens = []
@@ -102,6 +109,9 @@ def translate(text, source_language, target_language, inState=PREFIX_STATE):
102
  out_str = ''
103
  occurrence = {}
104
 
 
 
 
105
  state = None
106
  if inState != None:
107
  state = universal_deepcopy(inState)
@@ -114,10 +124,21 @@ def translate(text, source_language, target_language, inState=PREFIX_STATE):
114
  # Generate things token by token
115
  for i in range(ctx_limit):
116
  out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
117
- token = pipeline.sample_logits(out, temperature=0.1, top_p=0.5)
 
 
 
118
  if token in [0]: # EOS token
119
  break
 
120
  all_tokens += [token]
 
 
 
 
 
 
 
121
  tmp = pipeline.decode(all_tokens[out_last:])
122
  if '\ufffd' not in tmp:
123
  out_str += tmp
@@ -166,7 +187,6 @@ def translate(text, source_language, target_language, inState=PREFIX_STATE):
166
  # Languages
167
  LANGUAGES = [
168
  "English",
169
- "Zombie Speak",
170
  "Chinese",
171
  "Spanish",
172
  "Bengali",
 
94
  PREFIX_STATE = precompute_state(INSTRUCT_PREFIX)
95
 
96
  # Translation logic
97
+ def translate(
98
+ text, source_language, target_language,
99
+ inState=PREFIX_STATE,
100
+ temperature=0.2,
101
+ top_p=0.5,
102
+ presencePenalty = 0.1,
103
+ countPenalty = 0.1,
104
+ ):
105
  prompt = f"## From {source_language}:\n{text}\n\n## To {target_language}:\n"
106
  ctx = prompt.strip()
107
  all_tokens = []
 
109
  out_str = ''
110
  occurrence = {}
111
 
112
+ alpha_frequency = countPenalty
113
+ alpha_presence = presencePenalty
114
+
115
  state = None
116
  if inState != None:
117
  state = universal_deepcopy(inState)
 
124
  # Generate things token by token
125
  for i in range(ctx_limit):
126
  out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
127
+ for n in occurrence:
128
+ out[n] -= (alpha_presence + occurrence[n] * alpha_frequency)
129
+ token = pipeline.sample_logits(out, temperature=temperature, top_p=top_p)
130
+
131
  if token in [0]: # EOS token
132
  break
133
+
134
  all_tokens += [token]
135
+ for xxx in occurrence:
136
+ occurrence[xxx] *= 0.996
137
+ if token not in occurrence:
138
+ occurrence[token] = 1
139
+ else:
140
+ occurrence[token] += 1
141
+
142
  tmp = pipeline.decode(all_tokens[out_last:])
143
  if '\ufffd' not in tmp:
144
  out_str += tmp
 
187
  # Languages
188
  LANGUAGES = [
189
  "English",
 
190
  "Chinese",
191
  "Spanish",
192
  "Bengali",