TiberiuCristianLeon commited on
Commit
a720b38
·
verified ·
1 Parent(s): 4eb14c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -10
app.py CHANGED
@@ -138,15 +138,15 @@ class Translators:
138
  return translated_text
139
 
140
  def hunyuan(self):
141
- ZH_CODES = {"Chinese": "zh", "Traditional Chinese": "zh-Hant", "Cantonese": "yue"}
142
- if self.sl in ZH_CODES.keys() or self.tl in ZH_CODES.keys():
143
- prompt = f"把下面的文本翻译成{self.tl},不要额外解释。\n\n{self.input_text}"
144
- else:
145
- prompt = f"Translate the following segment into {self.tl}, without additional explanation.\n\n{self.input_text}."
146
  tokenizer = AutoTokenizer.from_pretrained(self.model_name)
147
  model = AutoModelForCausalLM.from_pretrained(self.model_name, device_map="auto", dtype=torch.bfloat16)
148
- messages = [{"role": "system", "content": "You are a professional translator, translating in a formal tone and providing only translation, no other comments or explanations"},
149
- {"role": "user", "content": prompt}]
150
  # Tokenize the conversation
151
  tokenized_chat = tokenizer.apply_chat_template(
152
  messages,
@@ -169,9 +169,8 @@ class Translators:
169
  )
170
 
171
  # outputs = model.generate(tokenized_chat.to(model.device), max_new_tokens=512, top_k=20, top_p=0.6, repetition_penalty=1.05, temperature=0.7)
172
- output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
173
- # Decode only the new tokens
174
- # output_text = tokenizer.decode(outputs[0][tokenized_chat.shape[-1]:], skip_special_tokens=True)
175
  return output_text
176
 
177
  def HelsinkiNLP_mulroa(self):
 
138
  return translated_text
139
 
140
  def hunyuan(self):
141
+ # ZH_CODES = {"Chinese": "zh", "Traditional Chinese": "zh-Hant", "Cantonese": "yue"}
142
+ # if self.sl in ZH_CODES.keys() or self.tl in ZH_CODES.keys():
143
+ # prompt = f"把下面的文本翻译成{self.tl},不要额外解释。\n\n{self.input_text}"
144
+ # else:
145
+ prompt = f"Translate the following segment into {self.tl}, without additional explanation.\n\n{self.input_text}."
146
  tokenizer = AutoTokenizer.from_pretrained(self.model_name)
147
  model = AutoModelForCausalLM.from_pretrained(self.model_name, device_map="auto", dtype=torch.bfloat16)
148
+ systemprompt = {"role": "system", "content": "You are a professional translator, translating in a formal tone and providing only translation, no other comments or explanations"}
149
+ messages = [systemprompt, {"role": "user", "content": prompt}]
150
  # Tokenize the conversation
151
  tokenized_chat = tokenizer.apply_chat_template(
152
  messages,
 
169
  )
170
 
171
  # outputs = model.generate(tokenized_chat.to(model.device), max_new_tokens=512, top_k=20, top_p=0.6, repetition_penalty=1.05, temperature=0.7)
172
+ # output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
173
+ output_text = tokenizer.decode(outputs[0][tokenized_chat.shape[-1]:], skip_special_tokens=True) # Decode only the new tokens
 
174
  return output_text
175
 
176
  def HelsinkiNLP_mulroa(self):