Update app.py
Browse files
app.py
CHANGED
|
@@ -138,15 +138,15 @@ class Translators:
|
|
| 138 |
return translated_text
|
| 139 |
|
| 140 |
def hunyuan(self):
|
| 141 |
-
ZH_CODES = {"Chinese": "zh", "Traditional Chinese": "zh-Hant", "Cantonese": "yue"}
|
| 142 |
-
if self.sl in ZH_CODES.keys() or self.tl in ZH_CODES.keys():
|
| 143 |
-
|
| 144 |
-
else:
|
| 145 |
-
|
| 146 |
tokenizer = AutoTokenizer.from_pretrained(self.model_name)
|
| 147 |
model = AutoModelForCausalLM.from_pretrained(self.model_name, device_map="auto", dtype=torch.bfloat16)
|
| 148 |
-
|
| 149 |
-
|
| 150 |
# Tokenize the conversation
|
| 151 |
tokenized_chat = tokenizer.apply_chat_template(
|
| 152 |
messages,
|
|
@@ -169,9 +169,8 @@ class Translators:
|
|
| 169 |
)
|
| 170 |
|
| 171 |
# outputs = model.generate(tokenized_chat.to(model.device), max_new_tokens=512, top_k=20, top_p=0.6, repetition_penalty=1.05, temperature=0.7)
|
| 172 |
-
output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 173 |
-
# Decode only the new tokens
|
| 174 |
-
# output_text = tokenizer.decode(outputs[0][tokenized_chat.shape[-1]:], skip_special_tokens=True)
|
| 175 |
return output_text
|
| 176 |
|
| 177 |
def HelsinkiNLP_mulroa(self):
|
|
|
|
| 138 |
return translated_text
|
| 139 |
|
| 140 |
def hunyuan(self):
|
| 141 |
+
# ZH_CODES = {"Chinese": "zh", "Traditional Chinese": "zh-Hant", "Cantonese": "yue"}
|
| 142 |
+
# if self.sl in ZH_CODES.keys() or self.tl in ZH_CODES.keys():
|
| 143 |
+
# prompt = f"把下面的文本翻译成{self.tl},不要额外解释。\n\n{self.input_text}"
|
| 144 |
+
# else:
|
| 145 |
+
prompt = f"Translate the following segment into {self.tl}, without additional explanation.\n\n{self.input_text}."
|
| 146 |
tokenizer = AutoTokenizer.from_pretrained(self.model_name)
|
| 147 |
model = AutoModelForCausalLM.from_pretrained(self.model_name, device_map="auto", dtype=torch.bfloat16)
|
| 148 |
+
systemprompt = {"role": "system", "content": "You are a professional translator, translating in a formal tone and providing only translation, no other comments or explanations"}
|
| 149 |
+
messages = [systemprompt, {"role": "user", "content": prompt}]
|
| 150 |
# Tokenize the conversation
|
| 151 |
tokenized_chat = tokenizer.apply_chat_template(
|
| 152 |
messages,
|
|
|
|
| 169 |
)
|
| 170 |
|
| 171 |
# outputs = model.generate(tokenized_chat.to(model.device), max_new_tokens=512, top_k=20, top_p=0.6, repetition_penalty=1.05, temperature=0.7)
|
| 172 |
+
# output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 173 |
+
output_text = tokenizer.decode(outputs[0][tokenized_chat.shape[-1]:], skip_special_tokens=True) # Decode only the new tokens
|
|
|
|
| 174 |
return output_text
|
| 175 |
|
| 176 |
def HelsinkiNLP_mulroa(self):
|