Spaces:

TiberiuCristianLeon
/

StreamlitTranslate

Running

App Files Files

TiberiuCristianLeon committed on 24 days ago

Commit

a720b38

verified ·

1 Parent(s): 4eb14c9

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -10

app.py CHANGED Viewed

@@ -138,15 +138,15 @@ class Translators:
         return translated_text
     def hunyuan(self):
-        ZH_CODES = {"Chinese": "zh", "Traditional Chinese": "zh-Hant", "Cantonese": "yue"}
-        if self.sl in ZH_CODES.keys() or self.tl in ZH_CODES.keys():
-            prompt = f"把下面的文本翻译成{self.tl}，不要额外解释。\n\n{self.input_text}"
-        else:
-            prompt = f"Translate the following segment into {self.tl}, without additional explanation.\n\n{self.input_text}."
         tokenizer = AutoTokenizer.from_pretrained(self.model_name)
         model = AutoModelForCausalLM.from_pretrained(self.model_name, device_map="auto", dtype=torch.bfloat16)
-        messages = [{"role": "system", "content": "You are a professional translator, translating in a formal tone and providing only translation, no other comments or explanations"},
-                    {"role": "user", "content": prompt}]
         # Tokenize the conversation
         tokenized_chat = tokenizer.apply_chat_template(
             messages,
@@ -169,9 +169,8 @@ class Translators:
             )
         # outputs = model.generate(tokenized_chat.to(model.device), max_new_tokens=512, top_k=20, top_p=0.6, repetition_penalty=1.05, temperature=0.7)
-        output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Decode only the new tokens
-        # output_text = tokenizer.decode(outputs[0][tokenized_chat.shape[-1]:], skip_special_tokens=True)
         return output_text
     def HelsinkiNLP_mulroa(self):

         return translated_text
     def hunyuan(self):
+        # ZH_CODES = {"Chinese": "zh", "Traditional Chinese": "zh-Hant", "Cantonese": "yue"}
+        # if self.sl in ZH_CODES.keys() or self.tl in ZH_CODES.keys():
+        #     prompt = f"把下面的文本翻译成{self.tl}，不要额外解释。\n\n{self.input_text}"
+        # else:
+        prompt = f"Translate the following segment into {self.tl}, without additional explanation.\n\n{self.input_text}."
         tokenizer = AutoTokenizer.from_pretrained(self.model_name)
         model = AutoModelForCausalLM.from_pretrained(self.model_name, device_map="auto", dtype=torch.bfloat16)
+        systemprompt = {"role": "system", "content": "You are a professional translator, translating in a formal tone and providing only translation, no other comments or explanations"}
+        messages = [systemprompt, {"role": "user", "content": prompt}]
         # Tokenize the conversation
         tokenized_chat = tokenizer.apply_chat_template(
             messages,
             )
         # outputs = model.generate(tokenized_chat.to(model.device), max_new_tokens=512, top_k=20, top_p=0.6, repetition_penalty=1.05, temperature=0.7)
+        # output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        output_text = tokenizer.decode(outputs[0][tokenized_chat.shape[-1]:], skip_special_tokens=True) # Decode only the new tokens
         return output_text
     def HelsinkiNLP_mulroa(self):