Update README.md
README.md CHANGED
@@ -83,6 +83,53 @@ Base Model: meta-llama/Llama-3.1-8B-Instruct
| Thai | Cambodian | 78.52 | 91.47 | 91.16 |
| Thai | Indonesian | 58.99 | 78.56 | 76.40 |

## How to use

The snippet below loads the model with the `transformers` library and translates a Korean sentence into Vietnamese.

```py
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the translation model and its tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    "MLP-KTLim/llama-3.1-8B-Asian-Translator",
    torch_dtype="auto",
    device_map="auto",
)

tokenizer = AutoTokenizer.from_pretrained(
    "MLP-KTLim/llama-3.1-8B-Asian-Translator",
)

input_text = "안녕하세요? 아시아 언어 번역 모델입니다."  # "Hello? This is an Asian-language translation model."

def get_input_ids(source_lang, target_lang, message):
    # The model supports translation between these five languages.
    assert source_lang in ["Korean", "Vietnamese", "Indonesian", "Thai", "Cambodian"]
    assert target_lang in ["Korean", "Vietnamese", "Indonesian", "Thai", "Cambodian"]

    # Build the chat-formatted prompt: a system instruction plus the sentence to translate.
    input_ids = tokenizer.apply_chat_template(
        conversation=[
            {"role": "system", "content": f"You are a useful translation AI. Please translate the sentence given in {source_lang} into {target_lang}."},
            {"role": "user", "content": message},
        ],
        tokenize=True,
        return_tensors="pt",
        add_generation_prompt=True,
    )
    return input_ids

input_ids = get_input_ids(
    source_lang="Korean",
    target_lang="Vietnamese",
    message=input_text,
)

output = model.generate(
    input_ids.to(model.device),
    max_new_tokens=128,
)

# Print only the generated translation, excluding the prompt tokens.
print(tokenizer.decode(output[0][len(input_ids[0]):], skip_special_tokens=True))
```
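
For repeated calls, the steps above can be folded into a small helper. The sketch below is illustrative rather than part of the model card: it reuses the `model`, `tokenizer`, `get_input_ids`, and `input_text` objects defined above, and the `translate` helper name is our own.

```py
def translate(source_lang, target_lang, text, max_new_tokens=128):
    # Build the chat prompt and generate with the model loaded above.
    input_ids = get_input_ids(source_lang, target_lang, text)
    output = model.generate(
        input_ids.to(model.device),
        max_new_tokens=max_new_tokens,
    )
    # Return only the newly generated tokens, skipping the prompt.
    return tokenizer.decode(output[0][len(input_ids[0]):], skip_special_tokens=True)

# Example: translate the same Korean sentence into Thai.
print(translate("Korean", "Thai", input_text))
```
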
## Contributor
- ์์ธํธ
- ๊น๋ฏผ์ค