# NOTE(review): removed non-source residue (a "File size" banner, git-blame
# commit hashes, and a line-number gutter) that an export/extraction pass
# prepended to this file — it was not valid Python and not part of the app.
import gradio as gr
import spaces
import os
import spaces
import torch
import random
import time
import re
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextStreamer
import transformers
# --- Configuration and one-time model loading (runs at import time) ---------

# Hugging Face access token; ``None`` when the HF_TOKEN env var is unset.
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# ZeroGPU-template sanity check: allocate a tensor and report its device.
# NOTE(review): calling ``.cuda()`` at import time, outside the @spaces.GPU
# context, assumes CUDA is already visible — confirm on the target Space.
zero = torch.Tensor([0]).cuda()
print(zero.device)  # may report 'cpu' on ZeroGPU until a GPU is attached

# Previous (Llama 3) base model / adapter pair, kept for reference:
# model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
# peft_model_id = "Imran1/Llama3.1_8b_Qlora_bnk"

# Current base model plus the BNK-banking LoRA adapter.
model_id = "Qwen/Qwen2.5-14B-Instruct"
peft_model_id = "Imran1/Qwen2.5-14b-bnk-lora-11"

# attn_implementation="flash_attention_2" is the faster alternative when the
# runtime supports it; SDPA is the portable default used here.
model = AutoModelForCausalLM.from_pretrained(model_id, attn_implementation="sdpa", torch_dtype= torch.bfloat16)
model.load_adapter(peft_model_id)  # attach the LoRA weights to the base model
model.enable_adapters()            # make generate() use the adapter
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
# streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
model.to('cuda')

# Set pad_token_id if it's not already set (some tokenizers ship without one).
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Llama-3-specific terminators, unused with the Qwen chat template:
# terminators = [
#     tokenizer.eos_token_id,
#     tokenizer.convert_tokens_to_ids("<|eot_id|>")
# ]

# Sampling configuration shared by every generate() call.
generation_params = {
    'max_new_tokens': 2000,
    'use_cache': True,
    'do_sample': True,
    'temperature': 0.7,
    'top_p': 0.9,
    # 'top_k': 50,
    # 'pad_token_id': tokenizer.pad_token_id,
    # 'eos_token_id': terminators,
}
@spaces.GPU
def inference(query: str) -> str:
    """Translate a banking conversation with the LoRA-adapted Qwen model.

    Builds a chat prompt from a fixed BNK-banking system prompt plus *query*
    as the user turn, generates with the module-level ``generation_params``,
    and returns only the newly generated assistant text.

    Args:
        query: The user request, e.g. ``"Translate ko to en: ..."``.

    Returns:
        The decoded model continuation with special tokens stripped.
    """
    # NOTE(review): the Korean portions of the system prompt below are
    # mojibake from an earlier encoding pass; the literal is runtime text and
    # is kept byte-for-byte as found.
    messages = [
        {"role": "system", "content": """You are a highly skilled multilingual AI assistant specializing in banking and finance translations, with a focus on BNK Bank's products and services. Your task is to create and translate banking-related conversations with perfect accuracy, cultural sensitivity, and natural language use across multiple languages: Korean, English, Simplified Chinese, Traditional Chinese, Russian, Uzbek, Japanese, and Vietnamese.
1. Language Expertise:
- Demonstrate native-level fluency in all eight languages.
- language = ["English", "Korean", "Simplified Chinese", "Traditional Chinese", "Russian", "Uzbek", "Japanese", "Vietnamese"]
- Apply nuances, idioms, and cultural contexts specific to each language with precision.
- Ensure that each translation reads as if it were originally written in that language.
2. Banking and Finance Knowledge:
- Exhibit expert-level understanding of banking terminology, products, and services, especially those specific to BNK Bank.
- Maintain perfect consistency in translating specialized banking terms across all languages.
- Pay meticulous attention to Korean banking terms, ensuring they are accurately represented in other languages.
3. BNK Bank-Specific Terminology:
- The following BNK Bank product names and categories MUST BE TRANSLATED CORRECTLY in each target language:
a) ์ธ๊ตญ์ธ ์
์ถ๊ธ ํต์ฅ ์ข
๋ฅ: Only one ํต์ฅ, Only one ์ฃผ๋์ด ํต์ฅ, ๋ณดํต์๊ธ, ์์ ์ ์ถ์๊ธ, ๋ฑ
ํฌ๋ผ์ธ ํต์ฅ, ๋ชจ์ํต์ฅ
b) ์์ ๊ธ ์ข
๋ฅ: BNK๊ฐ์์ผ๊ตฌ์ ๊ธฐ์๊ธ, LIVE์ ๊ธฐ์๊ธ, ์ ํ์ ์ค์ฒ ์๊ธ, BNK๋ด๋ง๋๋ก ์๊ธ, ๊ฐ๊ณ์ฐ๋ ์ ๊ธฐ ์ ๊ธ, BNK์ง์ญ์ฌ๋ ์ ๊ธ, ๊ฟ์ด๋ฃธ ์ ๊ธ, ๋ฐฑ์ธ์ฒญ์ถ์ค๋ฒ ์ ๊ธ, ํซ์ ๊ธ, ์ ํ์ ์ค์ฒ ์ ๊ธ, ์ฃผํ์ฒญ์ฝ์ข
ํฉ์ ์ถ, ๋(The) ํนํ ์ ๊ธฐ์๊ธ
c) ์ฒดํฌ ์นด๋ ์ข
๋ฅ: ZIPL์ฒดํฌ, ์ด๋๋ก๋ ๊ทธ๋ฆฐ์ฒดํฌ, ๋๋ฐฑ์ ์ฒดํฌ์นด๋(ํ๋ถ๊ตํต๋๊ฐ๋ฅ), 2030์ธํํธ์ฒดํฌ(ํ๋ถ๊ตํต์นด๋์), ๊ตญ๋ฏผํ๋ณต์ฒดํฌ, ์นด์นด์คํ์ด์ฒดํฌ, ๋ฉ๋ฉ์ฒดํฌ, ํดํผํฌ์ธํธ์ฒดํฌ, ๋ง์ด์กด๊ทธ๋ฆฐ์ฒดํฌ, ๋ง์ด์กด์ฒดํฌ
d) ์ ์ฉ ์นด๋ ์ข
๋ฅ: (ํผํธ)์บ์ฌ๋ฐฑ์นด๋, B Smart(oh point)์นด๋, BNK 2030ํ๋ํฐ๋(Platinum)์นด๋, BNK ๋ถ์๋์ธ์ ์ํํธ์นด๋, BNK ๋ถ์๋์ธ์ ํ์ผํ์นด๋, Y์นด๋, ๊ตญ๋ฏผํ๋ณต์นด๋, ๊ทธ๋ฆฐ์นด๋, ๊ทธ๋ฆฐ์นด๋v2, ๊ธ๋ก๋ฒ์นด๋ ์๋น์ค, ๋ค๋ฌธํ์ฌ๋์นด๋, ๋ค์ด๋ ํธ ์คํ ํ๋ฌ์ค ์๋น์ค, ๋ํํญ๊ณต(Sky-pass) ์ ํด์นด๋, ๋ฉ๋ฉ(DingDing)์ ์ฉ์นด๋, ๋ ํฌ์ธ ์นด๋, ๋งค์งํจ์ค์นด๋, ๋ช
์์นด๋, ๋ฌธํ์ฌ๋์นด๋, ๋ถ๋น
์ค์นด๋, ๋น์จTOP์นด๋, ์น์ฉ์ฐจ์์ผ์ ์นด๋, ์ ์ฉ์นด๋๊ฒธ์ฉ๋ง์ด๋น(Mybi)์นด๋, ์์์๋ํด๋ฝ์นด๋(Asiana Club), ์ธ์ฐ๊ด์ญ์ ์น์ฉ์ฐจ์์ผ์ ์นด๋, ์ธ์ฐ์ฌ๋์นด๋, ํ๋ํฐ๋(Platinum) ์นด๋, ํดํผ์คํ ์นด๋์๋น์ค, ํ๋ถ๊ตํต์นด๋, BNK ํ๋ ์ฆ ์ ์ฉ์นด๋, BNK ๋ถ์๋์ธ์ ๋์ค์ผ์นด๋, ํ๋ถํ์ดํจ์ค์นด๋, ํ๋ชจ์์ ์ฉ์นด๋, ๋ฉ๊ฐ์ผํ ์ ์ฉ์นด๋, ์ค๋์e์ ์ฉ์นด๋, ํซ(PET)์นด๋, ๋ค์ด์๋ชฌ๋(Diamond) ์นด๋, ์นด๋ํ ์จ๋๋ฆฌ์ํ๊ถ, SK OIL&LPG์นด๋, ํ(pod)์ ์ฉ์นด๋, ๋ถ์ฐ์ฒด์ก์ฌ๋์นด๋, ์ด๋๋ก๋ ๊ทธ๋ฆฐ์ฒดํฌ์นด๋, ZipL ์ ์ฉ์นด๋, BNK Simple American Express Blue Business ์นด๋
- Translate these terms accurately and consistently across all languages, providing culturally appropriate explanations or context when necessary.
4. get input language and translate it inti target language.
- return only translation. without extra explaination and comments.
- do not return extra text.
"""},
        {"role": "user", "content": f"{query}"},
    ]
    # Render the chat template straight to input ids on the GPU granted by
    # the @spaces.GPU decorator.
    tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
    outputs = model.generate(tokenized_chat, **generation_params)
    # Earlier Llama-3 approach: decode everything and split on the header tag.
    # decoded_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=False)
    # assistant_response = decoded_outputs[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
    # Slice off the prompt tokens so only the generated continuation remains.
    response = outputs[0][tokenized_chat.shape[-1]:]
    response = tokenizer.decode(response, skip_special_tokens=True)
    return response
# Streaming variant, kept for reference (requires the TextStreamer above):
# outputs = model.generate(tokenized_chat, **generation_params, streamer=streamer)
# return outputs
# Sample prompt pre-populated in the Gradio UI.
# Fix: the extraction pass broke this single-quoted literal across physical
# lines (a raw newline in a "..." string is a SyntaxError).  The delimiter is
# changed to triple quotes so the embedded newlines are legal; the text itself
# (mojibake'd Korean included) is kept byte-for-byte as found.
examples = ["""Translate ko to en: \n\n ์ํ์: ์๋
ํ์ธ์! BNK์ํ์
๋๋ค. ๋ฌด์์ ๋์๋๋ฆด๊น์? ๊ณ ๊ฐ: ์๋
ํ์ธ์. ์ ๊ฐ ์ธ๊ตญ์ธ ์
์ถ๊ธ ํต์ฅ์ ๊ฐ์คํ๊ณ ์ถ์๋ฐ, ํ์ํ ์๋ฅ๊ฐ ๋ฌด์์ธ์ง ๊ถ๊ธํฉ๋๋ค. ์ํ์: ์ธ๊ตญ์ธ ์
์ถ๊ธ ํต์ฅ์ ๊ฐ์คํ์๋ ค๋ฉด ์ฌ๊ถ, ์ธ๊ตญ์ธ ๋ฑ๋ก์ฆ, ๊ทธ๋ฆฌ๊ณ ์ฃผ์ ์ฆ๋ช
์๊ฐ ํ์ํฉ๋๋ค. ๊ณ ๊ฐ: ์๊ฒ ์ต๋๋ค. ํต์ฅ ๊ฐ์ค ํ ์
๊ธํ ๋ ์์๋ฃ๊ฐ ๋ฐ์ํ๋์? ์ํ์: ๋ค, ์ผ๋ฐ์ ์ผ๋ก ์ธ๊ตญ์ธ ํต์ฅ์ ๋ํ ์
๊ธ ์์๋ฃ๋ ์์ต๋๋ค. ํ์ง๋ง ๋ค๋ฅธ ํต์ฅ์ผ๋ก ์ด์ฒดํ ๊ฒฝ์ฐ ์์๋ฃ๊ฐ ๋ฐ์ํ ์ ์์ต๋๋ค. ๋์ฑ ๊ถ๊ธํ ์ ์ด ์์ผ์ ๊ฐ์?"""]
def response(message, history):
    """Gradio ChatInterface callback.

    Translates *message* via :func:`inference`.  The chat *history* argument
    is required by the ChatInterface signature but deliberately ignored —
    every request is translated independently of prior turns.
    """
    return inference(message)
# Build and launch the chat UI; `examples` pre-populates sample prompts.
# Fix: dropped the stray trailing " |" (table-extraction residue, a
# SyntaxError) and normalized argument spacing.
gr.ChatInterface(response, examples=examples).launch()