qewrufda commited on
Commit
da82347
·
verified ·
1 Parent(s): 8bbfba5
app.py CHANGED
@@ -1,134 +1,71 @@
1
- !pip install -q -U transformers peft accelerate bitsandbytes
2
-
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
  from peft import PeftModel
6
- from google.colab import drive
7
 
8
  # ============================================
9
- # 1๏ธโƒฃ ๋“œ๋ผ์ด๋ธŒ ๋งˆ์šดํŠธ
10
  # ============================================
11
- drive.mount('/content/drive')
 
12
 
13
  # ============================================
14
- # 2๏ธโƒฃ ํ™˜๊ฒฝ ์„ค์ •
15
  # ============================================
16
- BASE_MODEL = "beomi/Llama-3-Open-Ko-8B"
17
- LORA_PATH = "/content/drive/MyDrive/at_last"
18
 
19
- print("๐Ÿš€ ๋ชจ๋ธ ๋กœ๋“œ ์ค‘...")
20
  model = AutoModelForCausalLM.from_pretrained(
21
  BASE_MODEL,
22
  torch_dtype=torch.bfloat16,
23
  device_map="auto",
24
  trust_remote_code=True
25
  )
26
-
27
- tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
28
- tokenizer.pad_token = tokenizer.eos_token
29
-
30
- print("๐Ÿ”— LoRA ๋ณ‘ํ•ฉ ์ค‘...")
31
  model = PeftModel.from_pretrained(model, LORA_PATH, is_local=True)
32
-
33
- # โœ… <|eot_id|> ํ† ํฐ์„ EOS๋กœ ์ง€์ •
34
  model.config.eos_token_id = tokenizer.eos_token_id
35
  model.config.pad_token_id = tokenizer.pad_token_id
36
 
37
- print("โœ… ๋ชจ๋ธ + LoRA ์ค€๋น„ ์™„๋ฃŒ!")
38
-
39
- from transformers import StoppingCriteria, StoppingCriteriaList
40
-
41
- class StopOnTokens(StoppingCriteria):
42
- def __init__(self, stop_ids):
43
- self.stop_ids = stop_ids
44
-
45
- def __call__(self, input_ids, scores, **kwargs):
46
- last_token = input_ids[0, -1].item()
47
- return last_token in self.stop_ids
48
-
49
-
50
- # โœ… ์ข…๋ฃŒ ํ† ํฐ ํ›„๋ณด๋ฅผ ๋ชจ๋‘ ๋“ฑ๋ก
51
- stop_words = ["<|eot|>", "</s>", "<|end_of_text|>"]
52
- stop_ids = [tokenizer.convert_tokens_to_ids(w) for w in stop_words if tokenizer.convert_tokens_to_ids(w) is not None]
53
- stopping_criteria = StoppingCriteriaList([StopOnTokens(stop_ids)])
54
-
55
- stopping_criteria = StoppingCriteriaList([StopOnTokens(stop_ids)])
56
-
57
  # ============================================
58
- # 3๏ธโƒฃ ํ”„๋กฌํ”„ํŠธ ๋นŒ๋“œ ํ•จ์ˆ˜
59
  # ============================================
60
  AI_PERSONALITY = """
61
  ๋„ˆ๋Š” ์‚ฌ์šฉ์ž์˜ ๋ง์„ ์ง„์‹ฌ์œผ๋กœ ๋“ค์–ด์ฃผ๋Š” ์นœ๊ตฌ์•ผ.
62
  ์‚ฌ์šฉ์ž๊ฐ€ ๋Œ€ํ™”๋ฅผ ๊ฑธ๋ฉด ์ž์—ฐ์Šค๋Ÿฝ๊ณ  ์ผ์ƒ์ ์ธ ํ†ค์œผ๋กœ ๋Œ€๋‹ตํ•ด.
63
  ์žฅํ™ฉํ•˜์ง€ ๋ง๊ณ , ๊ณต๊ฐํ•˜๋ฉด์„œ ์งง๊ณ  ๋”ฐ๋œปํ•˜๊ฒŒ ๋งํ•  ๊ฒƒ.
64
- ๋„ˆ๋Š” ์‚ฌ์šฉ์ž์˜ ์š”์ฒญ์„ ์ •ํ™•ํžˆ ์ดํ•ดํ•˜๊ณ , ํ˜„์‹ค์ ์ธ ๋‹ต๋ณ€์„ ์ œ๊ณตํ•˜๋Š” ์นœ๊ทผํ•œ ์นœ๊ตฌ์•ผ.
65
- ๋†๋‹ด๊ณผ ๊ณต๊ฐ์„ ์„ž๋˜, ์š”์ฒญ์„ ํšŒํ”ผํ•˜์ง€ ์•Š๊ณ  ๋ช…ํ™•ํžˆ ๋‹ต๋ณ€ํ•ด์•ผ ํ•ด.
66
  """
67
 
68
- def build_prompt_full_history(history):
69
- """
70
- - history๋Š” user/assistant ๋ชจ๋“  ๋Œ€ํ™” ํฌํ•จ
71
- - ๋งˆ์ง€๋ง‰ user ๋ฐœํ™”๋งŒ generate ๋Œ€์ƒ
72
- """
73
- prompt = "<|begin_of_text|>\n" + AI_PERSONALITY.strip() + "\n\n"
74
- for turn in history:
75
- role = turn["role"]
76
- content = turn["content"].strip()
77
- prompt += f"<|start_header_id|>{role}<|end_header_id|>\n{content}<|eot|>\n"
78
-
79
- # ๋งˆ์ง€๋ง‰ user ์ดํ›„์— assistant placeholder ์ถ”๊ฐ€
80
- prompt += "<|start_header_id|>assistant<|end_header_id|>\n"
81
- return prompt
82
-
83
-
84
 
85
  # ============================================
86
- # 4๏ธโƒฃ ๋Œ€ํ™” ๋ฃจํ”„
87
  # ============================================
88
- history = []
89
- add_header = True # ์ฒซ ํ„ด๋งŒ personality ํฌํ•จ
90
-
91
- while True:
92
- user_input = input("๐Ÿ‘ค ์‚ฌ์šฉ์ž: ").strip()
93
- if user_input.lower() in ["์ข…๋ฃŒ", "exit", "quit"]:
94
- print("๐Ÿ›‘ ๋Œ€ํ™” ์ข…๋ฃŒ!")
95
- break
96
-
97
  history.append({"role": "user", "content": user_input})
98
- prompt = build_prompt_full_history(history)
99
- add_header = False # ์ดํ›„์—๋Š” personality ์ค‘๋ณต ๋ฐฉ์ง€
 
 
100
 
101
  inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
102
-
103
  with torch.no_grad():
104
- output = model.generate(
105
- **inputs,
106
- max_new_tokens=256,
107
- temperature=0.6,
108
- top_p=0.9,
109
- repetition_penalty=1.1,
110
- pad_token_id=tokenizer.eos_token_id,
111
- eos_token_id=tokenizer.eos_token_id,
112
- stopping_criteria=stopping_criteria
113
- )
114
-
115
- response_full = tokenizer.decode(
116
- output[0][inputs["input_ids"].shape[1]:],
117
- skip_special_tokens=True
118
- )
119
-
120
  response = response_full.split("<|eot|>")[0].strip()
121
-
122
-
123
- # <|eot_id|> ๊ธฐ์ค€์œผ๋กœ ์ž๋ฅด๊ธฐ
124
- if "<|eot_id|>" in response_full:
125
- response = response_full.split("<|eot_id|>")[0].strip()
126
- else:
127
- response = response_full.strip()
128
-
129
- print(f"๐Ÿค– AI: {response}\n")
130
-
131
  history.append({"role": "assistant", "content": response})
132
  if len(history) > 10:
133
- history = history[-10:]
 
134
 
 
 
 
 
 
 
 
 
1
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import gradio as gr

# ============================================
# Model + LoRA paths
# ============================================
BASE_MODEL = "beomi/Llama-3-Open-Ko-8B"
LORA_PATH = "./lora"  # the LoRA adapter folder uploaded into this Space repo

# ============================================
# Tokenizer and model loading
# ============================================
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# Reuse EOS as the pad token (the tokenizer defines none of its own here).
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)

# Fix: `is_local=True` is not a parameter of PeftModel.from_pretrained
# (the documented flags are `adapter_name`, `is_trainable`, `config`).
# A plain local directory path is detected automatically, so the stray
# keyword is simply dropped.
model = PeftModel.from_pretrained(model, LORA_PATH)

# Align the model's generation config with the tokenizer's EOS/pad ids.
model.config.eos_token_id = tokenizer.eos_token_id
model.config.pad_token_id = tokenizer.pad_token_id

# ============================================
# AI persona (Korean system prompt)
# ============================================
AI_PERSONALITY = """
너는 사용자의 말을 진심으로 들어주는 친구야.
사용자가 대화를 걸면 자연스럽고 일상적인 톤으로 대답해.
장황하지 말고, 공감하면서 짧고 따뜻하게 말할 것.
"""

# Conversation state: a flat list of {"role": ..., "content": ...} dicts.
# NOTE(review): module-level state is shared by every Gradio user/session —
# confirm this single-user behavior is intended for the Space.
history = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
# ============================================
# Chat function
# ============================================
def chat(user_input):
    """Generate one assistant reply for *user_input*.

    Appends the user turn to the module-level ``history``, rebuilds the full
    Llama-3-style prompt (persona + every stored turn), generates a reply,
    stores it back into ``history`` (trimmed to the last 10 messages), and
    returns the reply text.
    """
    history.append({"role": "user", "content": user_input})

    # Rebuild the prompt from the persona plus the whole stored conversation.
    prompt = "<|begin_of_text|>\n" + AI_PERSONALITY.strip() + "\n\n"
    for turn in history:
        prompt += f"<|start_header_id|>{turn['role']}<|end_header_id|>\n{turn['content']}<|eot|>\n"
    prompt += "<|start_header_id|>assistant<|end_header_id|>\n"
    # NOTE(review): Llama-3's actual end-of-turn token is "<|eot_id|>", not
    # "<|eot|>" — confirm the adapter was trained with this literal marker.

    # Use model.device instead of a hard-coded "cuda": with device_map="auto"
    # the weights may land on CPU (e.g. a CPU-only host), where .to("cuda")
    # would raise.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=256,
            # Fix: without do_sample=True, generate() decodes greedily and
            # silently ignores temperature/top_p.
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens (everything after the prompt).
    response_full = tokenizer.decode(
        output[0][inputs["input_ids"].shape[1]:],
        skip_special_tokens=True,
    )
    # Cut at the first end-of-turn marker the model may have emitted as text.
    response = response_full.split("<|eot|>")[0].strip()

    history.append({"role": "assistant", "content": response})
    if len(history) > 10:
        # Keep only the 10 most recent messages (5 user/assistant exchanges).
        history[:] = history[-10:]
    return response
66
 
67
# ============================================
# Launch the Gradio UI
# ============================================
# Single text box in, single text box out: chat() receives the user's
# message and returns the model's reply as plain text.
iface = gr.Interface(chat, "text", "text")
iface.launch()
lora/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea1662cd8eeef0905f555018d524a759a6b55de446b34bf87fd760b2c71fdb0b
3
+ size 1513
lora/adapter_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:798f804077d56c53d7c16fb297db7352ab4e19fee933c5aa02ad409cc63eb15a
3
+ size 859
lora/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c51fefb4ab1859d25ab9378941efd1b63ecbd7cd9a7f947fc9715a54c7fa2083
3
+ size 54543184
lora/chat_template.jinja ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba03a121d097859c7b5b9cd03af99aafe95275210d2876f642ad9929a150f122
3
+ size 389
lora/special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:849070cae53bd45439e64ce5b1ddd650a66081b1bd47895c5a58939a05055579
3
+ size 335
lora/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393
3
+ size 17209961
lora/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c3b1e945bb39b585d9fd6a12b21aec73e8545eae873e8968cb265e1e3bf9074
3
+ size 50630
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ peft
4
+ accelerate
5
+ bitsandbytes
6
+ gradio