anhdungitvn
committed on
Commit
•
57c92dd
1
Parent(s):
dd765ed
Update README.md
Browse files
README.md
CHANGED
@@ -11,7 +11,142 @@ language:
|
|
11 |
### Method
|
12 |
- Prompt-Tuning/Prefix-tuning/Soft Embedding
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
### Metrics
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
| | precision | recall | f1-score | support |
|
16 |
|--------------|-----------|--------|----------|---------|
|
17 |
| 긍정 | 0.92549 | 0.944 | 0.934653 | 500 |
|
@@ -20,6 +155,13 @@ language:
|
|
20 |
| macro avg | 0.934174 | 0.934 | 0.933993 | 1000 |
|
21 |
| weighted avg | 0.934174 | 0.934 | 0.933993 | 1000 |
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
### References
|
24 |
- Prompt Tuning: <a href="https://arxiv.org/abs/2104.08691" download>**The Power of Scale for Parameter-Efficient Prompt Tuning**</a>
|
25 |
- Prompt Tuning v2: <a href="https://arxiv.org/abs/2110.07602" download>**P-Tuning v2: Prompt Tuning Can Be Comparable to Fine-tuning Universally Across Scales and Tasks**</a>
|
|
|
11 |
### Method
|
12 |
- Prompt-Tuning/Prefix-tuning/Soft Embedding
|
13 |
|
14 |
+
### Model
|
15 |
+
```
|
16 |
+
LAYER NAME #PARAMS RATIO MEM(MB)
|
17 |
+
--model: 6,177,233,921 100.00% 23552.28
|
18 |
+
--learned_embedding: 6,537,216 0.11% 24.94
|
19 |
+
--transformer: 5,906,391,041 95.62% 22519.09
|
20 |
+
--wte
|
21 |
+
--weight: 264,241,152 4.28% 1008.00
|
22 |
+
--h: 5,642,141,697 91.34% 21511.06
|
23 |
+
--0: 205,549,569 3.33% 772.11
|
24 |
+
--ln_1: 8,192 0.00% 0.03
|
25 |
+
--attn: 71,303,169 1.15% 260.00
|
26 |
+
--mlp: 134,238,208 2.17% 512.08
|
27 |
+
--1(partially shared): 201,355,264 3.26% 768.11
|
28 |
+
--ln_1: 8,192 0.00% 0.03
|
29 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
30 |
+
--mlp: 134,238,208 2.17% 512.08
|
31 |
+
--2(partially shared): 201,355,264 3.26% 768.11
|
32 |
+
--ln_1: 8,192 0.00% 0.03
|
33 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
34 |
+
--mlp: 134,238,208 2.17% 512.08
|
35 |
+
--3(partially shared): 201,355,264 3.26% 768.11
|
36 |
+
--ln_1: 8,192 0.00% 0.03
|
37 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
38 |
+
--mlp: 134,238,208 2.17% 512.08
|
39 |
+
--4(partially shared): 201,355,264 3.26% 768.11
|
40 |
+
--ln_1: 8,192 0.00% 0.03
|
41 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
42 |
+
--mlp: 134,238,208 2.17% 512.08
|
43 |
+
--5(partially shared): 201,355,264 3.26% 768.11
|
44 |
+
--ln_1: 8,192 0.00% 0.03
|
45 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
46 |
+
--mlp: 134,238,208 2.17% 512.08
|
47 |
+
--6(partially shared): 201,355,264 3.26% 768.11
|
48 |
+
--ln_1: 8,192 0.00% 0.03
|
49 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
50 |
+
--mlp: 134,238,208 2.17% 512.08
|
51 |
+
--7(partially shared): 201,355,264 3.26% 768.11
|
52 |
+
--ln_1: 8,192 0.00% 0.03
|
53 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
54 |
+
--mlp: 134,238,208 2.17% 512.08
|
55 |
+
--8(partially shared): 201,355,264 3.26% 768.11
|
56 |
+
--ln_1: 8,192 0.00% 0.03
|
57 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
58 |
+
--mlp: 134,238,208 2.17% 512.08
|
59 |
+
--9(partially shared): 201,355,264 3.26% 768.11
|
60 |
+
--ln_1: 8,192 0.00% 0.03
|
61 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
62 |
+
--mlp: 134,238,208 2.17% 512.08
|
63 |
+
--10(partially shared): 201,355,264 3.26% 768.11
|
64 |
+
--ln_1: 8,192 0.00% 0.03
|
65 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
66 |
+
--mlp: 134,238,208 2.17% 512.08
|
67 |
+
--11(partially shared): 201,355,264 3.26% 768.11
|
68 |
+
--ln_1: 8,192 0.00% 0.03
|
69 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
70 |
+
--mlp: 134,238,208 2.17% 512.08
|
71 |
+
--12(partially shared): 201,355,264 3.26% 768.11
|
72 |
+
--ln_1: 8,192 0.00% 0.03
|
73 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
74 |
+
--mlp: 134,238,208 2.17% 512.08
|
75 |
+
--13(partially shared): 201,355,264 3.26% 768.11
|
76 |
+
--ln_1: 8,192 0.00% 0.03
|
77 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
78 |
+
--mlp: 134,238,208 2.17% 512.08
|
79 |
+
--14(partially shared): 201,355,264 3.26% 768.11
|
80 |
+
--ln_1: 8,192 0.00% 0.03
|
81 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
82 |
+
--mlp: 134,238,208 2.17% 512.08
|
83 |
+
--15(partially shared): 201,355,264 3.26% 768.11
|
84 |
+
--ln_1: 8,192 0.00% 0.03
|
85 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
86 |
+
--mlp: 134,238,208 2.17% 512.08
|
87 |
+
--16(partially shared): 201,355,264 3.26% 768.11
|
88 |
+
--ln_1: 8,192 0.00% 0.03
|
89 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
90 |
+
--mlp: 134,238,208 2.17% 512.08
|
91 |
+
--17(partially shared): 201,355,264 3.26% 768.11
|
92 |
+
--ln_1: 8,192 0.00% 0.03
|
93 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
94 |
+
--mlp: 134,238,208 2.17% 512.08
|
95 |
+
--18(partially shared): 201,355,264 3.26% 768.11
|
96 |
+
--ln_1: 8,192 0.00% 0.03
|
97 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
98 |
+
--mlp: 134,238,208 2.17% 512.08
|
99 |
+
--19(partially shared): 201,355,264 3.26% 768.11
|
100 |
+
--ln_1: 8,192 0.00% 0.03
|
101 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
102 |
+
--mlp: 134,238,208 2.17% 512.08
|
103 |
+
--20(partially shared): 201,355,264 3.26% 768.11
|
104 |
+
--ln_1: 8,192 0.00% 0.03
|
105 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
106 |
+
--mlp: 134,238,208 2.17% 512.08
|
107 |
+
--21(partially shared): 201,355,264 3.26% 768.11
|
108 |
+
--ln_1: 8,192 0.00% 0.03
|
109 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
110 |
+
--mlp: 134,238,208 2.17% 512.08
|
111 |
+
--22(partially shared): 201,355,264 3.26% 768.11
|
112 |
+
--ln_1: 8,192 0.00% 0.03
|
113 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
114 |
+
--mlp: 134,238,208 2.17% 512.08
|
115 |
+
--23(partially shared): 201,355,264 3.26% 768.11
|
116 |
+
--ln_1: 8,192 0.00% 0.03
|
117 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
118 |
+
--mlp: 134,238,208 2.17% 512.08
|
119 |
+
--24(partially shared): 201,355,264 3.26% 768.11
|
120 |
+
--ln_1: 8,192 0.00% 0.03
|
121 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
122 |
+
--mlp: 134,238,208 2.17% 512.08
|
123 |
+
--25(partially shared): 201,355,264 3.26% 768.11
|
124 |
+
--ln_1: 8,192 0.00% 0.03
|
125 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
126 |
+
--mlp: 134,238,208 2.17% 512.08
|
127 |
+
--26(partially shared): 201,355,264 3.26% 768.11
|
128 |
+
--ln_1: 8,192 0.00% 0.03
|
129 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
130 |
+
--mlp: 134,238,208 2.17% 512.08
|
131 |
+
--27(partially shared): 201,355,264 3.26% 768.11
|
132 |
+
--ln_1: 8,192 0.00% 0.03
|
133 |
+
--attn(shared): 67,108,864 1.09% 256.00
|
134 |
+
--mlp: 134,238,208 2.17% 512.08
|
135 |
+
--ln_f: 8,192 0.00% 0.03
|
136 |
+
--weight: 4,096 0.00% 0.02
|
137 |
+
--bias: 4,096 0.00% 0.02
|
138 |
+
--lm_head: 264,305,664 4.28% 1008.25
|
139 |
+
--weight: 264,241,152 4.28% 1008.00
|
140 |
+
--bias: 64,512 0.00% 0.25
|
141 |
+
```
|
142 |
+
|
143 |
### Metrics
|
144 |
+
|
145 |
+
| Metric | Value |
|
146 |
+
|--------|--------|
|
147 |
+
| step | 520 |
|
148 |
+
| loss | 3.1814 |
|
149 |
+
|
150 |
| | precision | recall | f1-score | support |
|
151 |
|--------------|-----------|--------|----------|---------|
|
152 |
| 긍정 | 0.92549 | 0.944 | 0.934653 | 500 |
|
|
|
155 |
| macro avg | 0.934174 | 0.934 | 0.933993 | 1000 |
|
156 |
| weighted avg | 0.934174 | 0.934 | 0.933993 | 1000 |
|
157 |
|
158 |
+
|
159 |
+
<img src="https://huggingface.co/anhdungitvn/ko-gpt-bot-sc/resolve/main/metrics/loss.png" width="800">
|
160 |
+
<img src="https://huggingface.co/anhdungitvn/ko-gpt-bot-sc/resolve/main/metrics/labels_preds.png" width="800">
|
161 |
+
<img src="https://huggingface.co/anhdungitvn/ko-gpt-bot-sc/resolve/main/metrics/confusion_viz.png" width="800">
|
162 |
+
|
163 |
+
|
164 |
+
|
165 |
### References
|
166 |
- Prompt Tuning: <a href="https://arxiv.org/abs/2104.08691" download>**The Power of Scale for Parameter-Efficient Prompt Tuning**</a>
|
167 |
- Prompt Tuning v2: <a href="https://arxiv.org/abs/2110.07602" download>**P-Tuning v2: Prompt Tuning Can Be Comparable to Fine-tuning Universally Across Scales and Tasks**</a>
|