Update README.md
Browse files
README.md
CHANGED
@@ -53,7 +53,7 @@ Here is the table summarizing the architecture used for training, along with the
|
|
53 |
| Hyperparameter | Value |
|
54 |
|:---------------------:|:----------:|
|
55 |
| label smoothing | 0.05 |
|
56 |
-
|
|
57 |
| betas | 0.9, 0.999 |
|
58 |
| AMSGrad | True |
|
59 |
| learning rate | 5e-4 |
|
@@ -112,6 +112,6 @@ Citation
|
|
112 |
AUTHOR = {Cyrile Delestre},
|
113 |
URL = {https://huggingface.co/cmarkea/bloomz-560m-sft-chat},
|
114 |
YEAR = {2023},
|
115 |
-
KEYWORDS = {NLP ; Transformers ; Bloomz},
|
116 |
}
|
117 |
```
|
|
|
53 |
| Hyperparameter | Value |
|
54 |
|:---------------------:|:----------:|
|
55 |
| label smoothing | 0.05 |
|
56 |
+
| optimizer | AdamW |
|
57 |
| betas | 0.9, 0.999 |
|
58 |
| AMSGrad | True |
|
59 |
| learning rate | 5e-4 |
|
|
|
112 |
AUTHOR = {Cyrile Delestre},
|
113 |
URL = {https://huggingface.co/cmarkea/bloomz-560m-sft-chat},
|
114 |
YEAR = {2023},
|
115 |
+
KEYWORDS = {NLP ; Transformers ; LLM ; Bloomz},
|
116 |
}
|
117 |
```
|