"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n",
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...\n",
"/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.\n",
" warnings.warn(\n",
"The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.bfloat16.\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [622/622 41:28, Epoch 1/1]\n",
"
\n",
" \n",
" \n",
" \n",
" Step | \n",
" Training Loss | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" 1.197200 | \n",
"
\n",
" \n",
" 2 | \n",
" 1.141000 | \n",
"
\n",
" \n",
" 3 | \n",
" 1.131400 | \n",
"
\n",
" \n",
" 4 | \n",
" 1.086400 | \n",
"
\n",
" \n",
" 5 | \n",
" 1.089900 | \n",
"
\n",
" \n",
" 6 | \n",
" 1.004200 | \n",
"
\n",
" \n",
" 7 | \n",
" 1.032800 | \n",
"
\n",
" \n",
" 8 | \n",
" 1.062700 | \n",
"
\n",
" \n",
" 9 | \n",
" 1.045000 | \n",
"
\n",
" \n",
" 10 | \n",
" 0.994600 | \n",
"
\n",
" \n",
" 11 | \n",
" 0.979000 | \n",
"
\n",
" \n",
" 12 | \n",
" 0.966600 | \n",
"
\n",
" \n",
" 13 | \n",
" 0.980000 | \n",
"
\n",
" \n",
" 14 | \n",
" 0.914500 | \n",
"
\n",
" \n",
" 15 | \n",
" 0.952300 | \n",
"
\n",
" \n",
" 16 | \n",
" 0.915400 | \n",
"
\n",
" \n",
" 17 | \n",
" 0.941800 | \n",
"
\n",
" \n",
" 18 | \n",
" 0.949200 | \n",
"
\n",
" \n",
" 19 | \n",
" 0.864800 | \n",
"
\n",
" \n",
" 20 | \n",
" 0.937400 | \n",
"
\n",
" \n",
" 21 | \n",
" 0.959400 | \n",
"
\n",
" \n",
" 22 | \n",
" 0.929800 | \n",
"
\n",
" \n",
" 23 | \n",
" 0.892400 | \n",
"
\n",
" \n",
" 24 | \n",
" 0.900700 | \n",
"
\n",
" \n",
" 25 | \n",
" 0.891200 | \n",
"
\n",
" \n",
" 26 | \n",
" 0.910400 | \n",
"
\n",
" \n",
" 27 | \n",
" 0.850800 | \n",
"
\n",
" \n",
" 28 | \n",
" 0.912600 | \n",
"
\n",
" \n",
" 29 | \n",
" 0.832900 | \n",
"
\n",
" \n",
" 30 | \n",
" 0.846400 | \n",
"
\n",
" \n",
" 31 | \n",
" 0.840500 | \n",
"
\n",
" \n",
" 32 | \n",
" 0.856000 | \n",
"
\n",
" \n",
" 33 | \n",
" 0.793800 | \n",
"
\n",
" \n",
" 34 | \n",
" 0.901100 | \n",
"
\n",
" \n",
" 35 | \n",
" 0.871500 | \n",
"
\n",
" \n",
" 36 | \n",
" 0.834300 | \n",
"
\n",
" \n",
" 37 | \n",
" 0.832300 | \n",
"
\n",
" \n",
" 38 | \n",
" 0.810800 | \n",
"
\n",
" \n",
" 39 | \n",
" 0.840100 | \n",
"
\n",
" \n",
" 40 | \n",
" 0.886200 | \n",
"
\n",
" \n",
" 41 | \n",
" 0.823800 | \n",
"
\n",
" \n",
" 42 | \n",
" 0.823300 | \n",
"
\n",
" \n",
" 43 | \n",
" 0.868200 | \n",
"
\n",
" \n",
" 44 | \n",
" 0.851900 | \n",
"
\n",
" \n",
" 45 | \n",
" 0.845500 | \n",
"
\n",
" \n",
" 46 | \n",
" 0.829100 | \n",
"
\n",
" \n",
" 47 | \n",
" 0.826400 | \n",
"
\n",
" \n",
" 48 | \n",
" 0.850900 | \n",
"
\n",
" \n",
" 49 | \n",
" 0.808600 | \n",
"
\n",
" \n",
" 50 | \n",
" 0.832700 | \n",
"
\n",
" \n",
" 51 | \n",
" 0.784200 | \n",
"
\n",
" \n",
" 52 | \n",
" 0.810200 | \n",
"
\n",
" \n",
" 53 | \n",
" 0.785500 | \n",
"
\n",
" \n",
" 54 | \n",
" 0.776400 | \n",
"
\n",
" \n",
" 55 | \n",
" 0.784800 | \n",
"
\n",
" \n",
" 56 | \n",
" 0.796800 | \n",
"
\n",
" \n",
" 57 | \n",
" 0.803300 | \n",
"
\n",
" \n",
" 58 | \n",
" 0.776000 | \n",
"
\n",
" \n",
" 59 | \n",
" 0.829500 | \n",
"
\n",
" \n",
" 60 | \n",
" 0.748200 | \n",
"
\n",
" \n",
" 61 | \n",
" 0.778100 | \n",
"
\n",
" \n",
" 62 | \n",
" 0.757000 | \n",
"
\n",
" \n",
" 63 | \n",
" 0.818700 | \n",
"
\n",
" \n",
" 64 | \n",
" 0.846200 | \n",
"
\n",
" \n",
" 65 | \n",
" 0.811500 | \n",
"
\n",
" \n",
" 66 | \n",
" 0.804400 | \n",
"
\n",
" \n",
" 67 | \n",
" 0.752500 | \n",
"
\n",
" \n",
" 68 | \n",
" 0.768000 | \n",
"
\n",
" \n",
" 69 | \n",
" 0.773200 | \n",
"
\n",
" \n",
" 70 | \n",
" 0.763800 | \n",
"
\n",
" \n",
" 71 | \n",
" 0.725100 | \n",
"
\n",
" \n",
" 72 | \n",
" 0.794800 | \n",
"
\n",
" \n",
" 73 | \n",
" 0.734700 | \n",
"
\n",
" \n",
" 74 | \n",
" 0.732800 | \n",
"
\n",
" \n",
" 75 | \n",
" 0.758000 | \n",
"
\n",
" \n",
" 76 | \n",
" 0.710200 | \n",
"
\n",
" \n",
" 77 | \n",
" 0.781100 | \n",
"
\n",
" \n",
" 78 | \n",
" 0.753400 | \n",
"
\n",
" \n",
" 79 | \n",
" 0.701600 | \n",
"
\n",
" \n",
" 80 | \n",
" 0.758800 | \n",
"
\n",
" \n",
" 81 | \n",
" 0.837000 | \n",
"
\n",
" \n",
" 82 | \n",
" 0.789900 | \n",
"
\n",
" \n",
" 83 | \n",
" 0.775300 | \n",
"
\n",
" \n",
" 84 | \n",
" 0.737000 | \n",
"
\n",
" \n",
" 85 | \n",
" 0.776300 | \n",
"
\n",
" \n",
" 86 | \n",
" 0.755400 | \n",
"
\n",
" \n",
" 87 | \n",
" 0.745100 | \n",
"
\n",
" \n",
" 88 | \n",
" 0.743800 | \n",
"
\n",
" \n",
" 89 | \n",
" 0.693900 | \n",
"
\n",
" \n",
" 90 | \n",
" 0.733400 | \n",
"
\n",
" \n",
" 91 | \n",
" 0.786900 | \n",
"
\n",
" \n",
" 92 | \n",
" 0.766600 | \n",
"
\n",
" \n",
" 93 | \n",
" 0.769400 | \n",
"
\n",
" \n",
" 94 | \n",
" 0.720600 | \n",
"
\n",
" \n",
" 95 | \n",
" 0.730200 | \n",
"
\n",
" \n",
" 96 | \n",
" 0.729800 | \n",
"
\n",
" \n",
" 97 | \n",
" 0.740800 | \n",
"
\n",
" \n",
" 98 | \n",
" 0.767000 | \n",
"
\n",
" \n",
" 99 | \n",
" 0.757500 | \n",
"
\n",
" \n",
" 100 | \n",
" 0.737800 | \n",
"
\n",
" \n",
" 101 | \n",
" 0.728100 | \n",
"
\n",
" \n",
" 102 | \n",
" 0.755200 | \n",
"
\n",
" \n",
" 103 | \n",
" 0.698300 | \n",
"
\n",
" \n",
" 104 | \n",
" 0.711400 | \n",
"
\n",
" \n",
" 105 | \n",
" 0.766700 | \n",
"
\n",
" \n",
" 106 | \n",
" 0.749500 | \n",
"
\n",
" \n",
" 107 | \n",
" 0.705200 | \n",
"
\n",
" \n",
" 108 | \n",
" 0.680300 | \n",
"
\n",
" \n",
" 109 | \n",
" 0.674500 | \n",
"
\n",
" \n",
" 110 | \n",
" 0.706600 | \n",
"
\n",
" \n",
" 111 | \n",
" 0.759000 | \n",
"
\n",
" \n",
" 112 | \n",
" 0.699500 | \n",
"
\n",
" \n",
" 113 | \n",
" 0.709700 | \n",
"
\n",
" \n",
" 114 | \n",
" 0.714800 | \n",
"
\n",
" \n",
" 115 | \n",
" 0.708000 | \n",
"
\n",
" \n",
" 116 | \n",
" 0.700300 | \n",
"
\n",
" \n",
" 117 | \n",
" 0.673500 | \n",
"
\n",
" \n",
" 118 | \n",
" 0.760100 | \n",
"
\n",
" \n",
" 119 | \n",
" 0.694300 | \n",
"
\n",
" \n",
" 120 | \n",
" 0.706500 | \n",
"
\n",
" \n",
" 121 | \n",
" 0.721300 | \n",
"
\n",
" \n",
" 122 | \n",
" 0.698400 | \n",
"
\n",
" \n",
" 123 | \n",
" 0.738900 | \n",
"
\n",
" \n",
" 124 | \n",
" 0.729600 | \n",
"
\n",
" \n",
" 125 | \n",
" 0.696200 | \n",
"
\n",
" \n",
" 126 | \n",
" 0.676000 | \n",
"
\n",
" \n",
" 127 | \n",
" 0.695700 | \n",
"
\n",
" \n",
" 128 | \n",
" 0.729200 | \n",
"
\n",
" \n",
" 129 | \n",
" 0.730000 | \n",
"
\n",
" \n",
" 130 | \n",
" 0.719900 | \n",
"
\n",
" \n",
" 131 | \n",
" 0.726200 | \n",
"
\n",
" \n",
" 132 | \n",
" 0.693100 | \n",
"
\n",
" \n",
" 133 | \n",
" 0.706900 | \n",
"
\n",
" \n",
" 134 | \n",
" 0.708700 | \n",
"
\n",
" \n",
" 135 | \n",
" 0.691700 | \n",
"
\n",
" \n",
" 136 | \n",
" 0.682500 | \n",
"
\n",
" \n",
" 137 | \n",
" 0.727800 | \n",
"
\n",
" \n",
" 138 | \n",
" 0.633700 | \n",
"
\n",
" \n",
" 139 | \n",
" 0.710700 | \n",
"
\n",
" \n",
" 140 | \n",
" 0.653100 | \n",
"
\n",
" \n",
" 141 | \n",
" 0.717000 | \n",
"
\n",
" \n",
" 142 | \n",
" 0.732800 | \n",
"
\n",
" \n",
" 143 | \n",
" 0.677000 | \n",
"
\n",
" \n",
" 144 | \n",
" 0.688600 | \n",
"
\n",
" \n",
" 145 | \n",
" 0.673100 | \n",
"
\n",
" \n",
" 146 | \n",
" 0.678900 | \n",
"
\n",
" \n",
" 147 | \n",
" 0.679900 | \n",
"
\n",
" \n",
" 148 | \n",
" 0.667800 | \n",
"
\n",
" \n",
" 149 | \n",
" 0.643900 | \n",
"
\n",
" \n",
" 150 | \n",
" 0.679000 | \n",
"
\n",
" \n",
" 151 | \n",
" 0.666700 | \n",
"
\n",
" \n",
" 152 | \n",
" 0.695600 | \n",
"
\n",
" \n",
" 153 | \n",
" 0.655300 | \n",
"
\n",
" \n",
" 154 | \n",
" 0.710500 | \n",
"
\n",
" \n",
" 155 | \n",
" 0.659700 | \n",
"
\n",
" \n",
" 156 | \n",
" 0.717600 | \n",
"
\n",
" \n",
" 157 | \n",
" 0.657500 | \n",
"
\n",
" \n",
" 158 | \n",
" 0.657900 | \n",
"
\n",
" \n",
" 159 | \n",
" 0.695600 | \n",
"
\n",
" \n",
" 160 | \n",
" 0.673400 | \n",
"
\n",
" \n",
" 161 | \n",
" 0.642500 | \n",
"
\n",
" \n",
" 162 | \n",
" 0.702800 | \n",
"
\n",
" \n",
" 163 | \n",
" 0.713500 | \n",
"
\n",
" \n",
" 164 | \n",
" 0.674100 | \n",
"
\n",
" \n",
" 165 | \n",
" 0.746000 | \n",
"
\n",
" \n",
" 166 | \n",
" 0.676800 | \n",
"
\n",
" \n",
" 167 | \n",
" 0.669100 | \n",
"
\n",
" \n",
" 168 | \n",
" 0.668800 | \n",
"
\n",
" \n",
" 169 | \n",
" 0.655000 | \n",
"
\n",
" \n",
" 170 | \n",
" 0.684400 | \n",
"
\n",
" \n",
" 171 | \n",
" 0.688200 | \n",
"
\n",
" \n",
" 172 | \n",
" 0.705100 | \n",
"
\n",
" \n",
" 173 | \n",
" 0.669600 | \n",
"
\n",
" \n",
" 174 | \n",
" 0.654800 | \n",
"
\n",
" \n",
" 175 | \n",
" 0.691300 | \n",
"
\n",
" \n",
" 176 | \n",
" 0.640200 | \n",
"
\n",
" \n",
" 177 | \n",
" 0.691600 | \n",
"
\n",
" \n",
" 178 | \n",
" 0.701600 | \n",
"
\n",
" \n",
" 179 | \n",
" 0.718500 | \n",
"
\n",
" \n",
" 180 | \n",
" 0.629500 | \n",
"
\n",
" \n",
" 181 | \n",
" 0.706600 | \n",
"
\n",
" \n",
" 182 | \n",
" 0.661800 | \n",
"
\n",
" \n",
" 183 | \n",
" 0.649300 | \n",
"
\n",
" \n",
" 184 | \n",
" 0.687800 | \n",
"
\n",
" \n",
" 185 | \n",
" 0.623300 | \n",
"
\n",
" \n",
" 186 | \n",
" 0.729500 | \n",
"
\n",
" \n",
" 187 | \n",
" 0.645000 | \n",
"
\n",
" \n",
" 188 | \n",
" 0.723100 | \n",
"
\n",
" \n",
" 189 | \n",
" 0.665900 | \n",
"
\n",
" \n",
" 190 | \n",
" 0.628100 | \n",
"
\n",
" \n",
" 191 | \n",
" 0.707700 | \n",
"
\n",
" \n",
" 192 | \n",
" 0.676500 | \n",
"
\n",
" \n",
" 193 | \n",
" 0.644600 | \n",
"
\n",
" \n",
" 194 | \n",
" 0.658400 | \n",
"
\n",
" \n",
" 195 | \n",
" 0.729700 | \n",
"
\n",
" \n",
" 196 | \n",
" 0.668800 | \n",
"
\n",
" \n",
" 197 | \n",
" 0.672800 | \n",
"
\n",
" \n",
" 198 | \n",
" 0.667000 | \n",
"
\n",
" \n",
" 199 | \n",
" 0.679100 | \n",
"
\n",
" \n",
" 200 | \n",
" 0.656400 | \n",
"
\n",
" \n",
" 201 | \n",
" 0.633200 | \n",
"
\n",
" \n",
" 202 | \n",
" 0.651700 | \n",
"
\n",
" \n",
" 203 | \n",
" 0.648600 | \n",
"
\n",
" \n",
" 204 | \n",
" 0.603300 | \n",
"
\n",
" \n",
" 205 | \n",
" 0.655100 | \n",
"
\n",
" \n",
" 206 | \n",
" 0.637800 | \n",
"
\n",
" \n",
" 207 | \n",
" 0.624800 | \n",
"
\n",
" \n",
" 208 | \n",
" 0.635600 | \n",
"
\n",
" \n",
" 209 | \n",
" 0.640000 | \n",
"
\n",
" \n",
" 210 | \n",
" 0.693500 | \n",
"
\n",
" \n",
" 211 | \n",
" 0.677000 | \n",
"
\n",
" \n",
" 212 | \n",
" 0.625200 | \n",
"
\n",
" \n",
" 213 | \n",
" 0.668800 | \n",
"
\n",
" \n",
" 214 | \n",
" 0.633200 | \n",
"
\n",
" \n",
" 215 | \n",
" 0.643800 | \n",
"
\n",
" \n",
" 216 | \n",
" 0.677900 | \n",
"
\n",
" \n",
" 217 | \n",
" 0.602000 | \n",
"
\n",
" \n",
" 218 | \n",
" 0.616500 | \n",
"
\n",
" \n",
" 219 | \n",
" 0.653500 | \n",
"
\n",
" \n",
" 220 | \n",
" 0.641100 | \n",
"
\n",
" \n",
" 221 | \n",
" 0.624500 | \n",
"
\n",
" \n",
" 222 | \n",
" 0.684600 | \n",
"
\n",
" \n",
" 223 | \n",
" 0.670300 | \n",
"
\n",
" \n",
" 224 | \n",
" 0.675900 | \n",
"
\n",
" \n",
" 225 | \n",
" 0.609500 | \n",
"
\n",
" \n",
" 226 | \n",
" 0.600900 | \n",
"
\n",
" \n",
" 227 | \n",
" 0.642300 | \n",
"
\n",
" \n",
" 228 | \n",
" 0.607700 | \n",
"
\n",
" \n",
" 229 | \n",
" 0.666700 | \n",
"
\n",
" \n",
" 230 | \n",
" 0.613300 | \n",
"
\n",
" \n",
" 231 | \n",
" 0.661400 | \n",
"
\n",
" \n",
" 232 | \n",
" 0.661800 | \n",
"
\n",
" \n",
" 233 | \n",
" 0.627900 | \n",
"
\n",
" \n",
" 234 | \n",
" 0.707200 | \n",
"
\n",
" \n",
" 235 | \n",
" 0.611800 | \n",
"
\n",
" \n",
" 236 | \n",
" 0.611900 | \n",
"
\n",
" \n",
" 237 | \n",
" 0.574400 | \n",
"
\n",
" \n",
" 238 | \n",
" 0.623300 | \n",
"
\n",
" \n",
" 239 | \n",
" 0.681000 | \n",
"
\n",
" \n",
" 240 | \n",
" 0.622300 | \n",
"
\n",
" \n",
" 241 | \n",
" 0.651900 | \n",
"
\n",
" \n",
" 242 | \n",
" 0.614700 | \n",
"
\n",
" \n",
" 243 | \n",
" 0.654900 | \n",
"
\n",
" \n",
" 244 | \n",
" 0.663600 | \n",
"
\n",
" \n",
" 245 | \n",
" 0.670500 | \n",
"
\n",
" \n",
" 246 | \n",
" 0.619700 | \n",
"
\n",
" \n",
" 247 | \n",
" 0.586900 | \n",
"
\n",
" \n",
" 248 | \n",
" 0.644200 | \n",
"
\n",
" \n",
" 249 | \n",
" 0.614600 | \n",
"
\n",
" \n",
" 250 | \n",
" 0.641000 | \n",
"
\n",
" \n",
" 251 | \n",
" 0.633500 | \n",
"
\n",
" \n",
" 252 | \n",
" 0.645700 | \n",
"
\n",
" \n",
" 253 | \n",
" 0.672500 | \n",
"
\n",
" \n",
" 254 | \n",
" 0.635300 | \n",
"
\n",
" \n",
" 255 | \n",
" 0.644100 | \n",
"
\n",
" \n",
" 256 | \n",
" 0.641300 | \n",
"
\n",
" \n",
" 257 | \n",
" 0.569300 | \n",
"
\n",
" \n",
" 258 | \n",
" 0.674100 | \n",
"
\n",
" \n",
" 259 | \n",
" 0.622000 | \n",
"
\n",
" \n",
" 260 | \n",
" 0.659600 | \n",
"
\n",
" \n",
" 261 | \n",
" 0.605200 | \n",
"
\n",
" \n",
" 262 | \n",
" 0.628800 | \n",
"
\n",
" \n",
" 263 | \n",
" 0.606600 | \n",
"
\n",
" \n",
" 264 | \n",
" 0.591900 | \n",
"
\n",
" \n",
" 265 | \n",
" 0.623100 | \n",
"
\n",
" \n",
" 266 | \n",
" 0.604400 | \n",
"
\n",
" \n",
" 267 | \n",
" 0.605600 | \n",
"
\n",
" \n",
" 268 | \n",
" 0.655400 | \n",
"
\n",
" \n",
" 269 | \n",
" 0.695500 | \n",
"
\n",
" \n",
" 270 | \n",
" 0.618400 | \n",
"
\n",
" \n",
" 271 | \n",
" 0.669500 | \n",
"
\n",
" \n",
" 272 | \n",
" 0.641000 | \n",
"
\n",
" \n",
" 273 | \n",
" 0.626000 | \n",
"
\n",
" \n",
" 274 | \n",
" 0.617500 | \n",
"
\n",
" \n",
" 275 | \n",
" 0.620000 | \n",
"
\n",
" \n",
" 276 | \n",
" 0.638700 | \n",
"
\n",
" \n",
" 277 | \n",
" 0.592700 | \n",
"
\n",
" \n",
" 278 | \n",
" 0.648200 | \n",
"
\n",
" \n",
" 279 | \n",
" 0.636100 | \n",
"
\n",
" \n",
" 280 | \n",
" 0.581300 | \n",
"
\n",
" \n",
" 281 | \n",
" 0.557300 | \n",
"
\n",
" \n",
" 282 | \n",
" 0.643300 | \n",
"
\n",
" \n",
" 283 | \n",
" 0.646800 | \n",
"
\n",
" \n",
" 284 | \n",
" 0.625300 | \n",
"
\n",
" \n",
" 285 | \n",
" 0.654400 | \n",
"
\n",
" \n",
" 286 | \n",
" 0.607100 | \n",
"
\n",
" \n",
" 287 | \n",
" 0.593400 | \n",
"
\n",
" \n",
" 288 | \n",
" 0.596900 | \n",
"
\n",
" \n",
" 289 | \n",
" 0.539600 | \n",
"
\n",
" \n",
" 290 | \n",
" 0.620200 | \n",
"
\n",
" \n",
" 291 | \n",
" 0.595400 | \n",
"
\n",
" \n",
" 292 | \n",
" 0.589700 | \n",
"
\n",
" \n",
" 293 | \n",
" 0.642000 | \n",
"
\n",
" \n",
" 294 | \n",
" 0.569100 | \n",
"
\n",
" \n",
" 295 | \n",
" 0.595600 | \n",
"
\n",
" \n",
" 296 | \n",
" 0.594500 | \n",
"
\n",
" \n",
" 297 | \n",
" 0.646400 | \n",
"
\n",
" \n",
" 298 | \n",
" 0.630300 | \n",
"
\n",
" \n",
" 299 | \n",
" 0.658800 | \n",
"
\n",
" \n",
" 300 | \n",
" 0.614100 | \n",
"
\n",
" \n",
" 301 | \n",
" 0.663500 | \n",
"
\n",
" \n",
" 302 | \n",
" 0.649000 | \n",
"
\n",
" \n",
" 303 | \n",
" 0.609400 | \n",
"
\n",
" \n",
" 304 | \n",
" 0.615200 | \n",
"
\n",
" \n",
" 305 | \n",
" 0.628400 | \n",
"
\n",
" \n",
" 306 | \n",
" 0.599600 | \n",
"
\n",
" \n",
" 307 | \n",
" 0.611500 | \n",
"
\n",
" \n",
" 308 | \n",
" 0.605600 | \n",
"
\n",
" \n",
" 309 | \n",
" 0.590200 | \n",
"
\n",
" \n",
" 310 | \n",
" 0.607900 | \n",
"
\n",
" \n",
" 311 | \n",
" 0.627600 | \n",
"
\n",
" \n",
" 312 | \n",
" 0.623900 | \n",
"
\n",
" \n",
" 313 | \n",
" 0.643100 | \n",
"
\n",
" \n",
" 314 | \n",
" 0.609400 | \n",
"
\n",
" \n",
" 315 | \n",
" 0.582000 | \n",
"
\n",
" \n",
" 316 | \n",
" 0.574000 | \n",
"
\n",
" \n",
" 317 | \n",
" 0.600700 | \n",
"
\n",
" \n",
" 318 | \n",
" 0.599200 | \n",
"
\n",
" \n",
" 319 | \n",
" 0.596700 | \n",
"
\n",
" \n",
" 320 | \n",
" 0.620400 | \n",
"
\n",
" \n",
" 321 | \n",
" 0.579700 | \n",
"
\n",
" \n",
" 322 | \n",
" 0.666400 | \n",
"
\n",
" \n",
" 323 | \n",
" 0.576000 | \n",
"
\n",
" \n",
" 324 | \n",
" 0.644500 | \n",
"
\n",
" \n",
" 325 | \n",
" 0.593400 | \n",
"
\n",
" \n",
" 326 | \n",
" 0.624900 | \n",
"
\n",
" \n",
" 327 | \n",
" 0.577800 | \n",
"
\n",
" \n",
" 328 | \n",
" 0.618400 | \n",
"
\n",
" \n",
" 329 | \n",
" 0.586700 | \n",
"
\n",
" \n",
" 330 | \n",
" 0.608200 | \n",
"
\n",
" \n",
" 331 | \n",
" 0.598000 | \n",
"
\n",
" \n",
" 332 | \n",
" 0.580400 | \n",
"
\n",
" \n",
" 333 | \n",
" 0.624300 | \n",
"
\n",
" \n",
" 334 | \n",
" 0.567800 | \n",
"
\n",
" \n",
" 335 | \n",
" 0.593700 | \n",
"
\n",
" \n",
" 336 | \n",
" 0.554100 | \n",
"
\n",
" \n",
" 337 | \n",
" 0.719700 | \n",
"
\n",
" \n",
" 338 | \n",
" 0.551600 | \n",
"
\n",
" \n",
" 339 | \n",
" 0.565500 | \n",
"
\n",
" \n",
" 340 | \n",
" 0.590000 | \n",
"
\n",
" \n",
" 341 | \n",
" 0.591700 | \n",
"
\n",
" \n",
" 342 | \n",
" 0.584800 | \n",
"
\n",
" \n",
" 343 | \n",
" 0.605800 | \n",
"
\n",
" \n",
" 344 | \n",
" 0.641100 | \n",
"
\n",
" \n",
" 345 | \n",
" 0.588000 | \n",
"
\n",
" \n",
" 346 | \n",
" 0.615200 | \n",
"
\n",
" \n",
" 347 | \n",
" 0.567100 | \n",
"
\n",
" \n",
" 348 | \n",
" 0.610200 | \n",
"
\n",
" \n",
" 349 | \n",
" 0.626000 | \n",
"
\n",
" \n",
" 350 | \n",
" 0.610900 | \n",
"
\n",
" \n",
" 351 | \n",
" 0.591800 | \n",
"
\n",
" \n",
" 352 | \n",
" 0.585600 | \n",
"
\n",
" \n",
" 353 | \n",
" 0.599700 | \n",
"
\n",
" \n",
" 354 | \n",
" 0.606800 | \n",
"
\n",
" \n",
" 355 | \n",
" 0.571400 | \n",
"
\n",
" \n",
" 356 | \n",
" 0.612700 | \n",
"
\n",
" \n",
" 357 | \n",
" 0.585900 | \n",
"
\n",
" \n",
" 358 | \n",
" 0.625800 | \n",
"
\n",
" \n",
" 359 | \n",
" 0.642900 | \n",
"
\n",
" \n",
" 360 | \n",
" 0.550300 | \n",
"
\n",
" \n",
" 361 | \n",
" 0.566100 | \n",
"
\n",
" \n",
" 362 | \n",
" 0.604000 | \n",
"
\n",
" \n",
" 363 | \n",
" 0.600600 | \n",
"
\n",
" \n",
" 364 | \n",
" 0.627300 | \n",
"
\n",
" \n",
" 365 | \n",
" 0.521300 | \n",
"
\n",
" \n",
" 366 | \n",
" 0.622500 | \n",
"
\n",
" \n",
" 367 | \n",
" 0.562700 | \n",
"
\n",
" \n",
" 368 | \n",
" 0.577400 | \n",
"
\n",
" \n",
" 369 | \n",
" 0.546600 | \n",
"
\n",
" \n",
" 370 | \n",
" 0.576200 | \n",
"
\n",
" \n",
" 371 | \n",
" 0.582100 | \n",
"
\n",
" \n",
" 372 | \n",
" 0.604100 | \n",
"
\n",
" \n",
" 373 | \n",
" 0.632300 | \n",
"
\n",
" \n",
" 374 | \n",
" 0.626800 | \n",
"
\n",
" \n",
" 375 | \n",
" 0.593400 | \n",
"
\n",
" \n",
" 376 | \n",
" 0.614400 | \n",
"
\n",
" \n",
" 377 | \n",
" 0.566200 | \n",
"
\n",
" \n",
" 378 | \n",
" 0.608800 | \n",
"
\n",
" \n",
" 379 | \n",
" 0.562100 | \n",
"
\n",
" \n",
" 380 | \n",
" 0.564600 | \n",
"
\n",
" \n",
" 381 | \n",
" 0.576500 | \n",
"
\n",
" \n",
" 382 | \n",
" 0.572100 | \n",
"
\n",
" \n",
" 383 | \n",
" 0.573600 | \n",
"
\n",
" \n",
" 384 | \n",
" 0.600700 | \n",
"
\n",
" \n",
" 385 | \n",
" 0.500700 | \n",
"
\n",
" \n",
" 386 | \n",
" 0.618800 | \n",
"
\n",
" \n",
" 387 | \n",
" 0.561100 | \n",
"
\n",
" \n",
" 388 | \n",
" 0.605900 | \n",
"
\n",
" \n",
" 389 | \n",
" 0.579300 | \n",
"
\n",
" \n",
" 390 | \n",
" 0.615000 | \n",
"
\n",
" \n",
" 391 | \n",
" 0.540200 | \n",
"
\n",
" \n",
" 392 | \n",
" 0.561600 | \n",
"
\n",
" \n",
" 393 | \n",
" 0.563700 | \n",
"
\n",
" \n",
" 394 | \n",
" 0.573000 | \n",
"
\n",
" \n",
" 395 | \n",
" 0.597400 | \n",
"
\n",
" \n",
" 396 | \n",
" 0.554300 | \n",
"
\n",
" \n",
" 397 | \n",
" 0.565700 | \n",
"
\n",
" \n",
" 398 | \n",
" 0.620500 | \n",
"
\n",
" \n",
" 399 | \n",
" 0.513900 | \n",
"
\n",
" \n",
" 400 | \n",
" 0.539300 | \n",
"
\n",
" \n",
" 401 | \n",
" 0.609100 | \n",
"
\n",
" \n",
" 402 | \n",
" 0.547700 | \n",
"
\n",
" \n",
" 403 | \n",
" 0.557300 | \n",
"
\n",
" \n",
" 404 | \n",
" 0.585300 | \n",
"
\n",
" \n",
" 405 | \n",
" 0.586300 | \n",
"
\n",
" \n",
" 406 | \n",
" 0.598300 | \n",
"
\n",
" \n",
" 407 | \n",
" 0.547800 | \n",
"
\n",
" \n",
" 408 | \n",
" 0.530200 | \n",
"
\n",
" \n",
" 409 | \n",
" 0.620100 | \n",
"
\n",
" \n",
" 410 | \n",
" 0.568500 | \n",
"
\n",
" \n",
" 411 | \n",
" 0.596900 | \n",
"
\n",
" \n",
" 412 | \n",
" 0.610400 | \n",
"
\n",
" \n",
" 413 | \n",
" 0.587900 | \n",
"
\n",
" \n",
" 414 | \n",
" 0.553600 | \n",
"
\n",
" \n",
" 415 | \n",
" 0.608500 | \n",
"
\n",
" \n",
" 416 | \n",
" 0.519700 | \n",
"
\n",
" \n",
" 417 | \n",
" 0.613200 | \n",
"
\n",
" \n",
" 418 | \n",
" 0.579200 | \n",
"
\n",
" \n",
" 419 | \n",
" 0.613900 | \n",
"
\n",
" \n",
" 420 | \n",
" 0.596300 | \n",
"
\n",
" \n",
" 421 | \n",
" 0.546900 | \n",
"
\n",
" \n",
" 422 | \n",
" 0.589300 | \n",
"
\n",
" \n",
" 423 | \n",
" 0.589900 | \n",
"
\n",
" \n",
" 424 | \n",
" 0.580600 | \n",
"
\n",
" \n",
" 425 | \n",
" 0.584400 | \n",
"
\n",
" \n",
" 426 | \n",
" 0.639800 | \n",
"
\n",
" \n",
" 427 | \n",
" 0.584700 | \n",
"
\n",
" \n",
" 428 | \n",
" 0.596400 | \n",
"
\n",
" \n",
" 429 | \n",
" 0.532800 | \n",
"
\n",
" \n",
" 430 | \n",
" 0.629400 | \n",
"
\n",
" \n",
" 431 | \n",
" 0.560600 | \n",
"
\n",
" \n",
" 432 | \n",
" 0.565700 | \n",
"
\n",
" \n",
" 433 | \n",
" 0.570000 | \n",
"
\n",
" \n",
" 434 | \n",
" 0.595200 | \n",
"
\n",
" \n",
" 435 | \n",
" 0.554300 | \n",
"
\n",
" \n",
" 436 | \n",
" 0.626400 | \n",
"
\n",
" \n",
" 437 | \n",
" 0.611700 | \n",
"
\n",
" \n",
" 438 | \n",
" 0.584300 | \n",
"
\n",
" \n",
" 439 | \n",
" 0.574700 | \n",
"
\n",
" \n",
" 440 | \n",
" 0.611400 | \n",
"
\n",
" \n",
" 441 | \n",
" 0.554900 | \n",
"
\n",
" \n",
" 442 | \n",
" 0.586000 | \n",
"
\n",
" \n",
" 443 | \n",
" 0.594200 | \n",
"
\n",
" \n",
" 444 | \n",
" 0.532100 | \n",
"
\n",
" \n",
" 445 | \n",
" 0.580600 | \n",
"
\n",
" \n",
" 446 | \n",
" 0.590500 | \n",
"
\n",
" \n",
" 447 | \n",
" 0.551300 | \n",
"
\n",
" \n",
" 448 | \n",
" 0.556200 | \n",
"
\n",
" \n",
" 449 | \n",
" 0.566300 | \n",
"
\n",
" \n",
" 450 | \n",
" 0.600100 | \n",
"
\n",
" \n",
" 451 | \n",
" 0.597400 | \n",
"
\n",
" \n",
" 452 | \n",
" 0.526500 | \n",
"
\n",
" \n",
" 453 | \n",
" 0.609900 | \n",
"
\n",
" \n",
" 454 | \n",
" 0.572600 | \n",
"
\n",
" \n",
" 455 | \n",
" 0.629700 | \n",
"
\n",
" \n",
" 456 | \n",
" 0.509900 | \n",
"
\n",
" \n",
" 457 | \n",
" 0.585800 | \n",
"
\n",
" \n",
" 458 | \n",
" 0.569600 | \n",
"
\n",
" \n",
" 459 | \n",
" 0.541300 | \n",
"
\n",
" \n",
" 460 | \n",
" 0.525000 | \n",
"
\n",
" \n",
" 461 | \n",
" 0.543200 | \n",
"
\n",
" \n",
" 462 | \n",
" 0.597100 | \n",
"
\n",
" \n",
" 463 | \n",
" 0.539400 | \n",
"
\n",
" \n",
" 464 | \n",
" 0.566400 | \n",
"
\n",
" \n",
" 465 | \n",
" 0.594900 | \n",
"
\n",
" \n",
" 466 | \n",
" 0.595700 | \n",
"
\n",
" \n",
" 467 | \n",
" 0.530100 | \n",
"
\n",
" \n",
" 468 | \n",
" 0.525500 | \n",
"
\n",
" \n",
" 469 | \n",
" 0.540600 | \n",
"
\n",
" \n",
" 470 | \n",
" 0.577400 | \n",
"
\n",
" \n",
" 471 | \n",
" 0.543700 | \n",
"
\n",
" \n",
" 472 | \n",
" 0.534800 | \n",
"
\n",
" \n",
" 473 | \n",
" 0.607000 | \n",
"
\n",
" \n",
" 474 | \n",
" 0.624600 | \n",
"
\n",
" \n",
" 475 | \n",
" 0.571200 | \n",
"
\n",
" \n",
" 476 | \n",
" 0.500100 | \n",
"
\n",
" \n",
" 477 | \n",
" 0.571600 | \n",
"
\n",
" \n",
" 478 | \n",
" 0.548500 | \n",
"
\n",
" \n",
" 479 | \n",
" 0.546200 | \n",
"
\n",
" \n",
" 480 | \n",
" 0.550800 | \n",
"
\n",
" \n",
" 481 | \n",
" 0.553000 | \n",
"
\n",
" \n",
" 482 | \n",
" 0.541900 | \n",
"
\n",
" \n",
" 483 | \n",
" 0.520500 | \n",
"
\n",
" \n",
" 484 | \n",
" 0.566200 | \n",
"
\n",
" \n",
" 485 | \n",
" 0.573500 | \n",
"
\n",
" \n",
" 486 | \n",
" 0.581800 | \n",
"
\n",
" \n",
" 487 | \n",
" 0.622700 | \n",
"
\n",
" \n",
" 488 | \n",
" 0.547400 | \n",
"
\n",
" \n",
" 489 | \n",
" 0.566500 | \n",
"
\n",
" \n",
" 490 | \n",
" 0.542000 | \n",
"
\n",
" \n",
" 491 | \n",
" 0.544900 | \n",
"
\n",
" \n",
" 492 | \n",
" 0.541100 | \n",
"
\n",
" \n",
" 493 | \n",
" 0.515500 | \n",
"
\n",
" \n",
" 494 | \n",
" 0.587000 | \n",
"
\n",
" \n",
" 495 | \n",
" 0.518900 | \n",
"
\n",
" \n",
" 496 | \n",
" 0.514400 | \n",
"
\n",
" \n",
" 497 | \n",
" 0.545600 | \n",
"
\n",
" \n",
" 498 | \n",
" 0.595700 | \n",
"
\n",
" \n",
" 499 | \n",
" 0.551900 | \n",
"
\n",
" \n",
" 500 | \n",
" 0.539100 | \n",
"
\n",
" \n",
" 501 | \n",
" 0.548600 | \n",
"
\n",
" \n",
" 502 | \n",
" 0.556300 | \n",
"
\n",
" \n",
" 503 | \n",
" 0.523200 | \n",
"
\n",
" \n",
" 504 | \n",
" 0.556300 | \n",
"
\n",
" \n",
" 505 | \n",
" 0.558400 | \n",
"
\n",
" \n",
" 506 | \n",
" 0.508500 | \n",
"
\n",
" \n",
" 507 | \n",
" 0.553200 | \n",
"
\n",
" \n",
" 508 | \n",
" 0.557600 | \n",
"
\n",
" \n",
" 509 | \n",
" 0.572900 | \n",
"
\n",
" \n",
" 510 | \n",
" 0.597800 | \n",
"
\n",
" \n",
" 511 | \n",
" 0.524900 | \n",
"
\n",
" \n",
" 512 | \n",
" 0.529500 | \n",
"
\n",
" \n",
" 513 | \n",
" 0.566900 | \n",
"
\n",
" \n",
" 514 | \n",
" 0.562600 | \n",
"
\n",
" \n",
" 515 | \n",
" 0.546500 | \n",
"
\n",
" \n",
" 516 | \n",
" 0.517900 | \n",
"
\n",
" \n",
" 517 | \n",
" 0.531000 | \n",
"
\n",
" \n",
" 518 | \n",
" 0.571500 | \n",
"
\n",
" \n",
" 519 | \n",
" 0.503300 | \n",
"
\n",
" \n",
" 520 | \n",
" 0.578200 | \n",
"
\n",
" \n",
" 521 | \n",
" 0.598000 | \n",
"
\n",
" \n",
" 522 | \n",
" 0.505400 | \n",
"
\n",
" \n",
" 523 | \n",
" 0.533900 | \n",
"
\n",
" \n",
" 524 | \n",
" 0.527300 | \n",
"
\n",
" \n",
" 525 | \n",
" 0.552600 | \n",
"
\n",
" \n",
" 526 | \n",
" 0.554500 | \n",
"
\n",
" \n",
" 527 | \n",
" 0.534700 | \n",
"
\n",
" \n",
" 528 | \n",
" 0.561500 | \n",
"
\n",
" \n",
" 529 | \n",
" 0.553300 | \n",
"
\n",
" \n",
" 530 | \n",
" 0.509700 | \n",
"
\n",
" \n",
" 531 | \n",
" 0.531900 | \n",
"
\n",
" \n",
" 532 | \n",
" 0.525000 | \n",
"
\n",
" \n",
" 533 | \n",
" 0.571200 | \n",
"
\n",
" \n",
" 534 | \n",
" 0.525800 | \n",
"
\n",
" \n",
" 535 | \n",
" 0.593100 | \n",
"
\n",
" \n",
" 536 | \n",
" 0.545800 | \n",
"
\n",
" \n",
" 537 | \n",
" 0.522400 | \n",
"
\n",
" \n",
" 538 | \n",
" 0.588000 | \n",
"
\n",
" \n",
" 539 | \n",
" 0.556900 | \n",
"
\n",
" \n",
" 540 | \n",
" 0.553500 | \n",
"
\n",
" \n",
" 541 | \n",
" 0.561000 | \n",
"
\n",
" \n",
" 542 | \n",
" 0.546200 | \n",
"
\n",
" \n",
" 543 | \n",
" 0.510300 | \n",
"
\n",
" \n",
" 544 | \n",
" 0.552300 | \n",
"
\n",
" \n",
" 545 | \n",
" 0.526000 | \n",
"
\n",
" \n",
" 546 | \n",
" 0.531100 | \n",
"
\n",
" \n",
" 547 | \n",
" 0.509700 | \n",
"
\n",
" \n",
" 548 | \n",
" 0.482200 | \n",
"
\n",
" \n",
" 549 | \n",
" 0.547000 | \n",
"
\n",
" \n",
" 550 | \n",
" 0.532000 | \n",
"
\n",
" \n",
" 551 | \n",
" 0.534600 | \n",
"
\n",
" \n",
" 552 | \n",
" 0.546000 | \n",
"
\n",
" \n",
" 553 | \n",
" 0.542100 | \n",
"
\n",
" \n",
" 554 | \n",
" 0.518800 | \n",
"
\n",
" \n",
" 555 | \n",
" 0.603500 | \n",
"
\n",
" \n",
" 556 | \n",
" 0.514000 | \n",
"
\n",
" \n",
" 557 | \n",
" 0.538500 | \n",
"
\n",
" \n",
" 558 | \n",
" 0.551000 | \n",
"
\n",
" \n",
" 559 | \n",
" 0.548400 | \n",
"
\n",
" \n",
" 560 | \n",
" 0.542600 | \n",
"
\n",
" \n",
" 561 | \n",
" 0.533900 | \n",
"
\n",
" \n",
" 562 | \n",
" 0.572400 | \n",
"
\n",
" \n",
" 563 | \n",
" 0.556300 | \n",
"
\n",
" \n",
" 564 | \n",
" 0.538900 | \n",
"
\n",
" \n",
" 565 | \n",
" 0.586900 | \n",
"
\n",
" \n",
" 566 | \n",
" 0.518200 | \n",
"
\n",
" \n",
" 567 | \n",
" 0.472500 | \n",
"
\n",
" \n",
" 568 | \n",
" 0.554000 | \n",
"
\n",
" \n",
" 569 | \n",
" 0.530600 | \n",
"
\n",
" \n",
" 570 | \n",
" 0.552300 | \n",
"
\n",
" \n",
" 571 | \n",
" 0.523500 | \n",
"
\n",
" \n",
" 572 | \n",
" 0.586100 | \n",
"
\n",
" \n",
" 573 | \n",
" 0.540100 | \n",
"
\n",
" \n",
" 574 | \n",
" 0.561500 | \n",
"
\n",
" \n",
" 575 | \n",
" 0.540900 | \n",
"
\n",
" \n",
" 576 | \n",
" 0.525000 | \n",
"
\n",
" \n",
" 577 | \n",
" 0.542000 | \n",
"
\n",
" \n",
" 578 | \n",
" 0.605800 | \n",
"
\n",
" \n",
" 579 | \n",
" 0.549400 | \n",
"
\n",
" \n",
" 580 | \n",
" 0.508100 | \n",
"
\n",
" \n",
" 581 | \n",
" 0.523500 | \n",
"
\n",
" \n",
" 582 | \n",
" 0.526300 | \n",
"
\n",
" \n",
" 583 | \n",
" 0.521100 | \n",
"
\n",
" \n",
" 584 | \n",
" 0.525300 | \n",
"
\n",
" \n",
" 585 | \n",
" 0.523600 | \n",
"
\n",
" \n",
" 586 | \n",
" 0.506800 | \n",
"
\n",
" \n",
" 587 | \n",
" 0.547200 | \n",
"
\n",
" \n",
" 588 | \n",
" 0.550000 | \n",
"
\n",
" \n",
" 589 | \n",
" 0.571600 | \n",
"
\n",
" \n",
" 590 | \n",
" 0.539200 | \n",
"
\n",
" \n",
" 591 | \n",
" 0.561000 | \n",
"
\n",
" \n",
" 592 | \n",
" 0.529800 | \n",
"
\n",
" \n",
" 593 | \n",
" 0.488400 | \n",
"
\n",
" \n",
" 594 | \n",
" 0.512300 | \n",
"
\n",
" \n",
" 595 | \n",
" 0.503700 | \n",
"
\n",
" \n",
" 596 | \n",
" 0.520400 | \n",
"
\n",
" \n",
" 597 | \n",
" 0.523200 | \n",
"
\n",
" \n",
" 598 | \n",
" 0.527600 | \n",
"
\n",
" \n",
" 599 | \n",
" 0.569400 | \n",
"
\n",
" \n",
" 600 | \n",
" 0.515700 | \n",
"
\n",
" \n",
" 601 | \n",
" 0.540700 | \n",
"
\n",
" \n",
" 602 | \n",
" 0.504500 | \n",
"
\n",
" \n",
" 603 | \n",
" 0.523900 | \n",
"
\n",
" \n",
" 604 | \n",
" 0.527400 | \n",
"
\n",
" \n",
" 605 | \n",
" 0.539900 | \n",
"
\n",
" \n",
" 606 | \n",
" 0.507100 | \n",
"
\n",
" \n",
" 607 | \n",
" 0.484200 | \n",
"
\n",
" \n",
" 608 | \n",
" 0.525100 | \n",
"
\n",
" \n",
" 609 | \n",
" 0.568100 | \n",
"
\n",
" \n",
" 610 | \n",
" 0.565100 | \n",
"
\n",
" \n",
" 611 | \n",
" 0.535700 | \n",
"
\n",
" \n",
" 612 | \n",
" 0.507300 | \n",
"
\n",
" \n",
" 613 | \n",
" 0.529300 | \n",
"
\n",
" \n",
" 614 | \n",
" 0.543900 | \n",
"
\n",
" \n",
" 615 | \n",
" 0.531400 | \n",
"
\n",
" \n",
" 616 | \n",
" 0.520300 | \n",
"
\n",
" \n",
" 617 | \n",
" 0.527800 | \n",
"
\n",
" \n",
" 618 | \n",
" 0.560800 | \n",
"
\n",
" \n",
" 619 | \n",
" 0.522200 | \n",
"
\n",
" \n",
" 620 | \n",
" 0.491600 | \n",
"
\n",
" \n",
" 621 | \n",
" 0.548300 | \n",
"
\n",
" \n",
" 622 | \n",
" 0.560200 | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.\n",
" warnings.warn(\n",
"/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.\n",
" warnings.warn(\n",
"/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.\n",
" warnings.warn(\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6c3e01f84ff845d697a1bbe5abb1ba5a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"adapter_model.safetensors: 0%| | 0.00/50.5M [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# start training, the model will be automatically saved to the hub and the output directory\n",
"trainer.train()\n",
"\n",
"# save model\n",
"trainer.save_model()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "6bf54afa-aeee-4bdc-ae7d-aa11bc0d2d6e",
"metadata": {},
"outputs": [],
"source": [
"# free the memory again\n",
"del model\n",
"del trainer\n",
"torch.cuda.empty_cache()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "9dceb05a-8231-4b27-b35e-a0357be65c79",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
]
}
],
"source": [
"\n",
"#### COMMENT IN TO MERGE PEFT AND BASE MODEL ####\n",
"from peft import PeftModel, PeftConfig\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
"from peft import AutoPeftModelForCausalLM\n",
"\n",
"# # Load PEFT model on CPU\n",
"config = PeftConfig.from_pretrained(args.output_dir)\n",
"model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path,low_cpu_mem_usage=True)\n",
"tokenizer = AutoTokenizer.from_pretrained(args.output_dir)\n",
"model.resize_token_embeddings(len(tokenizer))\n",
"model = PeftModel.from_pretrained(model, args.output_dir)\n",
"model = AutoPeftModelForCausalLM.from_pretrained(\n",
" args.output_dir,\n",
" torch_dtype=torch.float16,\n",
" low_cpu_mem_usage=True,\n",
")\n",
"# # Merge LoRA and base model and save\n",
"merged_model = model.merge_and_unload()\n",
"merged_model.save_pretrained(args.output_dir,safe_serialization=True, max_shard_size=\"2GB\")"
]
},
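{
"cell_type": "code",
"execution_count": null,
"id": "3f2a7c1e-9d4b-4e52-8a10-5b6de0c91a2f",
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check (a minimal sketch, not run in the original session):\n",
"# reload the merged checkpoint as a plain transformers model and confirm its\n",
"# embedding matrix matches the resized tokenizer vocabulary.\n",
"check_model = AutoModelForCausalLM.from_pretrained(args.output_dir, low_cpu_mem_usage=True)\n",
"assert check_model.get_input_embeddings().weight.shape[0] == len(tokenizer)\n",
"del check_model"
]
},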
{
"cell_type": "code",
"execution_count": 12,
"id": "33a1999f-a6dc-42b3-8204-0a3f4103fdfd",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PersimmonForCausalLM', 'PhiForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerNormForCausalLM', 'RoCBertForCausalLM', 'RoFormerForCausalLM', 'RwkvForCausalLM', 'Speech2Text2ForCausalLM', 'TransfoXLLMHeadModel', 'TrOCRForCausalLM', 'WhisperForCausalLM', 'XGLMForCausalLM', 'XLMWithLMHeadModel', 'XLMProphetNetForCausalLM', 'XLMRobertaForCausalLM', 'XLMRobertaXLForCausalLM', 'XLNetLMHeadModel', 'XmodForCausalLM'].\n"
]
}
],
"source": [
"import torch\n",
"from peft import AutoPeftModelForCausalLM\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"#peft_model_id = \"./tinyllama_hindi_sft_sentence_retrieval\"\n",
"peft_model_id = args.output_dir\n",
"\n",
"# Load Model with PEFT adapter\n",
"model = AutoPeftModelForCausalLM.from_pretrained(\n",
" peft_model_id,\n",
" device_map=\"auto\",\n",
" torch_dtype=torch.float16\n",
")\n",
"# load into pipeline\n",
"pipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "0fc13edb-b8a4-40cd-b354-c4b8b36ff284",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4748f445714646138b2f55643478a4b1",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating train split: 0 examples [00:00, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:389: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.1` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
" warnings.warn(\n",
"/opt/conda/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:394: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.1` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n",
" warnings.warn(\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [32,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [33,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [34,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [35,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [36,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [37,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [38,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [39,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [40,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [41,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [42,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [43,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [44,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [45,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [46,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [47,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [48,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [49,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [50,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [51,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [52,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [53,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [54,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [55,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [56,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [57,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [58,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [59,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [60,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [61,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [62,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n",
"../aten/src/ATen/native/cuda/Indexing.cu:1292: indexSelectLargeIndex: block: [292,0,0], thread: [63,0,0] Assertion `srcIndex < srcSelectDimSize` failed.\n"
]
},
{
"ename": "RuntimeError",
"evalue": "CUDA error: device-side assert triggered\nCUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.\nFor debugging consider passing CUDA_LAUNCH_BLOCKING=1.\nCompile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.\n",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[13], line 11\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# Test on sample\u001b[39;00m\n\u001b[1;32m 10\u001b[0m prompt \u001b[38;5;241m=\u001b[39m pipe\u001b[38;5;241m.\u001b[39mtokenizer\u001b[38;5;241m.\u001b[39mapply_chat_template(eval_dataset[rand_idx][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmessages\u001b[39m\u001b[38;5;124m\"\u001b[39m][:\u001b[38;5;241m2\u001b[39m], tokenize\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, add_generation_prompt\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m---> 11\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_new_tokens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m256\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdo_sample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtemperature\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtop_k\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m50\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtop_p\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43meos_token_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43meos_token_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpad_token_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpad_token_id\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mQuery:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00meval_dataset[rand_idx][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmessages\u001b[39m\u001b[38;5;124m'\u001b[39m][\u001b[38;5;241m1\u001b[39m][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcontent\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOriginal Answer:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00meval_dataset[rand_idx][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmessages\u001b[39m\u001b[38;5;124m'\u001b[39m][\u001b[38;5;241m2\u001b[39m][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcontent\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/pipelines/text_generation.py:208\u001b[0m, in \u001b[0;36mTextGenerationPipeline.__call__\u001b[0;34m(self, text_inputs, **kwargs)\u001b[0m\n\u001b[1;32m 167\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, text_inputs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 168\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 169\u001b[0m \u001b[38;5;124;03m Complete the prompt(s) given as inputs.\u001b[39;00m\n\u001b[1;32m 170\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 206\u001b[0m \u001b[38;5;124;03m ids of the generated text.\u001b[39;00m\n\u001b[1;32m 207\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 208\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__call__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mtext_inputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/pipelines/base.py:1140\u001b[0m, in \u001b[0;36mPipeline.__call__\u001b[0;34m(self, inputs, num_workers, batch_size, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1132\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mnext\u001b[39m(\n\u001b[1;32m 1133\u001b[0m \u001b[38;5;28miter\u001b[39m(\n\u001b[1;32m 1134\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_iterator(\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1137\u001b[0m )\n\u001b[1;32m 1138\u001b[0m )\n\u001b[1;32m 1139\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1140\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_single\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpreprocess_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mforward_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpostprocess_params\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/pipelines/base.py:1147\u001b[0m, in \u001b[0;36mPipeline.run_single\u001b[0;34m(self, inputs, preprocess_params, forward_params, postprocess_params)\u001b[0m\n\u001b[1;32m 1145\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrun_single\u001b[39m(\u001b[38;5;28mself\u001b[39m, inputs, preprocess_params, forward_params, postprocess_params):\n\u001b[1;32m 1146\u001b[0m model_inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpreprocess(inputs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mpreprocess_params)\n\u001b[0;32m-> 1147\u001b[0m model_outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_inputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mforward_params\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1148\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpostprocess(model_outputs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mpostprocess_params)\n\u001b[1;32m 1149\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m outputs\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/pipelines/base.py:1046\u001b[0m, in \u001b[0;36mPipeline.forward\u001b[0;34m(self, model_inputs, **forward_params)\u001b[0m\n\u001b[1;32m 1044\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m inference_context():\n\u001b[1;32m 1045\u001b[0m model_inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_ensure_tensor_on_device(model_inputs, device\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[0;32m-> 1046\u001b[0m model_outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_inputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mforward_params\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1047\u001b[0m model_outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_ensure_tensor_on_device(model_outputs, device\u001b[38;5;241m=\u001b[39mtorch\u001b[38;5;241m.\u001b[39mdevice(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcpu\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[1;32m 1048\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/pipelines/text_generation.py:271\u001b[0m, in \u001b[0;36mTextGenerationPipeline._forward\u001b[0;34m(self, model_inputs, **generate_kwargs)\u001b[0m\n\u001b[1;32m 268\u001b[0m generate_kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmin_length\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m prefix_length\n\u001b[1;32m 270\u001b[0m \u001b[38;5;66;03m# BS x SL\u001b[39;00m\n\u001b[0;32m--> 271\u001b[0m generated_sequence \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mgenerate_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 272\u001b[0m out_b \u001b[38;5;241m=\u001b[39m generated_sequence\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 273\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mframework \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpt\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/peft/peft_model.py:1130\u001b[0m, in \u001b[0;36mPeftModelForCausalLM.generate\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 1128\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbase_model\u001b[38;5;241m.\u001b[39mgeneration_config \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgeneration_config\n\u001b[1;32m 1129\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1130\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbase_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1131\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m:\n\u001b[1;32m 1132\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbase_model\u001b[38;5;241m.\u001b[39mprepare_inputs_for_generation \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbase_model_prepare_inputs_for_generation\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/utils/_contextlib.py:115\u001b[0m, in \u001b[0;36mcontext_decorator..decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 115\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1718\u001b[0m, in \u001b[0;36mGenerationMixin.generate\u001b[0;34m(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)\u001b[0m\n\u001b[1;32m 1701\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39massisted_decoding(\n\u001b[1;32m 1702\u001b[0m input_ids,\n\u001b[1;32m 1703\u001b[0m assistant_model\u001b[38;5;241m=\u001b[39massistant_model,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1714\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmodel_kwargs,\n\u001b[1;32m 1715\u001b[0m )\n\u001b[1;32m 1716\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m generation_mode \u001b[38;5;241m==\u001b[39m GenerationMode\u001b[38;5;241m.\u001b[39mGREEDY_SEARCH:\n\u001b[1;32m 1717\u001b[0m \u001b[38;5;66;03m# 11. run greedy search\u001b[39;00m\n\u001b[0;32m-> 1718\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgreedy_search\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1719\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1720\u001b[0m \u001b[43m \u001b[49m\u001b[43mlogits_processor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlogits_processor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1721\u001b[0m \u001b[43m \u001b[49m\u001b[43mstopping_criteria\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstopping_criteria\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1722\u001b[0m \u001b[43m \u001b[49m\u001b[43mpad_token_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgeneration_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpad_token_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1723\u001b[0m \u001b[43m \u001b[49m\u001b[43meos_token_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgeneration_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43meos_token_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1724\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_scores\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgeneration_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moutput_scores\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1725\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_dict_in_generate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgeneration_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreturn_dict_in_generate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1726\u001b[0m \u001b[43m \u001b[49m\u001b[43msynced_gpus\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msynced_gpus\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1727\u001b[0m \u001b[43m \u001b[49m\u001b[43mstreamer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstreamer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1728\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1729\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1731\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m generation_mode \u001b[38;5;241m==\u001b[39m GenerationMode\u001b[38;5;241m.\u001b[39mCONTRASTIVE_SEARCH:\n\u001b[1;32m 1732\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
model_kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124muse_cache\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:2579\u001b[0m, in \u001b[0;36mGenerationMixin.greedy_search\u001b[0;34m(self, input_ids, logits_processor, stopping_criteria, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus, streamer, **model_kwargs)\u001b[0m\n\u001b[1;32m 2576\u001b[0m model_inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprepare_inputs_for_generation(input_ids, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmodel_kwargs)\n\u001b[1;32m 2578\u001b[0m \u001b[38;5;66;03m# forward pass to get next token\u001b[39;00m\n\u001b[0;32m-> 2579\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2580\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_inputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2581\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 2582\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2583\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2584\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2586\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m synced_gpus \u001b[38;5;129;01mand\u001b[39;00m this_peer_finished:\n\u001b[1;32m 2587\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m \u001b[38;5;66;03m# don't waste resources running the code we don't need\u001b[39;00m\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1518\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1516\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1517\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1518\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1527\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1523\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1524\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1525\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1526\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1527\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1529\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1530\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py:1181\u001b[0m, in \u001b[0;36mLlamaForCausalLM.forward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m 1178\u001b[0m return_dict \u001b[38;5;241m=\u001b[39m return_dict \u001b[38;5;28;01mif\u001b[39;00m return_dict \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39muse_return_dict\n\u001b[1;32m 1180\u001b[0m \u001b[38;5;66;03m# decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)\u001b[39;00m\n\u001b[0;32m-> 1181\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1182\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1183\u001b[0m \u001b[43m \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1184\u001b[0m \u001b[43m \u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mposition_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1185\u001b[0m \u001b[43m \u001b[49m\u001b[43mpast_key_values\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpast_key_values\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1186\u001b[0m \u001b[43m \u001b[49m\u001b[43minputs_embeds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs_embeds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1187\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_cache\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1188\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1189\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1190\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1191\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1193\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m outputs[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 1194\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mpretraining_tp \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1518\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1516\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1517\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1518\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1527\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1523\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1524\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1525\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1526\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1527\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1529\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1530\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py:1033\u001b[0m, in \u001b[0;36mLlamaModel.forward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m 1029\u001b[0m attention_mask \u001b[38;5;241m=\u001b[39m attention_mask \u001b[38;5;28;01mif\u001b[39;00m (attention_mask \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01min\u001b[39;00m attention_mask) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1030\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_use_sdpa \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m output_attentions:\n\u001b[1;32m 1031\u001b[0m \u001b[38;5;66;03m# output_attentions=True can not be supported when using SDPA, and we fall back on\u001b[39;00m\n\u001b[1;32m 1032\u001b[0m \u001b[38;5;66;03m# the manual implementation that requires a 4D causal mask in all cases.\u001b[39;00m\n\u001b[0;32m-> 1033\u001b[0m attention_mask \u001b[38;5;241m=\u001b[39m \u001b[43m_prepare_4d_causal_attention_mask_for_sdpa\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1034\u001b[0m \u001b[43m \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1035\u001b[0m \u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mseq_length\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1036\u001b[0m \u001b[43m \u001b[49m\u001b[43minputs_embeds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1037\u001b[0m \u001b[43m \u001b[49m\u001b[43mpast_key_values_length\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1038\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1039\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1040\u001b[0m \u001b[38;5;66;03m# 4d mask is passed through the layers\u001b[39;00m\n\u001b[1;32m 1041\u001b[0m attention_mask \u001b[38;5;241m=\u001b[39m _prepare_4d_causal_attention_mask(\n\u001b[1;32m 1042\u001b[0m attention_mask, (batch_size, seq_length), inputs_embeds, past_key_values_length\n\u001b[1;32m 1043\u001b[0m )\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/modeling_attn_mask_utils.py:343\u001b[0m, in \u001b[0;36m_prepare_4d_causal_attention_mask_for_sdpa\u001b[0;34m(attention_mask, input_shape, inputs_embeds, past_key_values_length, sliding_window)\u001b[0m\n\u001b[1;32m 340\u001b[0m is_tracing \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mjit\u001b[38;5;241m.\u001b[39mis_tracing()\n\u001b[1;32m 342\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m attention_mask \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 343\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mall(attention_mask \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m):\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_tracing:\n\u001b[1;32m 345\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n",
"\u001b[0;31mRuntimeError\u001b[0m: CUDA error: device-side assert triggered\nCUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.\nFor debugging consider passing CUDA_LAUNCH_BLOCKING=1.\nCompile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.\n"
]
}
],
"source": [
"from datasets import load_dataset\n",
"from random import randint\n",
"\n",
"\n",
"# Load our test dataset\n",
"eval_dataset = load_dataset(\"json\", data_files=\"test_dataset.json\", split=\"train\")\n",
"rand_idx = randint(0, len(eval_dataset))\n",
"\n",
"# Test on sample\n",
"prompt = pipe.tokenizer.apply_chat_template(eval_dataset[rand_idx][\"messages\"][:2], tokenize=False, add_generation_prompt=True)\n",
"outputs = pipe(prompt, max_new_tokens=256, do_sample=False, temperature=0.1, top_k=50, top_p=0.1, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)\n",
"\n",
"print(f\"Query:\\n{eval_dataset[rand_idx]['messages'][1]['content']}\")\n",
"print(f\"Original Answer:\\n{eval_dataset[rand_idx]['messages'][2]['content']}\")\n",
"print(f\"Generated Answer:\\n{outputs[0]['generated_text'][len(prompt):].strip()}\")"
]
},
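{
"cell_type": "markdown",
"id": "f3d1a2b4-9c8e-4f7a-b6d5-0e1c2a3b4d5f",
"metadata": {},
"source": [
"**Debugging the CUDA error above.** A `device-side assert` is reported asynchronously, so the Python traceback often points at an unrelated call (here the attention-mask check) rather than the kernel that actually failed. To localize it, set `CUDA_LAUNCH_BLOCKING=1` in the environment *before* CUDA is initialized (i.e. when launching Jupyter) and re-run. A common root cause is an out-of-range index, such as a dataset index one past the end or a token id outside the embedding table."
]
},
{
"cell_type": "markdown",
"id": "c2e4a6b8-1d3f-4a5c-9e7b-2f4d6a8c0e1b",
"metadata": {},
"source": [
"A single spot check only goes so far, so the sketch below scores the model on a small slice of the test set using exact string match. This is a minimal illustration rather than a full evaluation harness: it assumes the `pipe` and `eval_dataset` objects from the cells above, and exact match is only meaningful when reference answers are short and canonical; free-form answers would need a softer metric."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b7d5f3a-2c4e-4b6d-8a0c-1e3f5a7b9d2c",
"metadata": {},
"outputs": [],
"source": [
"from tqdm import tqdm\n",
"\n",
"\n",
"def evaluate_sample(sample):\n",
"    # Prompt = system + user turns; the assistant turn is the reference answer\n",
"    prompt = pipe.tokenizer.apply_chat_template(sample[\"messages\"][:2], tokenize=False, add_generation_prompt=True)\n",
"    outputs = pipe(prompt, max_new_tokens=256, do_sample=False, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)\n",
"    prediction = outputs[0][\"generated_text\"][len(prompt):].strip()\n",
"    return prediction == sample[\"messages\"][2][\"content\"].strip()\n",
"\n",
"\n",
"num_samples = min(50, len(eval_dataset))  # small slice to keep evaluation cheap\n",
"hits = sum(evaluate_sample(s) for s in tqdm(eval_dataset.select(range(num_samples))))\n",
"print(f\"Exact-match accuracy: {hits / num_samples:.2%} ({hits}/{num_samples})\")"
]
},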
{
"cell_type": "code",
"execution_count": null,
"id": "d39baba5-8003-4451-b350-8fd7677edc30",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}