Adding `safetensors` variant of this model

#3
by agemagician - opened
This view is limited to 50 files because it contains too many changes.  See the raw diff here.
Files changed (50) hide show
  1. .gitattributes +71 -0
  2. model.safetensors.index.json +1 -0
  3. model_00001-of-00072.safetensors +3 -0
  4. model_00002-of-00072.safetensors +3 -0
  5. model_00003-of-00072.safetensors +3 -0
  6. model_00004-of-00072.safetensors +3 -0
  7. model_00005-of-00072.safetensors +3 -0
  8. model_00006-of-00072.safetensors +3 -0
  9. model_00007-of-00072.safetensors +3 -0
  10. model_00008-of-00072.safetensors +3 -0
  11. model_00009-of-00072.safetensors +3 -0
  12. model_00010-of-00072.safetensors +3 -0
  13. model_00011-of-00072.safetensors +3 -0
  14. model_00012-of-00072.safetensors +3 -0
  15. model_00013-of-00072.safetensors +3 -0
  16. model_00014-of-00072.safetensors +3 -0
  17. model_00015-of-00072.safetensors +3 -0
  18. model_00016-of-00072.safetensors +3 -0
  19. model_00017-of-00072.safetensors +3 -0
  20. model_00018-of-00072.safetensors +3 -0
  21. model_00019-of-00072.safetensors +3 -0
  22. model_00020-of-00072.safetensors +3 -0
  23. model_00021-of-00072.safetensors +3 -0
  24. model_00022-of-00072.safetensors +3 -0
  25. model_00023-of-00072.safetensors +3 -0
  26. model_00024-of-00072.safetensors +3 -0
  27. model_00025-of-00072.safetensors +3 -0
  28. model_00026-of-00072.safetensors +3 -0
  29. model_00027-of-00072.safetensors +3 -0
  30. model_00028-of-00072.safetensors +3 -0
  31. model_00029-of-00072.safetensors +3 -0
  32. model_00030-of-00072.safetensors +3 -0
  33. model_00031-of-00072.safetensors +3 -0
  34. model_00032-of-00072.safetensors +3 -0
  35. model_00033-of-00072.safetensors +3 -0
  36. model_00034-of-00072.safetensors +3 -0
  37. model_00035-of-00072.safetensors +3 -0
  38. model_00036-of-00072.safetensors +3 -0
  39. model_00037-of-00072.safetensors +3 -0
  40. model_00038-of-00072.safetensors +3 -0
  41. model_00039-of-00072.safetensors +3 -0
  42. model_00040-of-00072.safetensors +3 -0
  43. model_00041-of-00072.safetensors +3 -0
  44. model_00042-of-00072.safetensors +3 -0
  45. model_00043-of-00072.safetensors +3 -0
  46. model_00044-of-00072.safetensors +3 -0
  47. model_00045-of-00072.safetensors +3 -0
  48. model_00046-of-00072.safetensors +3 -0
  49. model_00047-of-00072.safetensors +3 -0
  50. model_00048-of-00072.safetensors +3 -0
.gitattributes CHANGED
@@ -36,3 +36,74 @@ logs/logs/xp3capmixnewcodelonglossseq/main_log.txt filter=lfs diff=lfs merge=lfs
36
  logs/logs/xp3mt/main_log.txt filter=lfs diff=lfs merge=lfs -text
37
  logs/logs/xp3zhmt/main_log.txt filter=lfs diff=lfs merge=lfs -text
38
  logs/logs/xp3zzlossseq/main_log.txt filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  logs/logs/xp3mt/main_log.txt filter=lfs diff=lfs merge=lfs -text
37
  logs/logs/xp3zhmt/main_log.txt filter=lfs diff=lfs merge=lfs -text
38
  logs/logs/xp3zzlossseq/main_log.txt filter=lfs diff=lfs merge=lfs -text
39
+ model_00021-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
40
+ model_00027-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
41
+ model_00005-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
42
+ model_00031-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
43
+ model_00038-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
44
+ model_00034-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
45
+ model_00060-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
46
+ model_00020-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
47
+ model_00037-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
48
+ model_00045-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
49
+ model_00043-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
50
+ model_00006-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
51
+ model_00070-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
52
+ model_00015-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
53
+ model_00025-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
54
+ model_00065-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
55
+ model_00062-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
56
+ model_00003-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
57
+ model_00033-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
58
+ model_00022-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
59
+ model_00019-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
60
+ model_00016-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
61
+ model_00056-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
62
+ model_00004-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
63
+ model_00053-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
64
+ model_00058-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
65
+ model_00047-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
66
+ model_00055-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
67
+ model_00061-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
68
+ model_00041-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
69
+ model_00011-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
70
+ model_00032-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
71
+ model_00046-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
72
+ model_00013-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
73
+ model_00001-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
74
+ model_00042-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
75
+ model_00008-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
76
+ model_00068-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
77
+ model_00017-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
78
+ model_00048-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
79
+ model_00002-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
80
+ model_00030-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
81
+ model_00054-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
82
+ model_00069-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
83
+ model_00050-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
84
+ model_00010-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
85
+ model_00063-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
86
+ model_00014-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
87
+ model_00059-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
88
+ model_00052-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
89
+ model_00067-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
90
+ model_00023-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
91
+ model_00049-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
92
+ model_00018-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
93
+ model_00044-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
94
+ model_00035-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
95
+ model_00057-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
96
+ model_00028-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
97
+ model_00051-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
98
+ model_00024-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
99
+ model_00064-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
100
+ model_00029-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
101
+ model_00039-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
102
+ model_00071-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
103
+ model_00009-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
104
+ model_00007-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
105
+ model_00026-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
106
+ model_00012-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
107
+ model_00040-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
108
+ model_00036-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
109
+ model_00066-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
model.safetensors.index.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata": {"total_size": 352494542848}, "weight_map": {"h.0.input_layernorm.bias": "model_00002-of-00072.safetensors", "h.0.input_layernorm.weight": "model_00002-of-00072.safetensors", "h.0.mlp.dense_4h_to_h.bias": "model_00002-of-00072.safetensors", "h.0.mlp.dense_4h_to_h.weight": "model_00002-of-00072.safetensors", "h.0.mlp.dense_h_to_4h.bias": "model_00002-of-00072.safetensors", "h.0.mlp.dense_h_to_4h.weight": "model_00002-of-00072.safetensors", "h.0.post_attention_layernorm.bias": "model_00002-of-00072.safetensors", "h.0.post_attention_layernorm.weight": "model_00002-of-00072.safetensors", "h.0.self_attention.dense.bias": "model_00002-of-00072.safetensors", "h.0.self_attention.dense.weight": "model_00002-of-00072.safetensors", "h.0.self_attention.query_key_value.bias": "model_00002-of-00072.safetensors", "h.0.self_attention.query_key_value.weight": "model_00002-of-00072.safetensors", "h.1.input_layernorm.bias": "model_00003-of-00072.safetensors", "h.1.input_layernorm.weight": "model_00003-of-00072.safetensors", "h.1.mlp.dense_4h_to_h.bias": "model_00003-of-00072.safetensors", "h.1.mlp.dense_4h_to_h.weight": "model_00003-of-00072.safetensors", "h.1.mlp.dense_h_to_4h.bias": "model_00003-of-00072.safetensors", "h.1.mlp.dense_h_to_4h.weight": "model_00003-of-00072.safetensors", "h.1.post_attention_layernorm.bias": "model_00003-of-00072.safetensors", "h.1.post_attention_layernorm.weight": "model_00003-of-00072.safetensors", "h.1.self_attention.dense.bias": "model_00003-of-00072.safetensors", "h.1.self_attention.dense.weight": "model_00003-of-00072.safetensors", "h.1.self_attention.query_key_value.bias": "model_00003-of-00072.safetensors", "h.1.self_attention.query_key_value.weight": "model_00003-of-00072.safetensors", "h.10.input_layernorm.bias": "model_00012-of-00072.safetensors", "h.10.input_layernorm.weight": "model_00012-of-00072.safetensors", "h.10.mlp.dense_4h_to_h.bias": "model_00012-of-00072.safetensors", "h.10.mlp.dense_4h_to_h.weight": "model_00012-of-00072.safetensors", "h.10.mlp.dense_h_to_4h.bias": "model_00012-of-00072.safetensors", "h.10.mlp.dense_h_to_4h.weight": "model_00012-of-00072.safetensors", "h.10.post_attention_layernorm.bias": "model_00012-of-00072.safetensors", "h.10.post_attention_layernorm.weight": "model_00012-of-00072.safetensors", "h.10.self_attention.dense.bias": "model_00012-of-00072.safetensors", "h.10.self_attention.dense.weight": "model_00012-of-00072.safetensors", "h.10.self_attention.query_key_value.bias": "model_00012-of-00072.safetensors", "h.10.self_attention.query_key_value.weight": "model_00012-of-00072.safetensors", "h.11.input_layernorm.bias": "model_00013-of-00072.safetensors", "h.11.input_layernorm.weight": "model_00013-of-00072.safetensors", "h.11.mlp.dense_4h_to_h.bias": "model_00013-of-00072.safetensors", "h.11.mlp.dense_4h_to_h.weight": "model_00013-of-00072.safetensors", "h.11.mlp.dense_h_to_4h.bias": "model_00013-of-00072.safetensors", "h.11.mlp.dense_h_to_4h.weight": "model_00013-of-00072.safetensors", "h.11.post_attention_layernorm.bias": "model_00013-of-00072.safetensors", "h.11.post_attention_layernorm.weight": "model_00013-of-00072.safetensors", "h.11.self_attention.dense.bias": "model_00013-of-00072.safetensors", "h.11.self_attention.dense.weight": "model_00013-of-00072.safetensors", "h.11.self_attention.query_key_value.bias": "model_00013-of-00072.safetensors", "h.11.self_attention.query_key_value.weight": "model_00013-of-00072.safetensors", "h.12.input_layernorm.bias": "model_00014-of-00072.safetensors", "h.12.input_layernorm.weight": "model_00014-of-00072.safetensors", "h.12.mlp.dense_4h_to_h.bias": "model_00014-of-00072.safetensors", "h.12.mlp.dense_4h_to_h.weight": "model_00014-of-00072.safetensors", "h.12.mlp.dense_h_to_4h.bias": "model_00014-of-00072.safetensors", "h.12.mlp.dense_h_to_4h.weight": "model_00014-of-00072.safetensors", "h.12.post_attention_layernorm.bias": "model_00014-of-00072.safetensors", "h.12.post_attention_layernorm.weight": "model_00014-of-00072.safetensors", "h.12.self_attention.dense.bias": "model_00014-of-00072.safetensors", "h.12.self_attention.dense.weight": "model_00014-of-00072.safetensors", "h.12.self_attention.query_key_value.bias": "model_00014-of-00072.safetensors", "h.12.self_attention.query_key_value.weight": "model_00014-of-00072.safetensors", "h.13.input_layernorm.bias": "model_00015-of-00072.safetensors", "h.13.input_layernorm.weight": "model_00015-of-00072.safetensors", "h.13.mlp.dense_4h_to_h.bias": "model_00015-of-00072.safetensors", "h.13.mlp.dense_4h_to_h.weight": "model_00015-of-00072.safetensors", "h.13.mlp.dense_h_to_4h.bias": "model_00015-of-00072.safetensors", "h.13.mlp.dense_h_to_4h.weight": "model_00015-of-00072.safetensors", "h.13.post_attention_layernorm.bias": "model_00015-of-00072.safetensors", "h.13.post_attention_layernorm.weight": "model_00015-of-00072.safetensors", "h.13.self_attention.dense.bias": "model_00015-of-00072.safetensors", "h.13.self_attention.dense.weight": "model_00015-of-00072.safetensors", "h.13.self_attention.query_key_value.bias": "model_00015-of-00072.safetensors", "h.13.self_attention.query_key_value.weight": "model_00015-of-00072.safetensors", "h.14.input_layernorm.bias": "model_00016-of-00072.safetensors", "h.14.input_layernorm.weight": "model_00016-of-00072.safetensors", "h.14.mlp.dense_4h_to_h.bias": "model_00016-of-00072.safetensors", "h.14.mlp.dense_4h_to_h.weight": "model_00016-of-00072.safetensors", "h.14.mlp.dense_h_to_4h.bias": "model_00016-of-00072.safetensors", "h.14.mlp.dense_h_to_4h.weight": "model_00016-of-00072.safetensors", "h.14.post_attention_layernorm.bias": "model_00016-of-00072.safetensors", "h.14.post_attention_layernorm.weight": "model_00016-of-00072.safetensors", "h.14.self_attention.dense.bias": "model_00016-of-00072.safetensors", "h.14.self_attention.dense.weight": "model_00016-of-00072.safetensors", "h.14.self_attention.query_key_value.bias": "model_00016-of-00072.safetensors", "h.14.self_attention.query_key_value.weight": "model_00016-of-00072.safetensors", "h.15.input_layernorm.bias": "model_00017-of-00072.safetensors", "h.15.input_layernorm.weight": "model_00017-of-00072.safetensors", "h.15.mlp.dense_4h_to_h.bias": "model_00017-of-00072.safetensors", "h.15.mlp.dense_4h_to_h.weight": "model_00017-of-00072.safetensors", "h.15.mlp.dense_h_to_4h.bias": "model_00017-of-00072.safetensors", "h.15.mlp.dense_h_to_4h.weight": "model_00017-of-00072.safetensors", "h.15.post_attention_layernorm.bias": "model_00017-of-00072.safetensors", "h.15.post_attention_layernorm.weight": "model_00017-of-00072.safetensors", "h.15.self_attention.dense.bias": "model_00017-of-00072.safetensors", "h.15.self_attention.dense.weight": "model_00017-of-00072.safetensors", "h.15.self_attention.query_key_value.bias": "model_00017-of-00072.safetensors", "h.15.self_attention.query_key_value.weight": "model_00017-of-00072.safetensors", "h.16.input_layernorm.bias": "model_00018-of-00072.safetensors", "h.16.input_layernorm.weight": "model_00018-of-00072.safetensors", "h.16.mlp.dense_4h_to_h.bias": "model_00018-of-00072.safetensors", "h.16.mlp.dense_4h_to_h.weight": "model_00018-of-00072.safetensors", "h.16.mlp.dense_h_to_4h.bias": "model_00018-of-00072.safetensors", "h.16.mlp.dense_h_to_4h.weight": "model_00018-of-00072.safetensors", "h.16.post_attention_layernorm.bias": "model_00018-of-00072.safetensors", "h.16.post_attention_layernorm.weight": "model_00018-of-00072.safetensors", "h.16.self_attention.dense.bias": "model_00018-of-00072.safetensors", "h.16.self_attention.dense.weight": "model_00018-of-00072.safetensors", "h.16.self_attention.query_key_value.bias": "model_00018-of-00072.safetensors", "h.16.self_attention.query_key_value.weight": "model_00018-of-00072.safetensors", "h.17.input_layernorm.bias": "model_00019-of-00072.safetensors", "h.17.input_layernorm.weight": "model_00019-of-00072.safetensors", "h.17.mlp.dense_4h_to_h.bias": "model_00019-of-00072.safetensors", "h.17.mlp.dense_4h_to_h.weight": "model_00019-of-00072.safetensors", "h.17.mlp.dense_h_to_4h.bias": "model_00019-of-00072.safetensors", "h.17.mlp.dense_h_to_4h.weight": "model_00019-of-00072.safetensors", "h.17.post_attention_layernorm.bias": "model_00019-of-00072.safetensors", "h.17.post_attention_layernorm.weight": "model_00019-of-00072.safetensors", "h.17.self_attention.dense.bias": "model_00019-of-00072.safetensors", "h.17.self_attention.dense.weight": "model_00019-of-00072.safetensors", "h.17.self_attention.query_key_value.bias": "model_00019-of-00072.safetensors", "h.17.self_attention.query_key_value.weight": "model_00019-of-00072.safetensors", "h.18.input_layernorm.bias": "model_00020-of-00072.safetensors", "h.18.input_layernorm.weight": "model_00020-of-00072.safetensors", "h.18.mlp.dense_4h_to_h.bias": "model_00020-of-00072.safetensors", "h.18.mlp.dense_4h_to_h.weight": "model_00020-of-00072.safetensors", "h.18.mlp.dense_h_to_4h.bias": "model_00020-of-00072.safetensors", "h.18.mlp.dense_h_to_4h.weight": "model_00020-of-00072.safetensors", "h.18.post_attention_layernorm.bias": "model_00020-of-00072.safetensors", "h.18.post_attention_layernorm.weight": "model_00020-of-00072.safetensors", "h.18.self_attention.dense.bias": "model_00020-of-00072.safetensors", "h.18.self_attention.dense.weight": "model_00020-of-00072.safetensors", "h.18.self_attention.query_key_value.bias": "model_00020-of-00072.safetensors", "h.18.self_attention.query_key_value.weight": "model_00020-of-00072.safetensors", "h.19.input_layernorm.bias": "model_00021-of-00072.safetensors", "h.19.input_layernorm.weight": "model_00021-of-00072.safetensors", "h.19.mlp.dense_4h_to_h.bias": "model_00021-of-00072.safetensors", "h.19.mlp.dense_4h_to_h.weight": "model_00021-of-00072.safetensors", "h.19.mlp.dense_h_to_4h.bias": "model_00021-of-00072.safetensors", "h.19.mlp.dense_h_to_4h.weight": "model_00021-of-00072.safetensors", "h.19.post_attention_layernorm.bias": "model_00021-of-00072.safetensors", "h.19.post_attention_layernorm.weight": "model_00021-of-00072.safetensors", "h.19.self_attention.dense.bias": "model_00021-of-00072.safetensors", "h.19.self_attention.dense.weight": "model_00021-of-00072.safetensors", "h.19.self_attention.query_key_value.bias": "model_00021-of-00072.safetensors", "h.19.self_attention.query_key_value.weight": "model_00021-of-00072.safetensors", "h.2.input_layernorm.bias": "model_00004-of-00072.safetensors", "h.2.input_layernorm.weight": "model_00004-of-00072.safetensors", "h.2.mlp.dense_4h_to_h.bias": "model_00004-of-00072.safetensors", "h.2.mlp.dense_4h_to_h.weight": "model_00004-of-00072.safetensors", "h.2.mlp.dense_h_to_4h.bias": "model_00004-of-00072.safetensors", "h.2.mlp.dense_h_to_4h.weight": "model_00004-of-00072.safetensors", "h.2.post_attention_layernorm.bias": "model_00004-of-00072.safetensors", "h.2.post_attention_layernorm.weight": "model_00004-of-00072.safetensors", "h.2.self_attention.dense.bias": "model_00004-of-00072.safetensors", "h.2.self_attention.dense.weight": "model_00004-of-00072.safetensors", "h.2.self_attention.query_key_value.bias": "model_00004-of-00072.safetensors", "h.2.self_attention.query_key_value.weight": "model_00004-of-00072.safetensors", "h.20.input_layernorm.bias": "model_00022-of-00072.safetensors", "h.20.input_layernorm.weight": "model_00022-of-00072.safetensors", "h.20.mlp.dense_4h_to_h.bias": "model_00022-of-00072.safetensors", "h.20.mlp.dense_4h_to_h.weight": "model_00022-of-00072.safetensors", "h.20.mlp.dense_h_to_4h.bias": "model_00022-of-00072.safetensors", "h.20.mlp.dense_h_to_4h.weight": "model_00022-of-00072.safetensors", "h.20.post_attention_layernorm.bias": "model_00022-of-00072.safetensors", "h.20.post_attention_layernorm.weight": "model_00022-of-00072.safetensors", "h.20.self_attention.dense.bias": "model_00022-of-00072.safetensors", "h.20.self_attention.dense.weight": "model_00022-of-00072.safetensors", "h.20.self_attention.query_key_value.bias": "model_00022-of-00072.safetensors", "h.20.self_attention.query_key_value.weight": "model_00022-of-00072.safetensors", "h.21.input_layernorm.bias": "model_00023-of-00072.safetensors", "h.21.input_layernorm.weight": "model_00023-of-00072.safetensors", "h.21.mlp.dense_4h_to_h.bias": "model_00023-of-00072.safetensors", "h.21.mlp.dense_4h_to_h.weight": "model_00023-of-00072.safetensors", "h.21.mlp.dense_h_to_4h.bias": "model_00023-of-00072.safetensors", "h.21.mlp.dense_h_to_4h.weight": "model_00023-of-00072.safetensors", "h.21.post_attention_layernorm.bias": "model_00023-of-00072.safetensors", "h.21.post_attention_layernorm.weight": "model_00023-of-00072.safetensors", "h.21.self_attention.dense.bias": "model_00023-of-00072.safetensors", "h.21.self_attention.dense.weight": "model_00023-of-00072.safetensors", "h.21.self_attention.query_key_value.bias": "model_00023-of-00072.safetensors", "h.21.self_attention.query_key_value.weight": "model_00023-of-00072.safetensors", "h.22.input_layernorm.bias": "model_00024-of-00072.safetensors", "h.22.input_layernorm.weight": "model_00024-of-00072.safetensors", "h.22.mlp.dense_4h_to_h.bias": "model_00024-of-00072.safetensors", "h.22.mlp.dense_4h_to_h.weight": "model_00024-of-00072.safetensors", "h.22.mlp.dense_h_to_4h.bias": "model_00024-of-00072.safetensors", "h.22.mlp.dense_h_to_4h.weight": "model_00024-of-00072.safetensors", "h.22.post_attention_layernorm.bias": "model_00024-of-00072.safetensors", "h.22.post_attention_layernorm.weight": "model_00024-of-00072.safetensors", "h.22.self_attention.dense.bias": "model_00024-of-00072.safetensors", "h.22.self_attention.dense.weight": "model_00024-of-00072.safetensors", "h.22.self_attention.query_key_value.bias": "model_00024-of-00072.safetensors", "h.22.self_attention.query_key_value.weight": "model_00024-of-00072.safetensors", "h.23.input_layernorm.bias": "model_00025-of-00072.safetensors", "h.23.input_layernorm.weight": "model_00025-of-00072.safetensors", "h.23.mlp.dense_4h_to_h.bias": "model_00025-of-00072.safetensors", "h.23.mlp.dense_4h_to_h.weight": "model_00025-of-00072.safetensors", "h.23.mlp.dense_h_to_4h.bias": "model_00025-of-00072.safetensors", "h.23.mlp.dense_h_to_4h.weight": "model_00025-of-00072.safetensors", "h.23.post_attention_layernorm.bias": "model_00025-of-00072.safetensors", "h.23.post_attention_layernorm.weight": "model_00025-of-00072.safetensors", "h.23.self_attention.dense.bias": "model_00025-of-00072.safetensors", "h.23.self_attention.dense.weight": "model_00025-of-00072.safetensors", "h.23.self_attention.query_key_value.bias": "model_00025-of-00072.safetensors", "h.23.self_attention.query_key_value.weight": "model_00025-of-00072.safetensors", "h.24.input_layernorm.bias": "model_00026-of-00072.safetensors", "h.24.input_layernorm.weight": "model_00026-of-00072.safetensors", "h.24.mlp.dense_4h_to_h.bias": "model_00026-of-00072.safetensors", "h.24.mlp.dense_4h_to_h.weight": "model_00026-of-00072.safetensors", "h.24.mlp.dense_h_to_4h.bias": "model_00026-of-00072.safetensors", "h.24.mlp.dense_h_to_4h.weight": "model_00026-of-00072.safetensors", "h.24.post_attention_layernorm.bias": "model_00026-of-00072.safetensors", "h.24.post_attention_layernorm.weight": "model_00026-of-00072.safetensors", "h.24.self_attention.dense.bias": "model_00026-of-00072.safetensors", "h.24.self_attention.dense.weight": "model_00026-of-00072.safetensors", "h.24.self_attention.query_key_value.bias": "model_00026-of-00072.safetensors", "h.24.self_attention.query_key_value.weight": "model_00026-of-00072.safetensors", "h.25.input_layernorm.bias": "model_00027-of-00072.safetensors", "h.25.input_layernorm.weight": "model_00027-of-00072.safetensors", "h.25.mlp.dense_4h_to_h.bias": "model_00027-of-00072.safetensors", "h.25.mlp.dense_4h_to_h.weight": "model_00027-of-00072.safetensors", "h.25.mlp.dense_h_to_4h.bias": "model_00027-of-00072.safetensors", "h.25.mlp.dense_h_to_4h.weight": "model_00027-of-00072.safetensors", "h.25.post_attention_layernorm.bias": "model_00027-of-00072.safetensors", "h.25.post_attention_layernorm.weight": "model_00027-of-00072.safetensors", "h.25.self_attention.dense.bias": "model_00027-of-00072.safetensors", "h.25.self_attention.dense.weight": "model_00027-of-00072.safetensors", "h.25.self_attention.query_key_value.bias": "model_00027-of-00072.safetensors", "h.25.self_attention.query_key_value.weight": "model_00027-of-00072.safetensors", "h.26.input_layernorm.bias": "model_00028-of-00072.safetensors", "h.26.input_layernorm.weight": "model_00028-of-00072.safetensors", "h.26.mlp.dense_4h_to_h.bias": "model_00028-of-00072.safetensors", "h.26.mlp.dense_4h_to_h.weight": "model_00028-of-00072.safetensors", "h.26.mlp.dense_h_to_4h.bias": "model_00028-of-00072.safetensors", "h.26.mlp.dense_h_to_4h.weight": "model_00028-of-00072.safetensors", "h.26.post_attention_layernorm.bias": "model_00028-of-00072.safetensors", "h.26.post_attention_layernorm.weight": "model_00028-of-00072.safetensors", "h.26.self_attention.dense.bias": "model_00028-of-00072.safetensors", "h.26.self_attention.dense.weight": "model_00028-of-00072.safetensors", "h.26.self_attention.query_key_value.bias": "model_00028-of-00072.safetensors", "h.26.self_attention.query_key_value.weight": "model_00028-of-00072.safetensors", "h.27.input_layernorm.bias": "model_00029-of-00072.safetensors", "h.27.input_layernorm.weight": "model_00029-of-00072.safetensors", "h.27.mlp.dense_4h_to_h.bias": "model_00029-of-00072.safetensors", "h.27.mlp.dense_4h_to_h.weight": "model_00029-of-00072.safetensors", "h.27.mlp.dense_h_to_4h.bias": "model_00029-of-00072.safetensors", "h.27.mlp.dense_h_to_4h.weight": "model_00029-of-00072.safetensors", "h.27.post_attention_layernorm.bias": "model_00029-of-00072.safetensors", "h.27.post_attention_layernorm.weight": "model_00029-of-00072.safetensors", "h.27.self_attention.dense.bias": "model_00029-of-00072.safetensors", "h.27.self_attention.dense.weight": "model_00029-of-00072.safetensors", "h.27.self_attention.query_key_value.bias": "model_00029-of-00072.safetensors", "h.27.self_attention.query_key_value.weight": "model_00029-of-00072.safetensors", "h.28.input_layernorm.bias": "model_00030-of-00072.safetensors", "h.28.input_layernorm.weight": "model_00030-of-00072.safetensors", "h.28.mlp.dense_4h_to_h.bias": "model_00030-of-00072.safetensors", "h.28.mlp.dense_4h_to_h.weight": "model_00030-of-00072.safetensors", "h.28.mlp.dense_h_to_4h.bias": "model_00030-of-00072.safetensors", "h.28.mlp.dense_h_to_4h.weight": "model_00030-of-00072.safetensors", "h.28.post_attention_layernorm.bias": "model_00030-of-00072.safetensors", "h.28.post_attention_layernorm.weight": "model_00030-of-00072.safetensors", "h.28.self_attention.dense.bias": "model_00030-of-00072.safetensors", "h.28.self_attention.dense.weight": "model_00030-of-00072.safetensors", "h.28.self_attention.query_key_value.bias": "model_00030-of-00072.safetensors", "h.28.self_attention.query_key_value.weight": "model_00030-of-00072.safetensors", "h.29.input_layernorm.bias": "model_00031-of-00072.safetensors", "h.29.input_layernorm.weight": "model_00031-of-00072.safetensors", "h.29.mlp.dense_4h_to_h.bias": "model_00031-of-00072.safetensors", "h.29.mlp.dense_4h_to_h.weight": "model_00031-of-00072.safetensors", "h.29.mlp.dense_h_to_4h.bias": "model_00031-of-00072.safetensors", "h.29.mlp.dense_h_to_4h.weight": "model_00031-of-00072.safetensors", "h.29.post_attention_layernorm.bias": "model_00031-of-00072.safetensors", "h.29.post_attention_layernorm.weight": "model_00031-of-00072.safetensors", "h.29.self_attention.dense.bias": "model_00031-of-00072.safetensors", "h.29.self_attention.dense.weight": "model_00031-of-00072.safetensors", "h.29.self_attention.query_key_value.bias": "model_00031-of-00072.safetensors", "h.29.self_attention.query_key_value.weight": "model_00031-of-00072.safetensors", "h.3.input_layernorm.bias": "model_00005-of-00072.safetensors", "h.3.input_layernorm.weight": "model_00005-of-00072.safetensors", "h.3.mlp.dense_4h_to_h.bias": "model_00005-of-00072.safetensors", "h.3.mlp.dense_4h_to_h.weight": "model_00005-of-00072.safetensors", "h.3.mlp.dense_h_to_4h.bias": "model_00005-of-00072.safetensors", "h.3.mlp.dense_h_to_4h.weight": "model_00005-of-00072.safetensors", "h.3.post_attention_layernorm.bias": "model_00005-of-00072.safetensors", "h.3.post_attention_layernorm.weight": "model_00005-of-00072.safetensors", "h.3.self_attention.dense.bias": "model_00005-of-00072.safetensors", "h.3.self_attention.dense.weight": "model_00005-of-00072.safetensors", "h.3.self_attention.query_key_value.bias": "model_00005-of-00072.safetensors", "h.3.self_attention.query_key_value.weight": "model_00005-of-00072.safetensors", "h.30.input_layernorm.bias": "model_00032-of-00072.safetensors", "h.30.input_layernorm.weight": "model_00032-of-00072.safetensors", "h.30.mlp.dense_4h_to_h.bias": "model_00032-of-00072.safetensors", "h.30.mlp.dense_4h_to_h.weight": "model_00032-of-00072.safetensors", "h.30.mlp.dense_h_to_4h.bias": "model_00032-of-00072.safetensors", "h.30.mlp.dense_h_to_4h.weight": "model_00032-of-00072.safetensors", "h.30.post_attention_layernorm.bias": "model_00032-of-00072.safetensors", "h.30.post_attention_layernorm.weight": "model_00032-of-00072.safetensors", "h.30.self_attention.dense.bias": "model_00032-of-00072.safetensors", "h.30.self_attention.dense.weight": "model_00032-of-00072.safetensors", "h.30.self_attention.query_key_value.bias": "model_00032-of-00072.safetensors", "h.30.self_attention.query_key_value.weight": "model_00032-of-00072.safetensors", "h.31.input_layernorm.bias": "model_00033-of-00072.safetensors", "h.31.input_layernorm.weight": "model_00033-of-00072.safetensors", "h.31.mlp.dense_4h_to_h.bias": "model_00033-of-00072.safetensors", "h.31.mlp.dense_4h_to_h.weight": "model_00033-of-00072.safetensors", "h.31.mlp.dense_h_to_4h.bias": "model_00033-of-00072.safetensors", "h.31.mlp.dense_h_to_4h.weight": "model_00033-of-00072.safetensors", "h.31.post_attention_layernorm.bias": "model_00033-of-00072.safetensors", "h.31.post_attention_layernorm.weight": "model_00033-of-00072.safetensors", "h.31.self_attention.dense.bias": "model_00033-of-00072.safetensors", "h.31.self_attention.dense.weight": "model_00033-of-00072.safetensors", "h.31.self_attention.query_key_value.bias": "model_00033-of-00072.safetensors", "h.31.self_attention.query_key_value.weight": "model_00033-of-00072.safetensors", "h.32.input_layernorm.bias": "model_00034-of-00072.safetensors", "h.32.input_layernorm.weight": "model_00034-of-00072.safetensors", "h.32.mlp.dense_4h_to_h.bias": "model_00034-of-00072.safetensors", "h.32.mlp.dense_4h_to_h.weight": "model_00034-of-00072.safetensors", "h.32.mlp.dense_h_to_4h.bias": "model_00034-of-00072.safetensors", "h.32.mlp.dense_h_to_4h.weight": "model_00034-of-00072.safetensors", "h.32.post_attention_layernorm.bias": "model_00034-of-00072.safetensors", "h.32.post_attention_layernorm.weight": "model_00034-of-00072.safetensors", "h.32.self_attention.dense.bias": "model_00034-of-00072.safetensors", "h.32.self_attention.dense.weight": "model_00034-of-00072.safetensors", "h.32.self_attention.query_key_value.bias": "model_00034-of-00072.safetensors", "h.32.self_attention.query_key_value.weight": "model_00034-of-00072.safetensors", "h.33.input_layernorm.bias": "model_00035-of-00072.safetensors", "h.33.input_layernorm.weight": "model_00035-of-00072.safetensors", "h.33.mlp.dense_4h_to_h.bias": "model_00035-of-00072.safetensors", "h.33.mlp.dense_4h_to_h.weight": "model_00035-of-00072.safetensors", "h.33.mlp.dense_h_to_4h.bias": "model_00035-of-00072.safetensors", "h.33.mlp.dense_h_to_4h.weight": "model_00035-of-00072.safetensors", "h.33.post_attention_layernorm.bias": "model_00035-of-00072.safetensors", "h.33.post_attention_layernorm.weight": "model_00035-of-00072.safetensors", "h.33.self_attention.dense.bias": "model_00035-of-00072.safetensors", "h.33.self_attention.dense.weight": "model_00035-of-00072.safetensors", "h.33.self_attention.query_key_value.bias": "model_00035-of-00072.safetensors", "h.33.self_attention.query_key_value.weight": "model_00035-of-00072.safetensors", "h.34.input_layernorm.bias": "model_00036-of-00072.safetensors", "h.34.input_layernorm.weight": "model_00036-of-00072.safetensors", "h.34.mlp.dense_4h_to_h.bias": "model_00036-of-00072.safetensors", "h.34.mlp.dense_4h_to_h.weight": "model_00036-of-00072.safetensors", "h.34.mlp.dense_h_to_4h.bias": "model_00036-of-00072.safetensors", "h.34.mlp.dense_h_to_4h.weight": "model_00036-of-00072.safetensors", "h.34.post_attention_layernorm.bias": "model_00036-of-00072.safetensors", "h.34.post_attention_layernorm.weight": "model_00036-of-00072.safetensors", "h.34.self_attention.dense.bias": "model_00036-of-00072.safetensors", "h.34.self_attention.dense.weight": "model_00036-of-00072.safetensors", "h.34.self_attention.query_key_value.bias": "model_00036-of-00072.safetensors", "h.34.self_attention.query_key_value.weight": "model_00036-of-00072.safetensors", "h.35.input_layernorm.bias": "model_00037-of-00072.safetensors", "h.35.input_layernorm.weight": "model_00037-of-00072.safetensors", "h.35.mlp.dense_4h_to_h.bias": "model_00037-of-00072.safetensors", "h.35.mlp.dense_4h_to_h.weight": "model_00037-of-00072.safetensors", "h.35.mlp.dense_h_to_4h.bias": "model_00037-of-00072.safetensors", "h.35.mlp.dense_h_to_4h.weight": "model_00037-of-00072.safetensors", "h.35.post_attention_layernorm.bias": "model_00037-of-00072.safetensors", "h.35.post_attention_layernorm.weight": "model_00037-of-00072.safetensors", "h.35.self_attention.dense.bias": "model_00037-of-00072.safetensors", "h.35.self_attention.dense.weight": "model_00037-of-00072.safetensors", "h.35.self_attention.query_key_value.bias": "model_00037-of-00072.safetensors", "h.35.self_attention.query_key_value.weight": "model_00037-of-00072.safetensors", "h.36.input_layernorm.bias": "model_00038-of-00072.safetensors", "h.36.input_layernorm.weight": "model_00038-of-00072.safetensors", "h.36.mlp.dense_4h_to_h.bias": "model_00038-of-00072.safetensors", "h.36.mlp.dense_4h_to_h.weight": "model_00038-of-00072.safetensors", "h.36.mlp.dense_h_to_4h.bias": "model_00038-of-00072.safetensors", "h.36.mlp.dense_h_to_4h.weight": "model_00038-of-00072.safetensors", "h.36.post_attention_layernorm.bias": "model_00038-of-00072.safetensors", "h.36.post_attention_layernorm.weight": "model_00038-of-00072.safetensors", "h.36.self_attention.dense.bias": "model_00038-of-00072.safetensors", "h.36.self_attention.dense.weight": "model_00038-of-00072.safetensors", "h.36.self_attention.query_key_value.bias": "model_00038-of-00072.safetensors", "h.36.self_attention.query_key_value.weight": "model_00038-of-00072.safetensors", "h.37.input_layernorm.bias": "model_00039-of-00072.safetensors", "h.37.input_layernorm.weight": "model_00039-of-00072.safetensors", "h.37.mlp.dense_4h_to_h.bias": "model_00039-of-00072.safetensors", "h.37.mlp.dense_4h_to_h.weight": "model_00039-of-00072.safetensors", "h.37.mlp.dense_h_to_4h.bias": "model_00039-of-00072.safetensors", "h.37.mlp.dense_h_to_4h.weight": "model_00039-of-00072.safetensors", "h.37.post_attention_layernorm.bias": "model_00039-of-00072.safetensors", "h.37.post_attention_layernorm.weight": "model_00039-of-00072.safetensors", "h.37.self_attention.dense.bias": "model_00039-of-00072.safetensors", "h.37.self_attention.dense.weight": "model_00039-of-00072.safetensors", "h.37.self_attention.query_key_value.bias": "model_00039-of-00072.safetensors", "h.37.self_attention.query_key_value.weight": "model_00039-of-00072.safetensors", "h.38.input_layernorm.bias": "model_00040-of-00072.safetensors", "h.38.input_layernorm.weight": "model_00040-of-00072.safetensors", "h.38.mlp.dense_4h_to_h.bias": "model_00040-of-00072.safetensors", "h.38.mlp.dense_4h_to_h.weight": "model_00040-of-00072.safetensors", "h.38.mlp.dense_h_to_4h.bias": "model_00040-of-00072.safetensors", "h.38.mlp.dense_h_to_4h.weight": "model_00040-of-00072.safetensors", "h.38.post_attention_layernorm.bias": "model_00040-of-00072.safetensors", "h.38.post_attention_layernorm.weight": "model_00040-of-00072.safetensors", "h.38.self_attention.dense.bias": "model_00040-of-00072.safetensors", "h.38.self_attention.dense.weight": "model_00040-of-00072.safetensors", "h.38.self_attention.query_key_value.bias": "model_00040-of-00072.safetensors", "h.38.self_attention.query_key_value.weight": "model_00040-of-00072.safetensors", "h.39.input_layernorm.bias": "model_00041-of-00072.safetensors", "h.39.input_layernorm.weight": "model_00041-of-00072.safetensors", "h.39.mlp.dense_4h_to_h.bias": "model_00041-of-00072.safetensors", "h.39.mlp.dense_4h_to_h.weight": "model_00041-of-00072.safetensors", "h.39.mlp.dense_h_to_4h.bias": "model_00041-of-00072.safetensors", "h.39.mlp.dense_h_to_4h.weight": "model_00041-of-00072.safetensors", "h.39.post_attention_layernorm.bias": "model_00041-of-00072.safetensors", "h.39.post_attention_layernorm.weight": "model_00041-of-00072.safetensors", "h.39.self_attention.dense.bias": "model_00041-of-00072.safetensors", "h.39.self_attention.dense.weight": "model_00041-of-00072.safetensors", "h.39.self_attention.query_key_value.bias": "model_00041-of-00072.safetensors", "h.39.self_attention.query_key_value.weight": "model_00041-of-00072.safetensors", "h.4.input_layernorm.bias": "model_00006-of-00072.safetensors", "h.4.input_layernorm.weight": "model_00006-of-00072.safetensors", "h.4.mlp.dense_4h_to_h.bias": "model_00006-of-00072.safetensors", "h.4.mlp.dense_4h_to_h.weight": "model_00006-of-00072.safetensors", "h.4.mlp.dense_h_to_4h.bias": "model_00006-of-00072.safetensors", "h.4.mlp.dense_h_to_4h.weight": "model_00006-of-00072.safetensors", "h.4.post_attention_layernorm.bias": "model_00006-of-00072.safetensors", "h.4.post_attention_layernorm.weight": "model_00006-of-00072.safetensors", "h.4.self_attention.dense.bias": "model_00006-of-00072.safetensors", "h.4.self_attention.dense.weight": "model_00006-of-00072.safetensors", "h.4.self_attention.query_key_value.bias": "model_00006-of-00072.safetensors", "h.4.self_attention.query_key_value.weight": "model_00006-of-00072.safetensors", "h.40.input_layernorm.bias": "model_00042-of-00072.safetensors", "h.40.input_layernorm.weight": "model_00042-of-00072.safetensors", "h.40.mlp.dense_4h_to_h.bias": "model_00042-of-00072.safetensors", "h.40.mlp.dense_4h_to_h.weight": "model_00042-of-00072.safetensors", "h.40.mlp.dense_h_to_4h.bias": "model_00042-of-00072.safetensors", "h.40.mlp.dense_h_to_4h.weight": "model_00042-of-00072.safetensors", "h.40.post_attention_layernorm.bias": "model_00042-of-00072.safetensors", "h.40.post_attention_layernorm.weight": "model_00042-of-00072.safetensors", "h.40.self_attention.dense.bias": "model_00042-of-00072.safetensors", "h.40.self_attention.dense.weight": "model_00042-of-00072.safetensors", "h.40.self_attention.query_key_value.bias": "model_00042-of-00072.safetensors", "h.40.self_attention.query_key_value.weight": "model_00042-of-00072.safetensors", "h.41.input_layernorm.bias": "model_00043-of-00072.safetensors", "h.41.input_layernorm.weight": "model_00043-of-00072.safetensors", "h.41.mlp.dense_4h_to_h.bias": "model_00043-of-00072.safetensors", "h.41.mlp.dense_4h_to_h.weight": "model_00043-of-00072.safetensors", "h.41.mlp.dense_h_to_4h.bias": "model_00043-of-00072.safetensors", "h.41.mlp.dense_h_to_4h.weight": "model_00043-of-00072.safetensors", "h.41.post_attention_layernorm.bias": "model_00043-of-00072.safetensors", "h.41.post_attention_layernorm.weight": "model_00043-of-00072.safetensors", "h.41.self_attention.dense.bias": "model_00043-of-00072.safetensors", "h.41.self_attention.dense.weight": "model_00043-of-00072.safetensors", "h.41.self_attention.query_key_value.bias": "model_00043-of-00072.safetensors", "h.41.self_attention.query_key_value.weight": "model_00043-of-00072.safetensors", "h.42.input_layernorm.bias": "model_00044-of-00072.safetensors", "h.42.input_layernorm.weight": "model_00044-of-00072.safetensors", "h.42.mlp.dense_4h_to_h.bias": "model_00044-of-00072.safetensors", "h.42.mlp.dense_4h_to_h.weight": "model_00044-of-00072.safetensors", "h.42.mlp.dense_h_to_4h.bias": "model_00044-of-00072.safetensors", "h.42.mlp.dense_h_to_4h.weight": "model_00044-of-00072.safetensors", "h.42.post_attention_layernorm.bias": "model_00044-of-00072.safetensors", "h.42.post_attention_layernorm.weight": "model_00044-of-00072.safetensors", "h.42.self_attention.dense.bias": "model_00044-of-00072.safetensors", "h.42.self_attention.dense.weight": "model_00044-of-00072.safetensors", "h.42.self_attention.query_key_value.bias": "model_00044-of-00072.safetensors", "h.42.self_attention.query_key_value.weight": "model_00044-of-00072.safetensors", "h.43.input_layernorm.bias": "model_00045-of-00072.safetensors", "h.43.input_layernorm.weight": "model_00045-of-00072.safetensors", "h.43.mlp.dense_4h_to_h.bias": "model_00045-of-00072.safetensors", "h.43.mlp.dense_4h_to_h.weight": "model_00045-of-00072.safetensors", "h.43.mlp.dense_h_to_4h.bias": "model_00045-of-00072.safetensors", "h.43.mlp.dense_h_to_4h.weight": "model_00045-of-00072.safetensors", "h.43.post_attention_layernorm.bias": "model_00045-of-00072.safetensors", "h.43.post_attention_layernorm.weight": "model_00045-of-00072.safetensors", "h.43.self_attention.dense.bias": "model_00045-of-00072.safetensors", "h.43.self_attention.dense.weight": "model_00045-of-00072.safetensors", "h.43.self_attention.query_key_value.bias": "model_00045-of-00072.safetensors", "h.43.self_attention.query_key_value.weight": "model_00045-of-00072.safetensors", "h.44.input_layernorm.bias": "model_00046-of-00072.safetensors", "h.44.input_layernorm.weight": "model_00046-of-00072.safetensors", "h.44.mlp.dense_4h_to_h.bias": "model_00046-of-00072.safetensors", "h.44.mlp.dense_4h_to_h.weight": "model_00046-of-00072.safetensors", "h.44.mlp.dense_h_to_4h.bias": "model_00046-of-00072.safetensors", "h.44.mlp.dense_h_to_4h.weight": "model_00046-of-00072.safetensors", "h.44.post_attention_layernorm.bias": "model_00046-of-00072.safetensors", "h.44.post_attention_layernorm.weight": "model_00046-of-00072.safetensors", "h.44.self_attention.dense.bias": "model_00046-of-00072.safetensors", "h.44.self_attention.dense.weight": "model_00046-of-00072.safetensors", "h.44.self_attention.query_key_value.bias": "model_00046-of-00072.safetensors", "h.44.self_attention.query_key_value.weight": "model_00046-of-00072.safetensors", "h.45.input_layernorm.bias": "model_00047-of-00072.safetensors", "h.45.input_layernorm.weight": "model_00047-of-00072.safetensors", "h.45.mlp.dense_4h_to_h.bias": "model_00047-of-00072.safetensors", "h.45.mlp.dense_4h_to_h.weight": "model_00047-of-00072.safetensors", "h.45.mlp.dense_h_to_4h.bias": "model_00047-of-00072.safetensors", "h.45.mlp.dense_h_to_4h.weight": "model_00047-of-00072.safetensors", "h.45.post_attention_layernorm.bias": "model_00047-of-00072.safetensors", "h.45.post_attention_layernorm.weight": "model_00047-of-00072.safetensors", "h.45.self_attention.dense.bias": "model_00047-of-00072.safetensors", "h.45.self_attention.dense.weight": "model_00047-of-00072.safetensors", "h.45.self_attention.query_key_value.bias": "model_00047-of-00072.safetensors", "h.45.self_attention.query_key_value.weight": "model_00047-of-00072.safetensors", "h.46.input_layernorm.bias": "model_00048-of-00072.safetensors", "h.46.input_layernorm.weight": "model_00048-of-00072.safetensors", "h.46.mlp.dense_4h_to_h.bias": "model_00048-of-00072.safetensors", "h.46.mlp.dense_4h_to_h.weight": "model_00048-of-00072.safetensors", "h.46.mlp.dense_h_to_4h.bias": "model_00048-of-00072.safetensors", "h.46.mlp.dense_h_to_4h.weight": "model_00048-of-00072.safetensors", "h.46.post_attention_layernorm.bias": "model_00048-of-00072.safetensors", "h.46.post_attention_layernorm.weight": "model_00048-of-00072.safetensors", "h.46.self_attention.dense.bias": "model_00048-of-00072.safetensors", "h.46.self_attention.dense.weight": "model_00048-of-00072.safetensors", "h.46.self_attention.query_key_value.bias": "model_00048-of-00072.safetensors", "h.46.self_attention.query_key_value.weight": "model_00048-of-00072.safetensors", "h.47.input_layernorm.bias": "model_00049-of-00072.safetensors", "h.47.input_layernorm.weight": "model_00049-of-00072.safetensors", "h.47.mlp.dense_4h_to_h.bias": "model_00049-of-00072.safetensors", "h.47.mlp.dense_4h_to_h.weight": "model_00049-of-00072.safetensors", "h.47.mlp.dense_h_to_4h.bias": "model_00049-of-00072.safetensors", "h.47.mlp.dense_h_to_4h.weight": "model_00049-of-00072.safetensors", "h.47.post_attention_layernorm.bias": "model_00049-of-00072.safetensors", "h.47.post_attention_layernorm.weight": "model_00049-of-00072.safetensors", "h.47.self_attention.dense.bias": "model_00049-of-00072.safetensors", "h.47.self_attention.dense.weight": "model_00049-of-00072.safetensors", "h.47.self_attention.query_key_value.bias": "model_00049-of-00072.safetensors", "h.47.self_attention.query_key_value.weight": "model_00049-of-00072.safetensors", "h.48.input_layernorm.bias": "model_00050-of-00072.safetensors", "h.48.input_layernorm.weight": "model_00050-of-00072.safetensors", "h.48.mlp.dense_4h_to_h.bias": "model_00050-of-00072.safetensors", "h.48.mlp.dense_4h_to_h.weight": "model_00050-of-00072.safetensors", "h.48.mlp.dense_h_to_4h.bias": "model_00050-of-00072.safetensors", "h.48.mlp.dense_h_to_4h.weight": "model_00050-of-00072.safetensors", "h.48.post_attention_layernorm.bias": "model_00050-of-00072.safetensors", "h.48.post_attention_layernorm.weight": "model_00050-of-00072.safetensors", "h.48.self_attention.dense.bias": "model_00050-of-00072.safetensors", "h.48.self_attention.dense.weight": "model_00050-of-00072.safetensors", "h.48.self_attention.query_key_value.bias": "model_00050-of-00072.safetensors", "h.48.self_attention.query_key_value.weight": "model_00050-of-00072.safetensors", "h.49.input_layernorm.bias": "model_00051-of-00072.safetensors", "h.49.input_layernorm.weight": "model_00051-of-00072.safetensors", "h.49.mlp.dense_4h_to_h.bias": "model_00051-of-00072.safetensors", "h.49.mlp.dense_4h_to_h.weight": "model_00051-of-00072.safetensors", "h.49.mlp.dense_h_to_4h.bias": "model_00051-of-00072.safetensors", "h.49.mlp.dense_h_to_4h.weight": "model_00051-of-00072.safetensors", "h.49.post_attention_layernorm.bias": "model_00051-of-00072.safetensors", "h.49.post_attention_layernorm.weight": "model_00051-of-00072.safetensors", "h.49.self_attention.dense.bias": "model_00051-of-00072.safetensors", "h.49.self_attention.dense.weight": "model_00051-of-00072.safetensors", "h.49.self_attention.query_key_value.bias": "model_00051-of-00072.safetensors", "h.49.self_attention.query_key_value.weight": "model_00051-of-00072.safetensors", "h.5.input_layernorm.bias": "model_00007-of-00072.safetensors", "h.5.input_layernorm.weight": "model_00007-of-00072.safetensors", "h.5.mlp.dense_4h_to_h.bias": "model_00007-of-00072.safetensors", "h.5.mlp.dense_4h_to_h.weight": "model_00007-of-00072.safetensors", "h.5.mlp.dense_h_to_4h.bias": "model_00007-of-00072.safetensors", "h.5.mlp.dense_h_to_4h.weight": "model_00007-of-00072.safetensors", "h.5.post_attention_layernorm.bias": "model_00007-of-00072.safetensors", "h.5.post_attention_layernorm.weight": "model_00007-of-00072.safetensors", "h.5.self_attention.dense.bias": "model_00007-of-00072.safetensors", "h.5.self_attention.dense.weight": "model_00007-of-00072.safetensors", "h.5.self_attention.query_key_value.bias": "model_00007-of-00072.safetensors", "h.5.self_attention.query_key_value.weight": "model_00007-of-00072.safetensors", "h.50.input_layernorm.bias": "model_00052-of-00072.safetensors", "h.50.input_layernorm.weight": "model_00052-of-00072.safetensors", "h.50.mlp.dense_4h_to_h.bias": "model_00052-of-00072.safetensors", "h.50.mlp.dense_4h_to_h.weight": "model_00052-of-00072.safetensors", "h.50.mlp.dense_h_to_4h.bias": "model_00052-of-00072.safetensors", "h.50.mlp.dense_h_to_4h.weight": "model_00052-of-00072.safetensors", "h.50.post_attention_layernorm.bias": "model_00052-of-00072.safetensors", "h.50.post_attention_layernorm.weight": "model_00052-of-00072.safetensors", "h.50.self_attention.dense.bias": "model_00052-of-00072.safetensors", "h.50.self_attention.dense.weight": "model_00052-of-00072.safetensors", "h.50.self_attention.query_key_value.bias": "model_00052-of-00072.safetensors", "h.50.self_attention.query_key_value.weight": "model_00052-of-00072.safetensors", "h.51.input_layernorm.bias": "model_00053-of-00072.safetensors", "h.51.input_layernorm.weight": "model_00053-of-00072.safetensors", "h.51.mlp.dense_4h_to_h.bias": "model_00053-of-00072.safetensors", "h.51.mlp.dense_4h_to_h.weight": "model_00053-of-00072.safetensors", "h.51.mlp.dense_h_to_4h.bias": "model_00053-of-00072.safetensors", "h.51.mlp.dense_h_to_4h.weight": "model_00053-of-00072.safetensors", "h.51.post_attention_layernorm.bias": "model_00053-of-00072.safetensors", "h.51.post_attention_layernorm.weight": "model_00053-of-00072.safetensors", "h.51.self_attention.dense.bias": "model_00053-of-00072.safetensors", "h.51.self_attention.dense.weight": "model_00053-of-00072.safetensors", "h.51.self_attention.query_key_value.bias": "model_00053-of-00072.safetensors", "h.51.self_attention.query_key_value.weight": "model_00053-of-00072.safetensors", "h.52.input_layernorm.bias": "model_00054-of-00072.safetensors", "h.52.input_layernorm.weight": "model_00054-of-00072.safetensors", "h.52.mlp.dense_4h_to_h.bias": "model_00054-of-00072.safetensors", "h.52.mlp.dense_4h_to_h.weight": "model_00054-of-00072.safetensors", "h.52.mlp.dense_h_to_4h.bias": "model_00054-of-00072.safetensors", "h.52.mlp.dense_h_to_4h.weight": "model_00054-of-00072.safetensors", "h.52.post_attention_layernorm.bias": "model_00054-of-00072.safetensors", "h.52.post_attention_layernorm.weight": "model_00054-of-00072.safetensors", "h.52.self_attention.dense.bias": "model_00054-of-00072.safetensors", "h.52.self_attention.dense.weight": "model_00054-of-00072.safetensors", "h.52.self_attention.query_key_value.bias": "model_00054-of-00072.safetensors", "h.52.self_attention.query_key_value.weight": "model_00054-of-00072.safetensors", "h.53.input_layernorm.bias": "model_00055-of-00072.safetensors", "h.53.input_layernorm.weight": "model_00055-of-00072.safetensors", "h.53.mlp.dense_4h_to_h.bias": "model_00055-of-00072.safetensors", "h.53.mlp.dense_4h_to_h.weight": "model_00055-of-00072.safetensors", "h.53.mlp.dense_h_to_4h.bias": "model_00055-of-00072.safetensors", "h.53.mlp.dense_h_to_4h.weight": "model_00055-of-00072.safetensors", "h.53.post_attention_layernorm.bias": "model_00055-of-00072.safetensors", "h.53.post_attention_layernorm.weight": "model_00055-of-00072.safetensors", "h.53.self_attention.dense.bias": "model_00055-of-00072.safetensors", "h.53.self_attention.dense.weight": "model_00055-of-00072.safetensors", "h.53.self_attention.query_key_value.bias": "model_00055-of-00072.safetensors", "h.53.self_attention.query_key_value.weight": "model_00055-of-00072.safetensors", "h.54.input_layernorm.bias": "model_00056-of-00072.safetensors", "h.54.input_layernorm.weight": "model_00056-of-00072.safetensors", "h.54.mlp.dense_4h_to_h.bias": "model_00056-of-00072.safetensors", "h.54.mlp.dense_4h_to_h.weight": "model_00056-of-00072.safetensors", "h.54.mlp.dense_h_to_4h.bias": "model_00056-of-00072.safetensors", "h.54.mlp.dense_h_to_4h.weight": "model_00056-of-00072.safetensors", "h.54.post_attention_layernorm.bias": "model_00056-of-00072.safetensors", "h.54.post_attention_layernorm.weight": "model_00056-of-00072.safetensors", "h.54.self_attention.dense.bias": "model_00056-of-00072.safetensors", "h.54.self_attention.dense.weight": "model_00056-of-00072.safetensors", "h.54.self_attention.query_key_value.bias": "model_00056-of-00072.safetensors", "h.54.self_attention.query_key_value.weight": "model_00056-of-00072.safetensors", "h.55.input_layernorm.bias": "model_00057-of-00072.safetensors", "h.55.input_layernorm.weight": "model_00057-of-00072.safetensors", "h.55.mlp.dense_4h_to_h.bias": "model_00057-of-00072.safetensors", "h.55.mlp.dense_4h_to_h.weight": "model_00057-of-00072.safetensors", "h.55.mlp.dense_h_to_4h.bias": "model_00057-of-00072.safetensors", "h.55.mlp.dense_h_to_4h.weight": "model_00057-of-00072.safetensors", "h.55.post_attention_layernorm.bias": "model_00057-of-00072.safetensors", "h.55.post_attention_layernorm.weight": "model_00057-of-00072.safetensors", "h.55.self_attention.dense.bias": "model_00057-of-00072.safetensors", "h.55.self_attention.dense.weight": "model_00057-of-00072.safetensors", "h.55.self_attention.query_key_value.bias": "model_00057-of-00072.safetensors", "h.55.self_attention.query_key_value.weight": "model_00057-of-00072.safetensors", "h.56.input_layernorm.bias": "model_00058-of-00072.safetensors", "h.56.input_layernorm.weight": "model_00058-of-00072.safetensors", "h.56.mlp.dense_4h_to_h.bias": "model_00058-of-00072.safetensors", "h.56.mlp.dense_4h_to_h.weight": "model_00058-of-00072.safetensors", "h.56.mlp.dense_h_to_4h.bias": "model_00058-of-00072.safetensors", "h.56.mlp.dense_h_to_4h.weight": "model_00058-of-00072.safetensors", "h.56.post_attention_layernorm.bias": "model_00058-of-00072.safetensors", "h.56.post_attention_layernorm.weight": "model_00058-of-00072.safetensors", "h.56.self_attention.dense.bias": "model_00058-of-00072.safetensors", "h.56.self_attention.dense.weight": "model_00058-of-00072.safetensors", "h.56.self_attention.query_key_value.bias": "model_00058-of-00072.safetensors", "h.56.self_attention.query_key_value.weight": "model_00058-of-00072.safetensors", "h.57.input_layernorm.bias": "model_00059-of-00072.safetensors", "h.57.input_layernorm.weight": "model_00059-of-00072.safetensors", "h.57.mlp.dense_4h_to_h.bias": "model_00059-of-00072.safetensors", "h.57.mlp.dense_4h_to_h.weight": "model_00059-of-00072.safetensors", "h.57.mlp.dense_h_to_4h.bias": "model_00059-of-00072.safetensors", "h.57.mlp.dense_h_to_4h.weight": "model_00059-of-00072.safetensors", "h.57.post_attention_layernorm.bias": "model_00059-of-00072.safetensors", "h.57.post_attention_layernorm.weight": "model_00059-of-00072.safetensors", "h.57.self_attention.dense.bias": "model_00059-of-00072.safetensors", "h.57.self_attention.dense.weight": "model_00059-of-00072.safetensors", "h.57.self_attention.query_key_value.bias": "model_00059-of-00072.safetensors", "h.57.self_attention.query_key_value.weight": "model_00059-of-00072.safetensors", "h.58.input_layernorm.bias": "model_00060-of-00072.safetensors", "h.58.input_layernorm.weight": "model_00060-of-00072.safetensors", "h.58.mlp.dense_4h_to_h.bias": "model_00060-of-00072.safetensors", "h.58.mlp.dense_4h_to_h.weight": "model_00060-of-00072.safetensors", "h.58.mlp.dense_h_to_4h.bias": "model_00060-of-00072.safetensors", "h.58.mlp.dense_h_to_4h.weight": "model_00060-of-00072.safetensors", "h.58.post_attention_layernorm.bias": "model_00060-of-00072.safetensors", "h.58.post_attention_layernorm.weight": "model_00060-of-00072.safetensors", "h.58.self_attention.dense.bias": "model_00060-of-00072.safetensors", "h.58.self_attention.dense.weight": "model_00060-of-00072.safetensors", "h.58.self_attention.query_key_value.bias": "model_00060-of-00072.safetensors", "h.58.self_attention.query_key_value.weight": "model_00060-of-00072.safetensors", "h.59.input_layernorm.bias": "model_00061-of-00072.safetensors", "h.59.input_layernorm.weight": "model_00061-of-00072.safetensors", "h.59.mlp.dense_4h_to_h.bias": "model_00061-of-00072.safetensors", "h.59.mlp.dense_4h_to_h.weight": "model_00061-of-00072.safetensors", "h.59.mlp.dense_h_to_4h.bias": "model_00061-of-00072.safetensors", "h.59.mlp.dense_h_to_4h.weight": "model_00061-of-00072.safetensors", "h.59.post_attention_layernorm.bias": "model_00061-of-00072.safetensors", "h.59.post_attention_layernorm.weight": "model_00061-of-00072.safetensors", "h.59.self_attention.dense.bias": "model_00061-of-00072.safetensors", "h.59.self_attention.dense.weight": "model_00061-of-00072.safetensors", "h.59.self_attention.query_key_value.bias": "model_00061-of-00072.safetensors", "h.59.self_attention.query_key_value.weight": "model_00061-of-00072.safetensors", "h.6.input_layernorm.bias": "model_00008-of-00072.safetensors", "h.6.input_layernorm.weight": "model_00008-of-00072.safetensors", "h.6.mlp.dense_4h_to_h.bias": "model_00008-of-00072.safetensors", "h.6.mlp.dense_4h_to_h.weight": "model_00008-of-00072.safetensors", "h.6.mlp.dense_h_to_4h.bias": "model_00008-of-00072.safetensors", "h.6.mlp.dense_h_to_4h.weight": "model_00008-of-00072.safetensors", "h.6.post_attention_layernorm.bias": "model_00008-of-00072.safetensors", "h.6.post_attention_layernorm.weight": "model_00008-of-00072.safetensors", "h.6.self_attention.dense.bias": "model_00008-of-00072.safetensors", "h.6.self_attention.dense.weight": "model_00008-of-00072.safetensors", "h.6.self_attention.query_key_value.bias": "model_00008-of-00072.safetensors", "h.6.self_attention.query_key_value.weight": "model_00008-of-00072.safetensors", "h.60.input_layernorm.bias": "model_00062-of-00072.safetensors", "h.60.input_layernorm.weight": "model_00062-of-00072.safetensors", "h.60.mlp.dense_4h_to_h.bias": "model_00062-of-00072.safetensors", "h.60.mlp.dense_4h_to_h.weight": "model_00062-of-00072.safetensors", "h.60.mlp.dense_h_to_4h.bias": "model_00062-of-00072.safetensors", "h.60.mlp.dense_h_to_4h.weight": "model_00062-of-00072.safetensors", "h.60.post_attention_layernorm.bias": "model_00062-of-00072.safetensors", "h.60.post_attention_layernorm.weight": "model_00062-of-00072.safetensors", "h.60.self_attention.dense.bias": "model_00062-of-00072.safetensors", "h.60.self_attention.dense.weight": "model_00062-of-00072.safetensors", "h.60.self_attention.query_key_value.bias": "model_00062-of-00072.safetensors", "h.60.self_attention.query_key_value.weight": "model_00062-of-00072.safetensors", "h.61.input_layernorm.bias": "model_00063-of-00072.safetensors", "h.61.input_layernorm.weight": "model_00063-of-00072.safetensors", "h.61.mlp.dense_4h_to_h.bias": "model_00063-of-00072.safetensors", "h.61.mlp.dense_4h_to_h.weight": "model_00063-of-00072.safetensors", "h.61.mlp.dense_h_to_4h.bias": "model_00063-of-00072.safetensors", "h.61.mlp.dense_h_to_4h.weight": "model_00063-of-00072.safetensors", "h.61.post_attention_layernorm.bias": "model_00063-of-00072.safetensors", "h.61.post_attention_layernorm.weight": "model_00063-of-00072.safetensors", "h.61.self_attention.dense.bias": "model_00063-of-00072.safetensors", "h.61.self_attention.dense.weight": "model_00063-of-00072.safetensors", "h.61.self_attention.query_key_value.bias": "model_00063-of-00072.safetensors", "h.61.self_attention.query_key_value.weight": "model_00063-of-00072.safetensors", "h.62.input_layernorm.bias": "model_00064-of-00072.safetensors", "h.62.input_layernorm.weight": "model_00064-of-00072.safetensors", "h.62.mlp.dense_4h_to_h.bias": "model_00064-of-00072.safetensors", "h.62.mlp.dense_4h_to_h.weight": "model_00064-of-00072.safetensors", "h.62.mlp.dense_h_to_4h.bias": "model_00064-of-00072.safetensors", "h.62.mlp.dense_h_to_4h.weight": "model_00064-of-00072.safetensors", "h.62.post_attention_layernorm.bias": "model_00064-of-00072.safetensors", "h.62.post_attention_layernorm.weight": "model_00064-of-00072.safetensors", "h.62.self_attention.dense.bias": "model_00064-of-00072.safetensors", "h.62.self_attention.dense.weight": "model_00064-of-00072.safetensors", "h.62.self_attention.query_key_value.bias": "model_00064-of-00072.safetensors", "h.62.self_attention.query_key_value.weight": "model_00064-of-00072.safetensors", "h.63.input_layernorm.bias": "model_00065-of-00072.safetensors", "h.63.input_layernorm.weight": "model_00065-of-00072.safetensors", "h.63.mlp.dense_4h_to_h.bias": "model_00065-of-00072.safetensors", "h.63.mlp.dense_4h_to_h.weight": "model_00065-of-00072.safetensors", "h.63.mlp.dense_h_to_4h.bias": "model_00065-of-00072.safetensors", "h.63.mlp.dense_h_to_4h.weight": "model_00065-of-00072.safetensors", "h.63.post_attention_layernorm.bias": "model_00065-of-00072.safetensors", "h.63.post_attention_layernorm.weight": "model_00065-of-00072.safetensors", "h.63.self_attention.dense.bias": "model_00065-of-00072.safetensors", "h.63.self_attention.dense.weight": "model_00065-of-00072.safetensors", "h.63.self_attention.query_key_value.bias": "model_00065-of-00072.safetensors", "h.63.self_attention.query_key_value.weight": "model_00065-of-00072.safetensors", "h.64.input_layernorm.bias": "model_00066-of-00072.safetensors", "h.64.input_layernorm.weight": "model_00066-of-00072.safetensors", "h.64.mlp.dense_4h_to_h.bias": "model_00066-of-00072.safetensors", "h.64.mlp.dense_4h_to_h.weight": "model_00066-of-00072.safetensors", "h.64.mlp.dense_h_to_4h.bias": "model_00066-of-00072.safetensors", "h.64.mlp.dense_h_to_4h.weight": "model_00066-of-00072.safetensors", "h.64.post_attention_layernorm.bias": "model_00066-of-00072.safetensors", "h.64.post_attention_layernorm.weight": "model_00066-of-00072.safetensors", "h.64.self_attention.dense.bias": "model_00066-of-00072.safetensors", "h.64.self_attention.dense.weight": "model_00066-of-00072.safetensors", "h.64.self_attention.query_key_value.bias": "model_00066-of-00072.safetensors", "h.64.self_attention.query_key_value.weight": "model_00066-of-00072.safetensors", "h.65.input_layernorm.bias": "model_00067-of-00072.safetensors", "h.65.input_layernorm.weight": "model_00067-of-00072.safetensors", "h.65.mlp.dense_4h_to_h.bias": "model_00067-of-00072.safetensors", "h.65.mlp.dense_4h_to_h.weight": "model_00067-of-00072.safetensors", "h.65.mlp.dense_h_to_4h.bias": "model_00067-of-00072.safetensors", "h.65.mlp.dense_h_to_4h.weight": "model_00067-of-00072.safetensors", "h.65.post_attention_layernorm.bias": "model_00067-of-00072.safetensors", "h.65.post_attention_layernorm.weight": "model_00067-of-00072.safetensors", "h.65.self_attention.dense.bias": "model_00067-of-00072.safetensors", "h.65.self_attention.dense.weight": "model_00067-of-00072.safetensors", "h.65.self_attention.query_key_value.bias": "model_00067-of-00072.safetensors", "h.65.self_attention.query_key_value.weight": "model_00067-of-00072.safetensors", "h.66.input_layernorm.bias": "model_00068-of-00072.safetensors", "h.66.input_layernorm.weight": "model_00068-of-00072.safetensors", "h.66.mlp.dense_4h_to_h.bias": "model_00068-of-00072.safetensors", "h.66.mlp.dense_4h_to_h.weight": "model_00068-of-00072.safetensors", "h.66.mlp.dense_h_to_4h.bias": "model_00068-of-00072.safetensors", "h.66.mlp.dense_h_to_4h.weight": "model_00068-of-00072.safetensors", "h.66.post_attention_layernorm.bias": "model_00068-of-00072.safetensors", "h.66.post_attention_layernorm.weight": "model_00068-of-00072.safetensors", "h.66.self_attention.dense.bias": "model_00068-of-00072.safetensors", "h.66.self_attention.dense.weight": "model_00068-of-00072.safetensors", "h.66.self_attention.query_key_value.bias": "model_00068-of-00072.safetensors", "h.66.self_attention.query_key_value.weight": "model_00068-of-00072.safetensors", "h.67.input_layernorm.bias": "model_00069-of-00072.safetensors", "h.67.input_layernorm.weight": "model_00069-of-00072.safetensors", "h.67.mlp.dense_4h_to_h.bias": "model_00069-of-00072.safetensors", "h.67.mlp.dense_4h_to_h.weight": "model_00069-of-00072.safetensors", "h.67.mlp.dense_h_to_4h.bias": "model_00069-of-00072.safetensors", "h.67.mlp.dense_h_to_4h.weight": "model_00069-of-00072.safetensors", "h.67.post_attention_layernorm.bias": "model_00069-of-00072.safetensors", "h.67.post_attention_layernorm.weight": "model_00069-of-00072.safetensors", "h.67.self_attention.dense.bias": "model_00069-of-00072.safetensors", "h.67.self_attention.dense.weight": "model_00069-of-00072.safetensors", "h.67.self_attention.query_key_value.bias": "model_00069-of-00072.safetensors", "h.67.self_attention.query_key_value.weight": "model_00069-of-00072.safetensors", "h.68.input_layernorm.bias": "model_00070-of-00072.safetensors", "h.68.input_layernorm.weight": "model_00070-of-00072.safetensors", "h.68.mlp.dense_4h_to_h.bias": "model_00070-of-00072.safetensors", "h.68.mlp.dense_4h_to_h.weight": "model_00070-of-00072.safetensors", "h.68.mlp.dense_h_to_4h.bias": "model_00070-of-00072.safetensors", "h.68.mlp.dense_h_to_4h.weight": "model_00070-of-00072.safetensors", "h.68.post_attention_layernorm.bias": "model_00070-of-00072.safetensors", "h.68.post_attention_layernorm.weight": "model_00070-of-00072.safetensors", "h.68.self_attention.dense.bias": "model_00070-of-00072.safetensors", "h.68.self_attention.dense.weight": "model_00070-of-00072.safetensors", "h.68.self_attention.query_key_value.bias": "model_00070-of-00072.safetensors", "h.68.self_attention.query_key_value.weight": "model_00070-of-00072.safetensors", "h.69.input_layernorm.bias": "model_00071-of-00072.safetensors", "h.69.input_layernorm.weight": "model_00071-of-00072.safetensors", "h.69.mlp.dense_4h_to_h.bias": "model_00071-of-00072.safetensors", "h.69.mlp.dense_4h_to_h.weight": "model_00071-of-00072.safetensors", "h.69.mlp.dense_h_to_4h.bias": "model_00071-of-00072.safetensors", "h.69.mlp.dense_h_to_4h.weight": "model_00071-of-00072.safetensors", "h.69.post_attention_layernorm.bias": "model_00071-of-00072.safetensors", "h.69.post_attention_layernorm.weight": "model_00071-of-00072.safetensors", "h.69.self_attention.dense.bias": "model_00071-of-00072.safetensors", "h.69.self_attention.dense.weight": "model_00071-of-00072.safetensors", "h.69.self_attention.query_key_value.bias": "model_00071-of-00072.safetensors", "h.69.self_attention.query_key_value.weight": "model_00071-of-00072.safetensors", "h.7.input_layernorm.bias": "model_00009-of-00072.safetensors", "h.7.input_layernorm.weight": "model_00009-of-00072.safetensors", "h.7.mlp.dense_4h_to_h.bias": "model_00009-of-00072.safetensors", "h.7.mlp.dense_4h_to_h.weight": "model_00009-of-00072.safetensors", "h.7.mlp.dense_h_to_4h.bias": "model_00009-of-00072.safetensors", "h.7.mlp.dense_h_to_4h.weight": "model_00009-of-00072.safetensors", "h.7.post_attention_layernorm.bias": "model_00009-of-00072.safetensors", "h.7.post_attention_layernorm.weight": "model_00009-of-00072.safetensors", "h.7.self_attention.dense.bias": "model_00009-of-00072.safetensors", "h.7.self_attention.dense.weight": "model_00009-of-00072.safetensors", "h.7.self_attention.query_key_value.bias": "model_00009-of-00072.safetensors", "h.7.self_attention.query_key_value.weight": "model_00009-of-00072.safetensors", "h.8.input_layernorm.bias": "model_00010-of-00072.safetensors", "h.8.input_layernorm.weight": "model_00010-of-00072.safetensors", "h.8.mlp.dense_4h_to_h.bias": "model_00010-of-00072.safetensors", "h.8.mlp.dense_4h_to_h.weight": "model_00010-of-00072.safetensors", "h.8.mlp.dense_h_to_4h.bias": "model_00010-of-00072.safetensors", "h.8.mlp.dense_h_to_4h.weight": "model_00010-of-00072.safetensors", "h.8.post_attention_layernorm.bias": "model_00010-of-00072.safetensors", "h.8.post_attention_layernorm.weight": "model_00010-of-00072.safetensors", "h.8.self_attention.dense.bias": "model_00010-of-00072.safetensors", "h.8.self_attention.dense.weight": "model_00010-of-00072.safetensors", "h.8.self_attention.query_key_value.bias": "model_00010-of-00072.safetensors", "h.8.self_attention.query_key_value.weight": "model_00010-of-00072.safetensors", "h.9.input_layernorm.bias": "model_00011-of-00072.safetensors", "h.9.input_layernorm.weight": "model_00011-of-00072.safetensors", "h.9.mlp.dense_4h_to_h.bias": "model_00011-of-00072.safetensors", "h.9.mlp.dense_4h_to_h.weight": "model_00011-of-00072.safetensors", "h.9.mlp.dense_h_to_4h.bias": "model_00011-of-00072.safetensors", "h.9.mlp.dense_h_to_4h.weight": "model_00011-of-00072.safetensors", "h.9.post_attention_layernorm.bias": "model_00011-of-00072.safetensors", "h.9.post_attention_layernorm.weight": "model_00011-of-00072.safetensors", "h.9.self_attention.dense.bias": "model_00011-of-00072.safetensors", "h.9.self_attention.dense.weight": "model_00011-of-00072.safetensors", "h.9.self_attention.query_key_value.bias": "model_00011-of-00072.safetensors", "h.9.self_attention.query_key_value.weight": "model_00011-of-00072.safetensors", "ln_f.bias": "model_00072-of-00072.safetensors", "ln_f.weight": "model_00072-of-00072.safetensors", "word_embeddings.weight": "model_00001-of-00072.safetensors", "word_embeddings_layernorm.bias": "model_00001-of-00072.safetensors", "word_embeddings_layernorm.weight": "model_00001-of-00072.safetensors"}}
model_00001-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e30d7fb951f7c48f201cc140057f8ae25c2d71bfb316549303211337ffe1dcab
3
+ size 7193289054
model_00002-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1dade261e7004691ef59cd95e73ca05f3fc7169a3f81dd97e4be3205a639607
3
+ size 4932875534
model_00003-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77f39850dc70a101c3d04114de061230a5c4452ecd4c9a457ef537fae029760d
3
+ size 4932875534
model_00004-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd182bc08fab3d1b5f5972a7c03bcdb43ea23027e12045fe11cd152942226192
3
+ size 4932875534
model_00005-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b955b02d4f3470c2bbe37471323267ec48bc8d07bd63a7216020c08afe3da0b4
3
+ size 4932875534
model_00006-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24a3eb73e75a703b6a8e56dbe0846219eadfcc8b836fc80d63b2989858314a6c
3
+ size 4932875534
model_00007-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:346e05c2b092ef24a23327b404dd1de955983309b1c1f256f8bf838a32dfc39e
3
+ size 4932875534
model_00008-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f61d0887d223777afc750c9c15e4fefa615d8e343888a8b61702f441ea2e7a4
3
+ size 4932875534
model_00009-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:403e479af3fb1e7f9d549803e3c304adec15835d85ae5ed90e3d98e7c778db75
3
+ size 4932875534
model_00010-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1094b467f2b74bc8caa7adcd1fb6bbf209f47c5974dbdaadf14737dce3459ce6
3
+ size 4932875534
model_00011-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06b89d1804d7b8ece3956d7ccf3f90613f3575f1cab658ee4eedbd12cedc08fc
3
+ size 4932875534
model_00012-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f63d86cd387e2d0965161868a77201be50bc3ef6fdcbf011ab3497fdb102407
3
+ size 4932875550
model_00013-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:400c454c7b3dc0314469aa9d68a2c54e9f780ee6cde5d4dfd43947b845591a80
3
+ size 4932875550
model_00014-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa2a34bef5ef9db83cd26ca968d77e0bc6e26c7ab0dca578923cb26dabcb4ea2
3
+ size 4932875550
model_00015-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de6920421bacb945ead13bfaa431cf24b26f4acef2b87fc250e5bc4753282d19
3
+ size 4932875550
model_00016-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:397b3496f7b73d232f4db9fe3acb0d66def5a7968c445843ae114c8bf4f04d82
3
+ size 4932875550
model_00017-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ff46db6a5bfe99c0cda9b5c595637a25ae2613278b11babe121e91be57cd73c
3
+ size 4932875550
model_00018-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd60d1189874aad85021243355a81dfef757e7fd03081b87a8f0a6e600d43a76
3
+ size 4932875550
model_00019-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bed55ceba0bbe65c138f920fb65bed307df88a57c807102f232e4a9c4ca9ed8
3
+ size 4932875550
model_00020-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d419eb379949cab4c4685ef5eed2a8ad7b49ffb77f3671175642fc3683b9cee
3
+ size 4932875550
model_00021-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef5aa0fcf056c86f421e228434b62e40e6bd3cbc96c18844ba69db7590f4d934
3
+ size 4932875550
model_00022-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f54e50e576f21f299b952b312eac3476f3bc57231f53e2b4ab256a7395f1525
3
+ size 4932875550
model_00023-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54b57daa846238211b8adb64bc9e28a28c07e20ad503533b18f4d66ed2aa86f5
3
+ size 4932875550
model_00024-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56de07d68a73550b82f02ff42b4ba87e1cf35881ceab586e9759bff06c641e5c
3
+ size 4932875550
model_00025-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8aaa0e2f2d148cd2225cdfcffb71848faf27b4149c12a33637f175b0f6d9c15
3
+ size 4932875550
model_00026-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f58f844b32d792a2d67090193e9e3b4f00ff6ad084da63a3e37dd9819149f83e
3
+ size 4932875550
model_00027-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaec09a779a8c1d1984fdf9121a66a47255eca358b030e11dbe616b36e8c7424
3
+ size 4932875550
model_00028-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3620ea8d35be2c0695b01415e8a5103d879f13d8f937ddc8c99d95b90c7d1d86
3
+ size 4932875550
model_00029-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c62e0cb19cd2ba0aaee5aa6bcd4bc54a1ea025f3e8e7f367e3f1011e9e9db24d
3
+ size 4932875550
model_00030-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9ab4ff64c2f10634a612c7bd229d3e57320303700744633f2e870ac5158f3ba
3
+ size 4932875550
model_00031-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f34185a577ae1079cf2adf61b93fc6294926e61bbdef2652ba77d358bfda9266
3
+ size 4932875550
model_00032-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5106756852e637b27e4d3bf381e3d9a0c038015cfb1f5da3ac22c3685c9192f9
3
+ size 4932875550
model_00033-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed9afd6c58e3ddef8107e11073f568fb0509fa9a4c8785fef43cd0824651c41c
3
+ size 4932875550
model_00034-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19754650bafb3dae6e9b45e7abfbeb5ca34debf45f6c63c3b37348aaeb5f3fe6
3
+ size 4932875550
model_00035-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d50afb042b8a0b173190717d5ef18fc1d0f0aae32ad357bd583c6c6822c2f40a
3
+ size 4932875550
model_00036-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:947d2a9b94a856c2f7e5cb691e9d29170e6a3884edaa6b03cc5004274551fb30
3
+ size 4932875550
model_00037-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62cfec0eb6ac041c5f31a578757b73d8b4448b4a7aa60ad642f870b36ff0718a
3
+ size 4932875550
model_00038-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc2a5b916cad450ea2a9a45f621387fbd3a8bdc365ddf117e225482c47b7537f
3
+ size 4932875550
model_00039-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9d87b0681c7dbdaeafe1a245f90ae0ee8b5452ce55cd6b74c8cf765075e2634
3
+ size 4932875550
model_00040-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b428a6c9ee2bc4d9db55f8408f2e2fcb3bba35c887105088a1a0c58c059a875e
3
+ size 4932875550
model_00041-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e63a5ecea5b30a2085bd0e6c41e19c8011b267312800d03b953fb5a09cc5e598
3
+ size 4932875550
model_00042-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4406387ab67c44a857f2c6127d57ebc4c0e5e2a19365e72f6c83026c505d25fe
3
+ size 4932875550
model_00043-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77ddbfff64d58f5b8f99558e25774a555037b7017a3c421cd7b7ff5689391ca7
3
+ size 4932875550
model_00044-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf0b7898788424988074ceb40d17d5f39b038ac3817eff3f39350e6d74e62b06
3
+ size 4932875550
model_00045-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e965108f635a7e813fcc7e9196e748f70049f53289869b6d48770c4d779226eb
3
+ size 4932875550
model_00046-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c752ef46110a95957de506e7569be24ec5f90d833f61705e20764687bf8fb258
3
+ size 4932875550
model_00047-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2516bb84281f64429e13b31947308e6a16aba5130ca2dc20a7880a3decd6f084
3
+ size 4932875550
model_00048-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57e759b81e347209a468d37577194b2af9a45ac3093f8717a658df121d136093
3
+ size 4932875550