dacorvo (HF staff) committed
Commit 2d87237
Parent: aeec59c

Add more llama config

Files changed (1):
  inference-cache-config/llama.json  +39 −3
inference-cache-config/llama.json CHANGED
@@ -59,13 +59,25 @@
       "sequence_length": 4096,
       "num_cores": 24,
       "auto_cast_type": "fp16"
+    },
+    {
+      "batch_size": 32,
+      "sequence_length": 4096,
+      "num_cores": 8,
+      "auto_cast_type": "fp16"
+    },
+    {
+      "batch_size": 32,
+      "sequence_length": 4096,
+      "num_cores": 24,
+      "auto_cast_type": "fp16"
     }
   ],
   "meta-llama/Llama-2-13b-chat-hf": [
     {
       "batch_size": 1,
       "sequence_length": 4096,
-      "num_cores": 8,
+      "num_cores": 12,
       "auto_cast_type": "fp16"
     },
     {
@@ -77,7 +89,7 @@
     {
       "batch_size": 4,
       "sequence_length": 4096,
-      "num_cores": 8,
+      "num_cores": 12,
       "auto_cast_type": "fp16"
     },
     {
@@ -89,7 +101,7 @@
     {
       "batch_size": 8,
       "sequence_length": 4096,
-      "num_cores": 8,
+      "num_cores": 12,
       "auto_cast_type": "fp16"
     },
     {
@@ -97,6 +109,30 @@
       "sequence_length": 4096,
       "num_cores": 24,
       "auto_cast_type": "fp16"
+    },
+    {
+      "batch_size": 16,
+      "sequence_length": 4096,
+      "num_cores": 12,
+      "auto_cast_type": "fp16"
+    },
+    {
+      "batch_size": 16,
+      "sequence_length": 4096,
+      "num_cores": 24,
+      "auto_cast_type": "fp16"
+    },
+    {
+      "batch_size": 32,
+      "sequence_length": 4096,
+      "num_cores": 12,
+      "auto_cast_type": "fp16"
+    },
+    {
+      "batch_size": 32,
+      "sequence_length": 4096,
+      "num_cores": 24,
+      "auto_cast_type": "fp16"
     }
   ],
   "meta-llama/Llama-2-70b-chat-hf": [