dacorvo HF staff committed on
Commit
5d8c4f2
1 Parent(s): 83cd17a

Update inference-cache-config/llama3.json

Browse files
Files changed (1) hide show
  1. inference-cache-config/llama3.json +9 -9
inference-cache-config/llama3.json CHANGED
@@ -4,43 +4,43 @@
4
  "batch_size": 1,
5
  "sequence_length": 4096,
6
  "num_cores": 2,
7
- "auto_cast_type": "bf16"
8
  },
9
  {
10
  "batch_size": 4,
11
  "sequence_length": 4096,
12
  "num_cores": 2,
13
- "auto_cast_type": "bf16"
14
  },
15
  {
16
  "batch_size": 8,
17
  "sequence_length": 4096,
18
  "num_cores": 2,
19
- "auto_cast_type": "bf16"
20
  },
21
  {
22
  "batch_size": 4,
23
  "sequence_length": 4096,
24
  "num_cores": 8,
25
- "auto_cast_type": "bf16"
26
  },
27
  {
28
  "batch_size": 8,
29
  "sequence_length": 4096,
30
  "num_cores": 8,
31
- "auto_cast_type": "bf16"
32
  },
33
  {
34
  "batch_size": 16,
35
  "sequence_length": 4096,
36
  "num_cores": 8,
37
- "auto_cast_type": "bf16"
38
  },
39
  {
40
  "batch_size": 32,
41
  "sequence_length": 4096,
42
  "num_cores": 8,
43
- "auto_cast_type": "bf16"
44
  }
45
  ],
46
  "meta-llama/Meta-Llama-3-70B": [
@@ -48,13 +48,13 @@
48
  "batch_size": 1,
49
  "sequence_length": 4096,
50
  "num_cores": 24,
51
- "auto_cast_type": "bf16"
52
  },
53
  {
54
  "batch_size": 4,
55
  "sequence_length": 4096,
56
  "num_cores": 24,
57
- "auto_cast_type": "bf16"
58
  }
59
  ]
60
  }
 
4
  "batch_size": 1,
5
  "sequence_length": 4096,
6
  "num_cores": 2,
7
+ "auto_cast_type": "fp16"
8
  },
9
  {
10
  "batch_size": 4,
11
  "sequence_length": 4096,
12
  "num_cores": 2,
13
+ "auto_cast_type": "fp16"
14
  },
15
  {
16
  "batch_size": 8,
17
  "sequence_length": 4096,
18
  "num_cores": 2,
19
+ "auto_cast_type": "fp16"
20
  },
21
  {
22
  "batch_size": 4,
23
  "sequence_length": 4096,
24
  "num_cores": 8,
25
+ "auto_cast_type": "fp16"
26
  },
27
  {
28
  "batch_size": 8,
29
  "sequence_length": 4096,
30
  "num_cores": 8,
31
+ "auto_cast_type": "fp16"
32
  },
33
  {
34
  "batch_size": 16,
35
  "sequence_length": 4096,
36
  "num_cores": 8,
37
+ "auto_cast_type": "fp16"
38
  },
39
  {
40
  "batch_size": 32,
41
  "sequence_length": 4096,
42
  "num_cores": 8,
43
+ "auto_cast_type": "fp16"
44
  }
45
  ],
46
  "meta-llama/Meta-Llama-3-70B": [
 
48
  "batch_size": 1,
49
  "sequence_length": 4096,
50
  "num_cores": 24,
51
+ "auto_cast_type": "fp16"
52
  },
53
  {
54
  "batch_size": 4,
55
  "sequence_length": 4096,
56
  "num_cores": 24,
57
+ "auto_cast_type": "fp16"
58
  }
59
  ]
60
  }