philschmid HF staff committed on
Commit
1960ccb
1 Parent(s): c813817

Create inference-cache-config/llama.json

Browse files
Files changed (1) hide show
  1. inference-cache-config/llama.json +110 -0
inference-cache-config/llama.json ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "meta-llama/Llama-2-7b-chat-hf": [
3
+ {
4
+ "batch_size": 1,
5
+ "sequence_length": 4096,
6
+ "num_cores": 2,
7
+ "auto_cast_type": "fp16"
8
+ },
9
+ {
10
+ "batch_size": 1,
11
+ "sequence_length": 4096,
12
+ "num_cores": 8,
13
+ "auto_cast_type": "fp16"
14
+ },
15
+ {
16
+ "batch_size": 1,
17
+ "sequence_length": 4096,
18
+ "num_cores": 24,
19
+ "auto_cast_type": "fp16"
20
+ },
21
+ {
22
+ "batch_size": 4,
23
+ "sequence_length": 4096,
24
+ "num_cores": 2,
25
+ "auto_cast_type": "fp16"
26
+ },
27
+ {
28
+ "batch_size": 4,
29
+ "sequence_length": 4096,
30
+ "num_cores": 8,
31
+ "auto_cast_type": "fp16"
32
+ },
33
+ {
34
+ "batch_size": 4,
35
+ "sequence_length": 4096,
36
+ "num_cores": 24,
37
+ "auto_cast_type": "fp16"
38
+ },
39
+ {
40
+ "batch_size": 8,
41
+ "sequence_length": 4096,
42
+ "num_cores": 8,
43
+ "auto_cast_type": "fp16"
44
+ },
45
+ {
46
+ "batch_size": 8,
47
+ "sequence_length": 4096,
48
+ "num_cores": 24,
49
+ "auto_cast_type": "fp16"
50
+ },
51
+ {
52
+ "batch_size": 16,
53
+ "sequence_length": 4096,
54
+ "num_cores": 8,
55
+ "auto_cast_type": "fp16"
56
+ },
57
+ {
58
+ "batch_size": 16,
59
+ "sequence_length": 4096,
60
+ "num_cores": 24,
61
+ "auto_cast_type": "fp16"
62
+ }
63
+ ],
64
+ "meta-llama/Llama-2-13b-chat-hf": [
65
+ {
66
+ "batch_size": 1,
67
+ "sequence_length": 4096,
68
+ "num_cores": 8,
69
+ "auto_cast_type": "fp16"
70
+ },
71
+ {
72
+ "batch_size": 1,
73
+ "sequence_length": 4096,
74
+ "num_cores": 24,
75
+ "auto_cast_type": "fp16"
76
+ },
77
+ {
78
+ "batch_size": 4,
79
+ "sequence_length": 4096,
80
+ "num_cores": 8,
81
+ "auto_cast_type": "fp16"
82
+ },
83
+ {
84
+ "batch_size": 4,
85
+ "sequence_length": 4096,
86
+ "num_cores": 24,
87
+ "auto_cast_type": "fp16"
88
+ },
89
+ {
90
+ "batch_size": 8,
91
+ "sequence_length": 4096,
92
+ "num_cores": 8,
93
+ "auto_cast_type": "fp16"
94
+ },
95
+ {
96
+ "batch_size": 8,
97
+ "sequence_length": 4096,
98
+ "num_cores": 24,
99
+ "auto_cast_type": "fp16"
100
+ }
101
+ ],
102
+ "meta-llama/Llama-2-70b-chat-hf": [
103
+ {
104
+ "batch_size": 1,
105
+ "sequence_length": 4096,
106
+ "num_cores": 24,
107
+ "auto_cast_type": "fp16"
108
+ }
109
+ ]
110
+ }