dacorvo HF staff committed on
Commit
594abb2
1 Parent(s): f99a301

Add most popular llama variants

Browse files
inference-cache-config/llama-variants.json ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "defog/sqlcoder-7b-2": [
3
+ {
4
+ "batch_size": 1,
5
+ "sequence_length": 4096,
6
+ "num_cores": 2,
7
+ "auto_cast_type": "fp16"
8
+ },
9
+ {
10
+ "batch_size": 4,
11
+ "sequence_length": 4096,
12
+ "num_cores": 2,
13
+ "auto_cast_type": "fp16"
14
+ }
15
+ ],
16
+ "m-a-p/OpenCodeInterpreter-DS-6.7B": [
17
+ {
18
+ "batch_size": 1,
19
+ "sequence_length": 4096,
20
+ "num_cores": 2,
21
+ "auto_cast_type": "fp16"
22
+ },
23
+ {
24
+ "batch_size": 4,
25
+ "sequence_length": 4096,
26
+ "num_cores": 2,
27
+ "auto_cast_type": "fp16"
28
+ }
29
+ ],
30
+ "ibm/labradorite-13b": [
31
+ {
32
+ "batch_size": 1,
33
+ "sequence_length": 4096,
34
+ "num_cores": 8,
35
+ "auto_cast_type": "fp16"
36
+ },
37
+ {
38
+ "batch_size": 4,
39
+ "sequence_length": 4096,
40
+ "num_cores": 8,
41
+ "auto_cast_type": "fp16"
42
+ },
43
+ {
44
+ "batch_size": 8,
45
+ "sequence_length": 4096,
46
+ "num_cores": 8,
47
+ "auto_cast_type": "fp16"
48
+ }
49
+ ],
50
+ "abacusai/Smaug-72B-v0.1": [
51
+ {
52
+ "batch_size": 1,
53
+ "sequence_length": 4096,
54
+ "num_cores": 24,
55
+ "auto_cast_type": "fp16"
56
+ },
57
+ {
58
+ "batch_size": 4,
59
+ "sequence_length": 4096,
60
+ "num_cores": 24,
61
+ "auto_cast_type": "fp16"
62
+ }
63
+ ],
64
+ "gorilla-llm/gorilla-openfunctions-v2": [
65
+ {
66
+ "batch_size": 1,
67
+ "sequence_length": 4096,
68
+ "num_cores": 2,
69
+ "auto_cast_type": "fp16"
70
+ },
71
+ {
72
+ "batch_size": 4,
73
+ "sequence_length": 4096,
74
+ "num_cores": 2,
75
+ "auto_cast_type": "fp16"
76
+ }
77
+ ],
78
+ "m-a-p/ChatMusician": [
79
+ {
80
+ "batch_size": 1,
81
+ "sequence_length": 4096,
82
+ "num_cores": 2,
83
+ "auto_cast_type": "fp16"
84
+ },
85
+ {
86
+ "batch_size": 4,
87
+ "sequence_length": 4096,
88
+ "num_cores": 2,
89
+ "auto_cast_type": "fp16"
90
+ }
91
+ ],
92
+ "LargeWorldModel/LWM-Text-Chat-1M": [
93
+ {
94
+ "batch_size": 1,
95
+ "sequence_length": 4096,
96
+ "num_cores": 2,
97
+ "auto_cast_type": "fp16"
98
+ },
99
+ {
100
+ "batch_size": 4,
101
+ "sequence_length": 4096,
102
+ "num_cores": 2,
103
+ "auto_cast_type": "fp16"
104
+ }
105
+ ],
106
+ "HuggingFaceTB/cosmo-1b": [
107
+ {
108
+ "batch_size": 1,
109
+ "sequence_length": 4096,
110
+ "num_cores": 2,
111
+ "auto_cast_type": "fp16"
112
+ },
113
+ {
114
+ "batch_size": 4,
115
+ "sequence_length": 4096,
116
+ "num_cores": 2,
117
+ "auto_cast_type": "fp16"
118
+ }
119
+ ],
120
+ "01-ai/Yi-34B-200K": [
121
+ {
122
+ "batch_size": 1,
123
+ "sequence_length": 4096,
124
+ "num_cores": 24,
125
+ "auto_cast_type": "fp16"
126
+ },
127
+ {
128
+ "batch_size": 4,
129
+ "sequence_length": 4096,
130
+ "num_cores": 24,
131
+ "auto_cast_type": "fp16"
132
+ }
133
+ ]
134
+ }