dacorvo HF staff committed on
Commit
593822e
1 Parent(s): 608a595

Added Llama-70b batch_size 4 to inference cache

Browse files
Files changed (1) hide show
  1. inference-cache-config/llama.json +6 -0
inference-cache-config/llama.json CHANGED
@@ -105,6 +105,12 @@
105
  "sequence_length": 4096,
106
  "num_cores": 24,
107
  "auto_cast_type": "fp16"
 
 
 
 
 
 
108
  }
109
  ]
110
  }
 
105
  "sequence_length": 4096,
106
  "num_cores": 24,
107
  "auto_cast_type": "fp16"
108
+ },
109
+ {
110
+ "batch_size": 4,
111
+ "sequence_length": 4096,
112
+ "num_cores": 24,
113
+ "auto_cast_type": "fp16"
114
  }
115
  ]
116
  }