kreas committed on
Commit
8d0bd14
·
verified ·
1 Parent(s): e6c8cd9

Upload /Mistral-7B-v0.1-AWQ/AWQ_batch_size_1_sq_len_256_new_tokens_256/experiment_config.json with huggingface_hub

Browse files
Mistral-7B-v0.1-AWQ/AWQ_batch_size_1_sq_len_256_new_tokens_256/experiment_config.json CHANGED
@@ -3,7 +3,7 @@
3
  "name": "pytorch",
4
  "version": "2.2.1+cu118",
5
  "_target_": "optimum_benchmark.backends.pytorch.backend.PyTorchBackend",
6
- "model": "microsoft/phi-2",
7
  "task": "text-generation",
8
  "library": "transformers",
9
  "device": "cuda",
@@ -29,13 +29,8 @@
29
  "cache_implementation": null,
30
  "torch_compile": false,
31
  "torch_compile_config": {},
32
- "quantization_scheme": "bnb",
33
- "quantization_config": {
34
- "llm_int8_threshold": 0.0,
35
- "load_in_4bit": true,
36
- "bnb_4bit_compute_dtype": "float16",
37
- "bnb_4bit_quant_type": "nf4"
38
- },
39
  "deepspeed_inference": false,
40
  "deepspeed_inference_config": {},
41
  "peft_type": null,
@@ -68,7 +63,7 @@
68
  },
69
  "call_kwargs": {}
70
  },
71
- "experiment_name": "phi-2-nf4",
72
  "task": null,
73
  "model": null,
74
  "device": null,
 
3
  "name": "pytorch",
4
  "version": "2.2.1+cu118",
5
  "_target_": "optimum_benchmark.backends.pytorch.backend.PyTorchBackend",
6
+ "model": "kreas/Mistral-7B-v0.1-AWQ",
7
  "task": "text-generation",
8
  "library": "transformers",
9
  "device": "cuda",
 
29
  "cache_implementation": null,
30
  "torch_compile": false,
31
  "torch_compile_config": {},
32
+ "quantization_scheme": "awq",
33
+ "quantization_config": {},
 
 
 
 
 
34
  "deepspeed_inference": false,
35
  "deepspeed_inference_config": {},
36
  "peft_type": null,
 
63
  },
64
  "call_kwargs": {}
65
  },
66
+ "experiment_name": "Mistral-7B-v0.1-AWQ-AWQ",
67
  "task": null,
68
  "model": null,
69
  "device": null,