kreas committed on
Commit
7c8036a
1 Parent(s): 5ddb935

Upload /Llama-2-13b-hf/nf4_batch_size_1_sq_len_256_new_tokens_256/experiment_config.json with huggingface_hub

Browse files
Llama-2-13b-hf/nf4_batch_size_1_sq_len_256_new_tokens_256/experiment_config.json ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": {
3
+ "name": "pytorch",
4
+ "version": "2.2.1+cu118",
5
+ "_target_": "optimum_benchmark.backends.pytorch.backend.PyTorchBackend",
6
+ "model": "meta-llama/Llama-2-13b-hf",
7
+ "task": "text-generation",
8
+ "library": "transformers",
9
+ "device": "cuda",
10
+ "device_ids": "0",
11
+ "seed": 42,
12
+ "inter_op_num_threads": null,
13
+ "intra_op_num_threads": null,
14
+ "hub_kwargs": {
15
+ "revision": "main",
16
+ "force_download": false,
17
+ "local_files_only": false,
18
+ "trust_remote_code": false
19
+ },
20
+ "no_weights": true,
21
+ "device_map": null,
22
+ "torch_dtype": "float16",
23
+ "amp_autocast": false,
24
+ "amp_dtype": null,
25
+ "eval_mode": true,
26
+ "to_bettertransformer": false,
27
+ "low_cpu_mem_usage": null,
28
+ "attn_implementation": "eager",
29
+ "cache_implementation": null,
30
+ "torch_compile": false,
31
+ "torch_compile_config": {},
32
+ "quantization_scheme": "bnb",
33
+ "quantization_config": {
34
+ "llm_int8_threshold": 0.0,
35
+ "load_in_4bit": true,
36
+ "bnb_4bit_compute_dtype": "float16",
37
+ "bnb_4bit_quant_type": "nf4"
38
+ },
39
+ "deepspeed_inference": false,
40
+ "deepspeed_inference_config": {},
41
+ "peft_type": null,
42
+ "peft_config": {}
43
+ },
44
+ "launcher": {
45
+ "name": "process",
46
+ "_target_": "optimum_benchmark.launchers.process.launcher.ProcessLauncher",
47
+ "device_isolation": false,
48
+ "start_method": "spawn"
49
+ },
50
+ "benchmark": {
51
+ "name": "inference",
52
+ "_target_": "optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark",
53
+ "duration": 10,
54
+ "warmup_runs": 10,
55
+ "input_shapes": {
56
+ "batch_size": 1,
57
+ "num_choices": 2,
58
+ "sequence_length": 256
59
+ },
60
+ "new_tokens": null,
61
+ "energy": false,
62
+ "memory": true,
63
+ "latency": true,
64
+ "forward_kwargs": {},
65
+ "generate_kwargs": {
66
+ "max_new_tokens": 256,
67
+ "min_new_tokens": 256
68
+ },
69
+ "call_kwargs": {}
70
+ },
71
+ "experiment_name": "Llama-2-13b-hf-nf4",
72
+ "task": null,
73
+ "model": null,
74
+ "device": null,
75
+ "library": null,
76
+ "environment": {
77
+ "cpu": " AMD Ryzen Threadripper PRO 5995WX 64-Cores",
78
+ "cpu_count": 128,
79
+ "cpu_ram_mb": 134841.131008,
80
+ "system": "Linux",
81
+ "machine": "x86_64",
82
+ "platform": "Linux-6.5.0-14-generic-x86_64-with-glibc2.35",
83
+ "processor": "x86_64",
84
+ "python_version": "3.10.12",
85
+ "gpu": [
86
+ "NVIDIA GeForce RTX 4090"
87
+ ],
88
+ "gpu_count": 1,
89
+ "gpu_vram_mb": 25757220864,
90
+ "optimum_benchmark_version": "0.2.0",
91
+ "optimum_benchmark_commit": "5bf349dbbc5ecdbf6ca94ac70f80ac44bd84dcc0",
92
+ "transformers_version": "4.39.0.dev0",
93
+ "transformers_commit": null,
94
+ "accelerate_version": "0.28.0",
95
+ "accelerate_commit": null,
96
+ "diffusers_version": null,
97
+ "diffusers_commit": null,
98
+ "optimum_version": null,
99
+ "optimum_commit": null,
100
+ "timm_version": null,
101
+ "timm_commit": null,
102
+ "peft_version": null,
103
+ "peft_commit": null
104
+ }
105
+ }