kreas committed on
Commit
e91b5a0
1 Parent(s): e43c2ef

Upload /Llama-2-7b-hf/bf16_batch_size_1_sq_len_256_new_tokens_256/experiment_config.json with huggingface_hub

Browse files
Llama-2-7b-hf/bf16_batch_size_1_sq_len_256_new_tokens_256/experiment_config.json ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": {
3
+ "name": "pytorch",
4
+ "version": "2.2.1+cu118",
5
+ "_target_": "optimum_benchmark.backends.pytorch.backend.PyTorchBackend",
6
+ "model": "meta-llama/Llama-2-7b-hf",
7
+ "task": "text-generation",
8
+ "library": "transformers",
9
+ "device": "cuda",
10
+ "device_ids": "0",
11
+ "seed": 42,
12
+ "inter_op_num_threads": null,
13
+ "intra_op_num_threads": null,
14
+ "hub_kwargs": {
15
+ "revision": "main",
16
+ "force_download": false,
17
+ "local_files_only": false,
18
+ "trust_remote_code": false
19
+ },
20
+ "no_weights": true,
21
+ "device_map": null,
22
+ "torch_dtype": "bfloat16",
23
+ "amp_autocast": false,
24
+ "amp_dtype": null,
25
+ "eval_mode": true,
26
+ "to_bettertransformer": false,
27
+ "low_cpu_mem_usage": null,
28
+ "attn_implementation": "eager",
29
+ "cache_implementation": null,
30
+ "torch_compile": false,
31
+ "torch_compile_config": {},
32
+ "quantization_scheme": null,
33
+ "quantization_config": {},
34
+ "deepspeed_inference": false,
35
+ "deepspeed_inference_config": {},
36
+ "peft_type": null,
37
+ "peft_config": {}
38
+ },
39
+ "launcher": {
40
+ "name": "process",
41
+ "_target_": "optimum_benchmark.launchers.process.launcher.ProcessLauncher",
42
+ "device_isolation": false,
43
+ "start_method": "spawn"
44
+ },
45
+ "benchmark": {
46
+ "name": "inference",
47
+ "_target_": "optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark",
48
+ "duration": 10,
49
+ "warmup_runs": 10,
50
+ "input_shapes": {
51
+ "batch_size": 1,
52
+ "num_choices": 2,
53
+ "sequence_length": 256
54
+ },
55
+ "new_tokens": null,
56
+ "energy": false,
57
+ "memory": true,
58
+ "latency": true,
59
+ "forward_kwargs": {},
60
+ "generate_kwargs": {
61
+ "max_new_tokens": 256,
62
+ "min_new_tokens": 256
63
+ },
64
+ "call_kwargs": {}
65
+ },
66
+ "experiment_name": "Llama-2-7b-hf-bf16",
67
+ "task": null,
68
+ "model": null,
69
+ "device": null,
70
+ "library": null,
71
+ "environment": {
72
+ "cpu": " AMD Ryzen Threadripper PRO 5995WX 64-Cores",
73
+ "cpu_count": 128,
74
+ "cpu_ram_mb": 134841.131008,
75
+ "system": "Linux",
76
+ "machine": "x86_64",
77
+ "platform": "Linux-6.5.0-14-generic-x86_64-with-glibc2.35",
78
+ "processor": "x86_64",
79
+ "python_version": "3.10.12",
80
+ "gpu": [
81
+ "NVIDIA GeForce RTX 4090"
82
+ ],
83
+ "gpu_count": 1,
84
+ "gpu_vram_mb": 25757220864,
85
+ "optimum_benchmark_version": "0.2.0",
86
+ "optimum_benchmark_commit": "5bf349dbbc5ecdbf6ca94ac70f80ac44bd84dcc0",
87
+ "transformers_version": "4.39.0.dev0",
88
+ "transformers_commit": null,
89
+ "accelerate_version": "0.28.0",
90
+ "accelerate_commit": null,
91
+ "diffusers_version": null,
92
+ "diffusers_commit": null,
93
+ "optimum_version": null,
94
+ "optimum_commit": null,
95
+ "timm_version": null,
96
+ "timm_commit": null,
97
+ "peft_version": null,
98
+ "peft_commit": null
99
+ }
100
+ }