Upload AriaForConditionalGeneration #2
opened by m-ric (HF staff)

README.md CHANGED
@@ -1,7 +1,7 @@
 ---
-license: apache-2.0
 base_model:
 - rhymes-ai/Aria
+license: apache-2.0
 base_model_relation: quantized
 ---
 
config.json CHANGED
@@ -1,16 +1,20 @@
 {
-  "_name_or_path": "./",
+  "_name_or_path": "rhymes-ai/Aria-torchao-int8wo",
   "architectures": [
     "AriaForConditionalGeneration"
   ],
   "auto_map": {
-    "AutoConfig": "configuration_aria.AriaConfig",
+    "AutoConfig": "modeling_aria.AriaConfig",
     "AutoModelForCausalLM": "modeling_aria.AriaForConditionalGeneration"
   },
   "do_sample": null,
   "ignore_index": -100,
   "image_token_index": 9,
+  "initializer_range": 0.02,
+  "intermediate_size": 1664,
+  "max_value_projector_patch_to_query_dict": 256,
   "model_type": "aria",
+  "pad_token_id": 2,
   "projector_patch_to_query_dict": {
     "1225": 128,
     "4900": 256
@@ -20,7 +24,7 @@
   "hidden_size": 2560,
   "intermediate_size": 13568,
   "max_position_embeddings": 65536,
-  "model_type": "aria_moe_lm",
+  "model_type": "aria_text",
   "moe_intermediate_size": 1664,
   "moe_num_experts": 64,
   "moe_topk": 6,
@@ -31,20 +35,20 @@
   "rope_theta": 5000000,
   "vocab_size": 100352
   },
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.45.0",
+  "torch_dtype": "float16",
+  "transformers_version": "4.48.0.dev0",
   "vision_config": {
     "_flash_attn_2_enabled": true,
     "architectures": [
       "AriaVisionModel"
     ],
-    "hidden_size": 1152,
+    "attention_heads": 16,
     "image_size": 980,
     "intermediate_size": 4304,
-    "model_type": "aria_vision_model",
-    "num_attention_heads": 16,
+    "model_type": "idefics3_vision",
     "num_hidden_layers": 27,
     "patch_size": 14,
     "torch_dtype": "bfloat16"
-  }
+  },
+  "vision_feature_layer": -1
   }
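
The new `_name_or_path` ("rhymes-ai/Aria-torchao-int8wo") together with the README's `base_model_relation: quantized` indicates a torchao int8 weight-only checkpoint. A minimal sketch of how such a checkpoint is typically produced with transformers' `TorchAoConfig` (this is not part of the PR; the repo ids come from the configs above, everything else is illustrative):

```python
# Hedged sketch: quantize rhymes-ai/Aria to int8 weight-only with torchao and
# push the result. Assumes transformers >= 4.45 with torchao installed.
import torch
from transformers import AutoModelForCausalLM, TorchAoConfig

quantization_config = TorchAoConfig("int8_weight_only")  # "int8wo", as in the repo name
model = AutoModelForCausalLM.from_pretrained(
    "rhymes-ai/Aria",
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,          # resolves the auto_map entries in config.json
    quantization_config=quantization_config,
)
# torchao-quantized tensors cannot be saved as safetensors, which is consistent
# with the pytorch_model-*.bin shards added below instead of *.safetensors.
model.push_to_hub("rhymes-ai/Aria-torchao-int8wo", safe_serialization=False)
```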
generation_config.json CHANGED
@@ -1,8 +1,7 @@
 {
   "_from_model_config": true,
   "bos_token_id": 1,
-  "do_sample": true,
   "eos_token_id": 2,
-  "temperature": 0.7,
-  "transformers_version": "4.45.0"
+  "pad_token_id": 2,
+  "transformers_version": "4.48.0.dev0"
 }
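
For downstream users, removing `do_sample` and `temperature` changes the default decoding behavior: `generate()` now falls back to greedy decoding unless sampling is requested explicitly. An illustrative sketch (the model id is assumed from `_name_or_path` in config.json):

```python
# Hedged sketch of the behavior change: the old generation_config sampled at
# temperature 0.7 by default; the new one defaults to greedy decoding.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "rhymes-ai/Aria-torchao-int8wo"  # assumed from "_name_or_path"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)

inputs = tokenizer("Describe this model.", return_tensors="pt")
greedy = model.generate(**inputs, max_new_tokens=32)   # new default: greedy
sampled = model.generate(                              # reproduces the old default
    **inputs, do_sample=True, temperature=0.7, max_new_tokens=32
)
```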
pytorch_model-00001-of-00003.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:480dae1a48acf8a46f33ba04020bc50ca1c25d2c9be439710e1dd13d2154cfe6
+size 3764
pytorch_model-00002-of-00003.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2385c41974e1f894b26a8865f6f0271a54998e27c51083564e3e655c6efeee1d
+size 64086
pytorch_model-00003-of-00003.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db57e5cd720c4164439696f11239a777f5ba1a566e9b10d57bb7f206bc112624
+size 18932
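
Note that the three `pytorch_model-*.bin` diffs above show Git LFS pointer text (spec version, object id, byte size), not the weight shards themselves. A small sketch for checking a downloaded shard against the recorded `oid` (paths are illustrative):

```python
# Hedged sketch: verify a downloaded LFS object against the "oid sha256:..."
# line recorded in the pointer files above.
import hashlib

def sha256_of(path: str) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
            h.update(chunk)
    return h.hexdigest()

assert sha256_of("pytorch_model-00001-of-00003.bin") == \
    "480dae1a48acf8a46f33ba04020bc50ca1c25d2c9be439710e1dd13d2154cfe6"
```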
pytorch_model.bin.index.json CHANGED
The diff for this file is too large to render.