Text Generation
Transformers
English
encoder_decoder
code
natural language understanding
machine learning
research
introspection
self-reflection
conversational
Inference Endpoints
Or4cl3-1 committed on
Commit
3523256
1 Parent(s): 26d22a0

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +24 -36
config.json CHANGED
@@ -1,41 +1,29 @@
1
  {
2
- "model_type": "unified_multimodal_language_model",
 
 
3
  "model_name": "CognoSphere/CSUMLM",
4
  "model_description": "CognoSphere Unified Multimodal Language Model (CSUMLM) is an advanced AI model capable of processing and generating text, images, and audio data. It combines transfer learning, deep learning, self-supervised learning, meta-learning, deep meta-learning, reinforcement learning, and cross-domain analogy extraction to achieve state-of-the-art performance in multimodal tasks.",
5
- "model_architecture": {
6
- "text_encoder": {
7
- "type": "transformer",
8
- "num_layers": 12,
9
- "hidden_size": 768,
10
- "num_attention_heads": 12,
11
- "intermediate_size": 3072
12
- },
13
- "image_encoder": {
14
- "type": "convolutional",
15
- "num_layers": 5,
16
- "kernel_sizes": [3, 3, 3, 3, 3],
17
- "channels": [64, 128, 256, 512, 512]
18
- },
19
- "audio_encoder": {
20
- "type": "recurrent",
21
- "num_layers": 3,
22
- "hidden_size": 512,
23
- "bidirectional": true
24
- },
25
- "multimodal_fusion": {
26
- "type": "transformer",
27
- "num_layers": 6,
28
- "hidden_size": 1024,
29
- "num_attention_heads": 16,
30
- "intermediate_size": 4096
31
- },
32
- "decoder": {
33
- "type": "transformer",
34
- "num_layers": 12,
35
- "hidden_size": 768,
36
- "num_attention_heads": 12,
37
- "intermediate_size": 3072
38
- }
39
  },
40
  "training_data": {
41
  "text": [
@@ -71,4 +59,4 @@
71
  "rouge",
72
  "cider"
73
  ]
74
- }
 
1
  {
2
+ "model_type": "encoder_decoder",
3
+ "encoder_type": "csumlm_encoder",
4
+ "decoder_type": "csumlm_decoder",
5
  "model_name": "CognoSphere/CSUMLM",
6
  "model_description": "CognoSphere Unified Multimodal Language Model (CSUMLM) is an advanced AI model capable of processing and generating text, images, and audio data. It combines transfer learning, deep learning, self-supervised learning, meta-learning, deep meta-learning, reinforcement learning, and cross-domain analogy extraction to achieve state-of-the-art performance in multimodal tasks.",
7
+ "encoder": {
8
+ "type": "transformer",
9
+ "num_layers": 12,
10
+ "hidden_size": 768,
11
+ "num_attention_heads": 12,
12
+ "intermediate_size": 3072
13
+ },
14
+ "decoder": {
15
+ "type": "transformer",
16
+ "num_layers": 12,
17
+ "hidden_size": 768,
18
+ "num_attention_heads": 12,
19
+ "intermediate_size": 3072
20
+ },
21
+ "multimodal_fusion": {
22
+ "type": "transformer",
23
+ "num_layers": 6,
24
+ "hidden_size": 1024,
25
+ "num_attention_heads": 16,
26
+ "intermediate_size": 4096
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  },
28
  "training_data": {
29
  "text": [
 
59
  "rouge",
60
  "cider"
61
  ]
62
+ }