{ "_name_or_path": "/Users/guynachshon/Documents/code/research/temp/glm_large_chinese", "architectures": [ "GLMModel" ], "attention_dropout_prob": 0.1, "attention_scale": 1.0, "block_position_encoding": true, "checkpoint_activations": false, "checkpoint_num_layers": 1, "embedding_dropout_prob": 0.1, "hidden_size": 1024, "initializer_range": 0.02, "max_sequence_length": 1024, "model_type": "glm", "num_attention_heads": 16, "num_layers": 24, "output_dropout_prob": 0.1, "output_predict": true, "parallel_output": true, "pool_token": "cls", "relative_encoding": false, "spell_func": "lstm", "spell_length": null, "torch_dtype": "float32", "transformers_version": "4.35.2", "vocab_size": 50048 }