{
  "_name_or_path": "Phi-3-medium-128k-instruct",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 5120,
  "initializer_range": 0.02,
  "intermediate_size": 17920,
  "max_position_embeddings": 131072,
  "model_type": "phi3",
  "num_attention_heads": 40,
  "num_hidden_layers": 40,
  "num_key_value_heads": 10,
  "original_max_position_embeddings": 4096,
  "pad_token_id": null,
  "resid_pdrop": 0.0,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "long_factor": [
      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
      1.0, 1.0, 1.0, 1.0, 1.0, 1.25, 1.25, 1.5,
      2.0, 2.75, 5.75, 5.75, 6.5, 9.25, 11.0, 13.25,
      19.25, 19.75, 19.75, 21.25, 21.5, 26.5, 30.0, 33.75,
      35.25, 38.5, 42.0, 42.25, 46.0, 47.0, 50.0, 50.5,
      51.0, 52.0, 52.75, 53.75, 54.75, 57.0, 57.25, 58.5,
      59.25, 59.5, 62.0, 62.5, 62.75, 63.25, 63.25, 63.25,
      63.75, 64.0, 64.0, 64.25, 64.5, 64.5, 65.0, 65.0
    ],
    "short_factor": [
      1.0, 1.0, 1.0, 1.0,
      1.0, 1.0, 1.01, 1.02,
      1.02, 1.04, 1.04, 1.07,
      1.07, 1.1, 1.3000000000000003, 1.3000000000000003,
      1.5000000000000004, 1.5700000000000005, 1.9000000000000008, 2.3100000000000014,
      2.759999999999992, 3.3899999999999784, 3.9399999999999666, 4.009999999999965,
      4.289999999999959, 4.349999999999958, 5.349999999999937, 6.659999999999909,
      7.029999999999901, 7.51999999999989, 8.00999999999988, 8.249999999999876,
      8.279999999999875, 9.629999999999846, 9.89999999999984, 10.589999999999826,
      11.049999999999816, 11.7899999999998, 12.189999999999792, 12.889999999999777,
      13.129999999999772, 13.16999999999977, 13.20999999999977, 13.479999999999764,
      13.539999999999763, 13.779999999999758, 13.929999999999755, 14.429999999999744,
      14.759999999999737, 15.149999999999729, 15.419999999999723, 15.53999999999972,
      15.659999999999718, 15.749999999999716, 15.759999999999716, 15.799999999999715,
      16.05999999999971, 16.079999999999714, 16.11999999999972, 16.11999999999972,
      16.18999999999973, 16.31999999999975, 16.539999999999786, 16.799999999999827
    ],
    "type": "su"
  },
  "rope_theta": 10000.0,
  "sliding_window": 131072,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.39.3",
  "use_cache": true,
  "vocab_size": 32064
}