{
  "_name_or_path": "PersianStories-4k",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "configuration.PersianStoriesConfig",
    "AutoModelForCausalLM": "modeling.PersianStoriesForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 4096,
  "model_type": "phi3",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 4,
  "original_max_position_embeddings": 4096,
  "pad_token_id": 32000,
  "resid_pdrop": 0.0,
  "rms_norm_eps": 1e-05,
  "rope_position_scale": 1.0,
  "rope_scaling": {
    "long_factor": [1.0, 1.01, 1.01, 1.02, 1.04, 1.04, 1.04, 1.05, 1.05, 1.06, 1.07, 1.08, 1.08, 1.08, 1.08, 1.08, 1.08, 1.08, 1.09, 1.09, 1.2, 2.31, 3.76, 9.38, 10.1, 10.8, 18.1, 25.2, 25.3, 26.1, 26.6, 30.2, 33.0, 41.5, 44.4, 44.8, 50.2, 51.9, 59.3, 62.7, 66.1, 66.3, 85.8, 89.3, 90.0, 99.9, 107.0, 110.0, 111.0, 117.0, 118.0, 121.0, 122.0, 127.0, 127.0, 128.0, 128.0, 128.0, 128.0, 128.0, 128.0, 129.0, 129.0, 129.0],
    "long_mscale": 1.1902380714238083,
    "original_max_position_embeddings": 4096,
    "short_factor": [1.02, 1.02, 1.05, 1.05, 1.06, 1.08, 1.08, 1.08, 1.08, 1.12, 1.1800000000000002, 1.1900000000000002, 1.1900000000000002, 1.2100000000000002, 1.2300000000000002, 1.2400000000000002, 1.2400000000000002, 1.2500000000000002, 1.3000000000000003, 1.3100000000000003, 1.4600000000000004, 1.5100000000000005, 1.7000000000000006, 1.9300000000000008, 2.080000000000001, 2.4399999999999933, 3.2199999999999767, 3.4499999999999718, 3.579999999999969, 4.669999999999946, 4.779999999999943, 5.999999999999917, 6.009999999999917, 6.4199999999999084, 6.619999999999904, 7.189999999999892, 7.3099999999998895, 7.339999999999889, 7.479999999999886, 9.749999999999837, 10.919999999999812, 11.219999999999805, 11.749999999999794, 11.979999999999789, 13.239999999999762, 13.579999999999755, 13.669999999999753, 13.82999999999975, 14.009999999999746, 14.679999999999731, 14.889999999999727, 15.769999999999708, 15.769999999999708, 15.819999999999707, 15.839999999999707, 15.919999999999705, 16.029999999999703, 16.12999999999972, 16.44999999999977, 16.44999999999977, 16.77999999999982, 16.83999999999983, 16.83999999999983, 16.889999999999837],
    "short_mscale": 1.0,
    "type": "su"
  },
  "rope_theta": 10000.0,
  "sliding_window": 2047,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.40.2",
  "use_cache": true,
  "attention_bias": false,
  "vocab_size": 32064
}