tamang0000 committed on
Commit
463e386
1 Parent(s): 406f23f

Trained with Unsloth

Browse files
Files changed (4) hide show
  1. README.md +4 -29
  2. config.json +30 -0
  3. generation_config.json +6 -0
  4. model.safetensors +3 -0
README.md CHANGED
@@ -9,40 +9,15 @@ tags:
9
  - unsloth
10
  - qwen2
11
  - trl
 
12
  ---
13
 
14
- Start: কম্পিউটাৰ
15
-
16
- Before:
17
- কম্পিউটাৰ কোনো স্বাস্থ্য করা হয় না। একটি স্বাস্থ্য করা
18
- কম্পিউটাৰ কোনো স্বাস্থ্য করা হয় না। একটি স্বাস্থ্য করা
19
-
20
-
21
- After:
22
- কম্পিউটাৰ পৰাপৰ পৰাপৰ পৰাপৰ পৰাপৰ পৰাপৰ পৰাপৰ প
23
- কম্পিউটাৰ পৰাপৰ পৰাপৰ পৰাপৰ পৰাপৰ পৰাপৰ পৰাপৰ প
24
- ```
25
- max_seq_length = 512
26
- per_device_train_batch_size = 2,
27
- gradient_accumulation_steps = 4,
28
- warmup_steps = 2,
29
- max_steps = 10,
30
- learning_rate = 0.0005,
31
- fp16 = not torch.cuda.is_bf16_supported(),
32
- bf16 = torch.cuda.is_bf16_supported(),
33
- logging_steps = 1,
34
- optim = "adamw_8bit",
35
- weight_decay = 0.01,
36
- lr_scheduler_type = "linear",
37
- seed = 3407,
38
- output_dir = "outputs",
39
- ```
40
-
41
- ---
42
  # Uploaded model
43
 
44
  - **Developed by:** tamang0000
45
  - **License:** apache-2.0
46
  - **Finetuned from model :** unsloth/qwen2-0.5b-bnb-4bit
47
 
48
- This qwen2 model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Huggingface's TRL library.
 
 
 
9
  - unsloth
10
  - qwen2
11
  - trl
12
+ - sft
13
  ---
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  # Uploaded model
16
 
17
  - **Developed by:** tamang0000
18
  - **License:** apache-2.0
19
  - **Finetuned from model :** unsloth/qwen2-0.5b-bnb-4bit
20
 
21
+ This qwen2 model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library.
22
+
23
+ [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "unsloth/qwen2-0.5b-bnb-4bit",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151643,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "max_position_embeddings": 131072,
14
+ "max_window_layers": 24,
15
+ "model_type": "qwen2",
16
+ "num_attention_heads": 14,
17
+ "num_hidden_layers": 24,
18
+ "num_key_value_heads": 2,
19
+ "pad_token_id": 151646,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_theta": 1000000.0,
22
+ "sliding_window": 131072,
23
+ "tie_word_embeddings": true,
24
+ "torch_dtype": "float16",
25
+ "transformers_version": "4.41.2",
26
+ "unsloth_version": "2024.6",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936
30
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "eos_token_id": 151643,
4
+ "max_new_tokens": 2048,
5
+ "transformers_version": "4.41.2"
6
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82486364219c3dc2fc58858f93c477af8913be3f516bd21e3eb60f8ff5928d94
3
+ size 988097536