bodam committed
Commit b44d4cb
1 Parent(s): 850d746

Upload Gemma2ForCausalLM
README.md CHANGED
@@ -102,13 +102,11 @@ Markdown(decoded_output.split("AI:")[1])
 ### Training Details
 
 #### Training Data
-Data includes children's conversation datasets, anonymized and classified by developmental stages, ensuring a diverse and representative sample.
-The data we used is as follows.
-- https://www.aihub.or.kr/aihubdata/data/view.do?currMenu=115&topMenu=100&dataSetSn=543#:~:text=%EC%86%8C%EA%B0%9C.%20%EC%8B%9D%EC%9D%8C%EB%A3%8C,%20%EC%A3%BC%EA%B1%B0%EC%99%80%20%EC%83%9D%ED%99%9C,%20%EA%B5%90%ED%86%B5,%20%EA%B5%90%EC%9C%A1,%20%EA%B0%80%EC%A1%B1%20%EB%93%B1%2020%EC%97%AC%EA%B0%9C%20%EC%A3%BC%EC%A0%9C%EC%97%90%20%EB%8C%80%ED%95%9C%20%EC%9E%90%EC%9C%A0%EB%A1%9C%EC%9A%B4%20%EC%9D%BC%EC%83%81%EB%8C%80%ED%99%94,%EB%82%98%ED%83%80%EB%82%98%EB%8A%94%20%EB%AC%B8%EC%9E%A5
-- https://www.aihub.or.kr/aihubdata/data/view.do?currMenu=115&topMenu=100&dataSetSn=71694
+Data includes children's conversation datasets, anonymized and classified by developmental stages, ensuring a diverse and representative sample.
+To implement the persona of the service, the speaker's gender and age were specified during the data preprocessing phase. In the "Korean SNS Multi-turn Conversation Data," words like "레게노," which are used primarily on social media and rarely in actual spoken language, were removed.
 
 #### Training Procedure
-- **Preprocessing**: Text data was cleaned and formatted to remove any inappropriate content and personal data. To implement the persona of the service, the speaker's gender and age were specified during the data preprocessing phase. In the "Korean SNS Multi-turn Conversation Data," words like "레게노," which are used primarily on social media and rarely in actual spoken language, were removed.
+- **Preprocessing**: Text data was cleaned and formatted to remove any inappropriate content and personal data.
 - **Model Fine-tuning**: Conducted on the cleaned dataset to tailor the model's responses to children's linguistic needs.
 - **Reinforcement Learning**: Implemented to refine the flow and appropriateness of conversations.
 
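The preprocessing code itself is not part of this commit, so the following is only a hypothetical sketch of the slang-removal and persona-tagging steps the README describes; the blocklist contents, record fields, and function name are illustrative assumptions, not the project's actual code.

```python
# Hypothetical sketch of the preprocessing described in the README diff above.
# Blocklist contents and record schema are assumptions, not the project's code.
SNS_ONLY_SLANG = {"레게노"}  # words common on social media but rare in spoken language

def preprocess(record: dict) -> dict:
    """Strip SNS-only slang and attach the speaker persona used by the service."""
    text = record["text"]
    for word in SNS_ONLY_SLANG:
        text = text.replace(word, "")
    return {
        "text": " ".join(text.split()),      # collapse whitespace left by removals
        "speaker_gender": record["gender"],  # persona fields set during preprocessing
        "speaker_age": record["age"],
    }

print(preprocess({"text": "이거 레게노 아니냐", "gender": "M", "age": 7}))
```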
config.json ADDED
@@ -0,0 +1,49 @@
+{
+  "_name_or_path": "/root/SudaGom/output/results/ft_model",
+  "architectures": [
+    "Gemma2ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attn_logit_softcapping": 50.0,
+  "bos_token_id": 2,
+  "cache_implementation": "hybrid",
+  "eos_token_id": 1,
+  "final_logit_softcapping": 30.0,
+  "head_dim": 256,
+  "hidden_act": "gelu_pytorch_tanh",
+  "hidden_activation": "gelu_pytorch_tanh",
+  "hidden_size": 3584,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 8192,
+  "model_type": "gemma2",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 42,
+  "num_key_value_heads": 8,
+  "pad_token_id": 0,
+  "quantization_config": {
+    "_load_in_4bit": true,
+    "_load_in_8bit": false,
+    "bnb_4bit_compute_dtype": "float16",
+    "bnb_4bit_quant_storage": "uint8",
+    "bnb_4bit_quant_type": "nf4",
+    "bnb_4bit_use_double_quant": false,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
+    "quant_method": "bitsandbytes"
+  },
+  "query_pre_attn_scalar": 256,
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 10000.0,
+  "sliding_window": 4096,
+  "sliding_window_size": 4096,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.45.1",
+  "use_cache": true,
+  "vocab_size": 256000
+}
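The `quantization_config` above records bitsandbytes 4-bit NF4 settings, so the checkpoint should reload with those settings applied. A minimal loading sketch follows; the repo id is a placeholder, since the full repository name is not shown in this commit view.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Mirrors the quantization_config embedded in config.json above.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

repo_id = "bodam/..."  # placeholder -- the full repo id is not shown in this view

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    quantization_config=bnb_config,
    device_map="auto",  # needs `accelerate`; spreads the 42 layers across devices
)
```

Because these settings are serialized into config.json, `from_pretrained` would apply them even without an explicit `quantization_config`; passing it just makes the choice visible.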
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33ddfe6329b8c2f350f6121e9b40f4b7804a3fb981db10642b133427ba984429
+size 4982071540
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9393c8398cbc75f1ad51e90a4e6da0711836fdae371f32d737970eded0c6a2e1
+size 1536375974
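Both entries above are Git LFS pointer files rather than the weights themselves: `oid` is the SHA-256 digest of the real shard and `size` its length in bytes (about 5.0 GB and 1.5 GB here). A small sketch for checking a downloaded shard against its pointer, assuming the shard sits in the current directory:

```python
import hashlib
import os

def verify_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded file against the oid/size recorded in its LFS pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Values copied from the pointer for the first shard above.
ok = verify_lfs_pointer(
    "model-00001-of-00002.safetensors",
    "33ddfe6329b8c2f350f6121e9b40f4b7804a3fb981db10642b133427ba984429",
    4982071540,
)
print("shard 1 intact:", ok)
```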
model.safetensors.index.json ADDED
The diff for this file is too large to render.