java2core committed · Commit 017b5db · verified · 1 Parent(s): a9a07e5

Training in progress, epoch 1

README.md CHANGED

@@ -1,7 +1,7 @@
 ---
 base_model: google/gemma-3-1b-pt
 library_name: transformers
-model_name: gemma-3-1b-text-to-sql
+model_name: gemma-text-to-sql
 tags:
 - generated_from_trainer
 - trl
@@ -9,7 +9,7 @@ tags:
 licence: license
 ---
 
-# Model Card for gemma-3-1b-text-to-sql
+# Model Card for gemma-text-to-sql
 
 This model is a fine-tuned version of [google/gemma-3-1b-pt](https://huggingface.co/google/gemma-3-1b-pt).
 It has been trained using [TRL](https://github.com/huggingface/trl).
@@ -20,7 +20,7 @@ It has been trained using [TRL](https://github.com/huggingface/trl).
 from transformers import pipeline
 
 question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
-generator = pipeline("text-generation", model="java2core/gemma-3-1b-text-to-sql", device="cuda")
+generator = pipeline("text-generation", model="java2core/gemma-text-to-sql", device="cuda")
 output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
 print(output["generated_text"])
 ```
@@ -34,10 +34,10 @@ This model was trained with SFT.
 
 ### Framework versions
 
-- TRL: 0.21.0
-- Transformers: 4.55.0
-- Pytorch: 2.9.0.dev20250810
-- Datasets: 4.0.0
+- TRL: 0.15.2
+- Transformers: 4.55.2
+- Pytorch: 2.8.0
+- Datasets: 3.3.2
 - Tokenizers: 0.21.4
 
 ## Citations
@@ -49,7 +49,7 @@ Cite TRL as:
 ```bibtex
 @misc{vonwerra2022trl,
     title = {{TRL: Transformer Reinforcement Learning}},
-    author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
+    author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
     year = 2020,
     journal = {GitHub repository},
     publisher = {GitHub},
adapter_config.json CHANGED

@@ -3,7 +3,6 @@
   "auto_mapping": null,
   "base_model_name_or_path": "google/gemma-3-1b-pt",
   "bias": "none",
-  "corda_config": null,
   "eva_config": null,
   "exclude_modules": null,
   "fan_in_fan_out": false,
@@ -23,23 +22,19 @@
     "embed_tokens"
   ],
   "peft_type": "LORA",
-  "qalora_group_size": 16,
   "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
-    "up_proj",
+    "q_proj",
     "down_proj",
     "k_proj",
+    "o_proj",
+    "up_proj",
     "gate_proj",
-    "v_proj",
-    "q_proj"
+    "v_proj"
   ],
-  "target_parameters": null,
   "task_type": "CAUSAL_LM",
-  "trainable_token_indices": null,
   "use_dora": false,
-  "use_qalora": false,
   "use_rslora": false
 }
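
For reference, the adapter_config.json above describes a PEFT LoRA adapter (rank r = 16, applied to the attention and MLP projection layers listed in target_modules). Below is a minimal sketch, not part of this commit, of how such an adapter is typically loaded onto its base model with the peft library; it assumes the adapter lives in the java2core/gemma-text-to-sql repo referenced in the README.

```python
# Minimal sketch (not part of this commit): loading the LoRA adapter
# described by adapter_config.json onto its base model with PEFT.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("google/gemma-3-1b-pt")
tokenizer = AutoTokenizer.from_pretrained("java2core/gemma-text-to-sql")
model = PeftModel.from_pretrained(base, "java2core/gemma-text-to-sql")

# Optionally fold the low-rank updates into the base weights for
# adapter-free inference.
model = model.merge_and_unload()
```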
adapter_model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69a5cf91cdbab50130676051885bdfe237edca7af7e06062795cf22a13dab893
+oid sha256:79bab42b7f3ffd7e08bbc107b7a4963ff093c228ddccea4d0eeb8604ac9a810d
 size 1260191096
added_tokens.json ADDED

@@ -0,0 +1,3 @@
+{
+  "<image_soft_token>": 262144
+}
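
As a quick sanity check (a sketch, not part of the commit), the mapping in added_tokens.json can be verified by asking the tokenizer to resolve the token string; the repo id is assumed from the README.

```python
# Sketch (not part of this commit): confirm the added special token
# resolves to the id recorded in added_tokens.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("java2core/gemma-text-to-sql")
print(tok.convert_tokens_to_ids("<image_soft_token>"))  # expect 262144
```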
runs/Aug15_08-16-06_gemma-3-test.us-central1-c.c.sayouzone-ai.internal/events.out.tfevents.1755245780.gemma-3-test.us-central1-c.c.sayouzone-ai.internal.1935.0 ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d70adf2173b38bc539c6a403fd6b5939c8ec753e71de7006a620fc34fde2067f
+size 8098

runs/Aug15_08-21-14_gemma-3-test.us-central1-c.c.sayouzone-ai.internal/events.out.tfevents.1755246089.gemma-3-test.us-central1-c.c.sayouzone-ai.internal.2603.0 ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4092697de987cc78b131a38814e58793610a5b5baa6fb6d0f9ada60eea203563
+size 7018

runs/Aug15_08-25-26_gemma-3-test.us-central1-c.c.sayouzone-ai.internal/events.out.tfevents.1755246341.gemma-3-test.us-central1-c.c.sayouzone-ai.internal.2719.0 ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a2aa622acdfdf10ad727d3b64fa31f35ee2c79941968cddc13d43f441e5a472
+size 93732

runs/Aug15_13-42-42_gemma-3-test.us-central1-c.c.sayouzone-ai.internal/events.out.tfevents.1755265377.gemma-3-test.us-central1-c.c.sayouzone-ai.internal.16219.0 ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e435eed14df04b3bca88de5f5ab08e5d3388db6ae33e485b5f90eb696dd41d41
+size 11923

runs/Aug16_02-55-56_gemma-3-test.us-central1-c.c.sayouzone-ai.internal/events.out.tfevents.1755312971.gemma-3-test.us-central1-c.c.sayouzone-ai.internal.2359.0 ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58b00b79278bf81d454ae3eb0d1ad04e954936e5062f059d8ce889257e6912ec
+size 35573

tokenizer.model ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+size 4689074
training_args.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:297637400f593b1cebed7ad48d8f7c337539f1e29f9436c3f64b77cb14f50ec9
-size 6225
+oid sha256:0dafb40541f3fc8eaedf077e4207fa71556b83d32c4968c44105f27c5a76e483
+size 6097