m00bs/llama-3-8b-inst-CausalRelationship-finetune
- README.md +9 -51
- adapter_config.json +3 -3
- adapter_model.safetensors +1 -1
- llama-3-8b-News-Finetune/adapter_config.json +3 -3
- llama-3-8b-News-Finetune/adapter_model.safetensors +1 -1
- llama-3-8b-News-Finetune/training_args.bin +1 -1
- runs/Aug05_03-50-11_c76be924f45b/events.out.tfevents.1722829823.c76be924f45b.163.0 +3 -0
- runs/Aug05_04-06-13_c76be924f45b/events.out.tfevents.1722830785.c76be924f45b.163.1 +3 -0
- training_args.bin +1 -1
README.md
CHANGED
@@ -1,7 +1,7 @@
 ---
 base_model: unsloth/llama-3-8b-Instruct-bnb-4bit
 library_name: peft
-license: llama3
+license: llama3
 tags:
 - trl
 - sft
@@ -21,61 +21,17 @@ This model is a fine-tuned version of [unsloth/llama-3-8b-Instruct-bnb-4bit](htt
 
 ## Model description
 
-
-It demonstrates data preparation, model configuration with LoRA, training with SFTTrainer, and inference with optimized settings.
-The unsloth models, especially the 4-bit quantized versions, enable efficient and faster training and inference, making them suitable for various AI and ML applications.
+More information needed
 
-##
+## Intended uses & limitations
 
-
+More information needed
 
-```python
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from unsloth import FastLanguageModel
-from unsloth.chat_templates import get_chat_template
-from peft import PeftModel, PeftConfig
-```
+## Training and evaluation data
 
+More information needed
 
-
-
-```python
-# Load the tokenizer
-tokenizer = AutoTokenizer.from_pretrained("m00bs/llama-3-8b-inst-CausalRelationship-finetune-tokenizer")
-
-# Load the model
-config = PeftConfig.from_pretrained("m00bs/outputs")
-base_model = AutoModelForCausalLM.from_pretrained("unsloth/llama-3-8b-Instruct-bnb-4bit")
-model = PeftModel.from_pretrained(base_model, "m00bs/outputs")
-
-# Move model to GPU if available
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model.to(device)
-
-```
-
-3. **Prepare Inputs**
-
-```python
-# Prepare the input text
-input_text = """As a finance expert, answer the following question about the following market event about Market Event:
-Given that China's full reopening announcement on December 26, 2022 caused an immediate jump in Chinese stock prices, What was the impact of China's full reopening announcement on December 26, 2022 on Chinese stock prices?"""
-
-# Tokenize the input text
-inputs = tokenizer(input_text, return_tensors="pt").to(device)
-```
-
-4. **Run Inference**
-
-```python
-# Generate the response
-outputs = model.generate(**inputs, max_new_tokens=300, use_cache=True)
-
-# Decode the output
-response = tokenizer.batch_decode(outputs, skip_special_tokens=True)
-```
-
+## Training procedure
 
 ### Training hyperparameters
 
@@ -92,6 +48,8 @@ The following hyperparameters were used during training:
 - training_steps: 60
 - mixed_precision_training: Native AMP
 
+### Training results
+
 
 
 ### Framework versions
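The removed card text mentions training with SFTTrainer, and the surviving hyperparameter list confirms only `training_steps: 60` and native AMP mixed precision. A minimal sketch of what that setup could look like follows; the dataset, learning rate, batch size, and the exact trl constructor arguments (which vary between trl versions) are assumptions, not details from this commit:

```python
from transformers import TrainingArguments
from trl import SFTTrainer

# Sketch only: max_steps and fp16 come from the card's hyperparameter
# list ("training_steps: 60", "mixed_precision_training: Native AMP");
# every other value is a placeholder.
args = TrainingArguments(
    output_dir="outputs",
    max_steps=60,
    fp16=True,
    per_device_train_batch_size=2,  # assumed
    learning_rate=2e-4,             # assumed
)

trainer = SFTTrainer(
    model=model,                  # the LoRA-wrapped base model
    tokenizer=tokenizer,
    train_dataset=train_dataset,  # hypothetical prepared dataset
    args=args,
)
trainer.train()
```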
adapter_config.json
CHANGED
@@ -20,12 +20,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "k_proj",
     "q_proj",
+    "gate_proj",
     "up_proj",
-    "k_proj",
-    "v_proj",
     "down_proj",
-    "gate_proj",
+    "v_proj",
     "o_proj"
   ],
   "task_type": "CAUSAL_LM",
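The target_modules change above only reorders the list; the same seven projection matrices are adapted before and after. For orientation, a PEFT LoraConfig along these lines would produce such an adapter_config.json (a sketch: the rank, alpha, and dropout values are assumptions, since this hunk shows only the module list and task type):

```python
from peft import LoraConfig

# Sketch of the LoRA setup implied by adapter_config.json; r, lora_alpha,
# and lora_dropout are placeholders not confirmed by the diff above.
lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.0,
    target_modules=[
        "k_proj", "q_proj", "gate_proj",
        "up_proj", "down_proj", "v_proj", "o_proj",
    ],
    task_type="CAUSAL_LM",
)
```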
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2033567a1cd1794d0e2e1d2465a8f1e8dd5f528622c8d444b2994036159acb6e
 size 167832240
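The three-line files in this commit are Git LFS pointers: the weights themselves live in LFS storage, and the pointer records only the object's SHA-256 and byte size. A downloaded copy can be checked against the pointer with plain hashlib (a generic sketch, not code from this repo):

```python
import hashlib

def lfs_sha256(path: str) -> str:
    """Return the SHA-256 hex digest that a Git LFS pointer records."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# Should print 2033567a... for the adapter weights committed here.
print(lfs_sha256("adapter_model.safetensors"))
```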
llama-3-8b-News-Finetune/adapter_config.json
CHANGED
@@ -20,12 +20,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "k_proj",
     "q_proj",
+    "gate_proj",
     "up_proj",
-    "k_proj",
-    "v_proj",
     "down_proj",
-    "gate_proj",
+    "v_proj",
     "o_proj"
   ],
   "task_type": "CAUSAL_LM",
llama-3-8b-News-Finetune/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2033567a1cd1794d0e2e1d2465a8f1e8dd5f528622c8d444b2994036159acb6e
 size 167832240
llama-3-8b-News-Finetune/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:959ffb565f674276e9449f64a7bc2c4f9d5872b07e98baf993264f115025f8ca
 size 5176
runs/Aug05_03-50-11_c76be924f45b/events.out.tfevents.1722829823.c76be924f45b.163.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41acc89c27abb18c81b0335ad1b94320c6234552fff746f57672912def65182c
+size 18168
runs/Aug05_04-06-13_c76be924f45b/events.out.tfevents.1722830785.c76be924f45b.163.1
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4aeb6e69add0d994f3dcf27912ac842116dcd08f7190674009be136d10c5e3b2
+size 18168
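The two event files added under runs/ are ordinary TensorBoard logs from the two training runs on Aug 5. With the tensorboard package installed, they can be inspected programmatically (a sketch; the tag names recorded inside are not visible from the LFS pointers alone):

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Load one run directory and list which series were logged.
acc = EventAccumulator("runs/Aug05_04-06-13_c76be924f45b")
acc.Reload()
print(acc.Tags())  # e.g. scalar series such as the training loss
```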
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:959ffb565f674276e9449f64a7bc2c4f9d5872b07e98baf993264f115025f8ca
 size 5176