james92/llama7b__finetune_sample
Browse files
- .gitignore +160 -0
- README.md +54 -0
- adapter_config.json +26 -0
- adapter_model.safetensors +3 -0
- huggingface_login.py +4 -0
- inference.py +61 -0
- llama_7b_james/README.md +204 -0
- llama_7b_james/adapter_config.json +26 -0
- llama_7b_james/adapter_model.safetensors +3 -0
- llama_7b_james/special_tokens_map.json +24 -0
- llama_7b_james/tokenizer.json +0 -0
- llama_7b_james/tokenizer_config.json +48 -0
- llama_7b_james/training_args.bin +3 -0
- lora.py +121 -0
- lora_bloom_3b.py +121 -0
- lora_llama2_7b.py +109 -0
- readMe +1 -0
- requirements.txt +9 -0
- special_tokens_map.json +24 -0
- tokenizer.json +0 -0
- tokenizer_config.json +48 -0
- training_args.bin +3 -0
.gitignore
ADDED
@@ -0,0 +1,160 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
README.md
ADDED
@@ -0,0 +1,54 @@
---
library_name: peft
tags:
- generated_from_trainer
base_model: NousResearch/Llama-2-7b-chat-hf
model-index:
- name: NousResearch/Llama-2-7b-chat-hf
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# NousResearch/Llama-2-7b-chat-hf

This model is a fine-tuned version of [NousResearch/Llama-2-7b-chat-hf](https://huggingface.co/NousResearch/Llama-2-7b-chat-hf) on an unknown dataset.

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 0.0002
- train_batch_size: 4
- eval_batch_size: 8
- seed: 42
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: constant
- lr_scheduler_warmup_ratio: 0.03
- num_epochs: 1

### Training results

### Framework versions

- PEFT 0.7.1
- Transformers 4.36.2
- Pytorch 2.1.0+cu118
- Datasets 2.15.0
- Tokenizers 0.15.0
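For reference, these settings correspond to standard transformers `TrainingArguments` fields (the training run itself is driven by lora_llama2_7b.py later in this commit). A minimal sketch, with the output directory as a placeholder:

```python
from transformers import TrainingArguments

# Sketch only: mirrors the hyperparameters listed in the card above.
# "outputs" is a placeholder directory; the Adam betas/epsilon shown in the
# card are the library defaults and are not set explicitly here.
training_args = TrainingArguments(
    output_dir="outputs",
    learning_rate=2e-4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    seed=42,
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    num_train_epochs=1,
)
```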
adapter_config.json
ADDED
@@ -0,0 +1,26 @@
{
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "NousResearch/Llama-2-7b-chat-hf",
  "bias": "none",
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 16,
  "lora_dropout": 0.1,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
  "r": 8,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "q_proj",
    "v_proj"
  ],
  "task_type": "CAUSAL_LM"
}
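The same adapter settings expressed as a peft `LoraConfig`, for readers who prefer to see them in code. This is a sketch that simply mirrors the JSON fields above:

```python
from peft import LoraConfig

# Rank-8 LoRA on the attention query/value projections, as in adapter_config.json.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
)
```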
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0cfecdabcd528507e41cc28711e5f917d3abde045f173c769ba07dd90646b14a
size 16794200
huggingface_login.py
ADDED
@@ -0,0 +1,4 @@
from huggingface_hub import notebook_login

# Opens an interactive token prompt (intended for notebook environments).
notebook_login()

# Hub username placeholder (left blank here; the training scripts set their own value).
HUGGING_FACE_USER_NAME = ""
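`notebook_login()` is designed for Jupyter environments. When running the scripts in this repo from a plain terminal, a token-based login is a common alternative; a minimal sketch with a placeholder token string:

```python
from huggingface_hub import login

# Placeholder token: create a real one at https://huggingface.co/settings/tokens.
login(token="hf_your_token_here")
```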
inference.py
ADDED
@@ -0,0 +1,61 @@
from peft import LoraConfig, PeftModel, PeftConfig
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch


# Uses the global `tokenizer` and `qa_model` defined below.
def make_inference(context, question):
    batch = tokenizer(f"### CONTEXT\n{context}\n\n### QUESTION\n{question}\n\n### ANSWER\n", return_tensors='pt')

    if torch.cuda.is_available():
        batch = {k: v.to('cuda') for k, v in batch.items()}

    with torch.cuda.amp.autocast():
        output_tokens = qa_model.generate(**batch, max_new_tokens=200)

    return tokenizer.decode(output_tokens[0], skip_special_tokens=True)


model_name = "bloom7b__finetune_sample"
HUGGING_FACE_USER_NAME = "james92"

# Load the base model and tokenizer referenced by the adapter config.
peft_model_id = f'{HUGGING_FACE_USER_NAME}/{model_name}'
config = PeftConfig.from_pretrained(peft_model_id)
inference_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=False, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Attach the fine-tuned LoRA adapter for inference.
qa_model = PeftModel.from_pretrained(inference_model, peft_model_id)

# Print the model.
print(qa_model)


context = "Cheese is the best food."
question = "What is the best food?"

print(make_inference(context=context, question=question))


context = "The Moon orbits Earth at an average distance of 384,400 km (238,900 mi), or about 30 times Earth's diameter. Its gravitational influence is the main driver of Earth's tides and very slowly lengthens Earth's day. The Moon's orbit around Earth has a sidereal period of 27.3 days. During each synodic period of 29.5 days, the amount of visible surface illuminated by the Sun varies from none up to 100%, resulting in lunar phases that form the basis for the months of a lunar calendar. The Moon is tidally locked to Earth, which means that the length of a full rotation of the Moon on its own axis causes its same side (the near side) to always face Earth, and the somewhat longer lunar day is the same as the synodic period. However, 59% of the total lunar surface can be seen from Earth through cyclical shifts in perspective known as libration."
question = "At what distance does the Moon orbit the Earth?"

print(make_inference(context=context, question=question))


context = "Cheese is the best food."
question = "How far away is the Moon from the Earth?"

print(make_inference(context=context, question=question))


context = '''
Content - 0:product.partNumber=CO-AM-S185,location.locationIdentifier=LT-1,inventoryType=PRODUCT,quantity=242,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=3844150,status=Active,value=69938,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 1:product.partNumber=CO-AM-S185,location.locationIdentifier=LT-2,inventoryType=PRODUCT,quantity=303,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=5227245,status=Active,value=87567,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 2:product.partNumber=CO-AM-S393,location.locationIdentifier=LT-2,inventoryType=PRODUCT,quantity=222,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=9329151,status=Active,value=40626,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 3:product.partNumber=CO-AM-B209,location.locationIdentifier=LT-1,inventoryType=PRODUCT,quantity=88,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=1716142,status=Active,value=10560,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 4:product.partNumber=CO-AM-S393,location.locationIdentifier=LT-1,inventoryType=PRODUCT,quantity=89,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=6375908,status=Active,value=16287,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 5:product.partNumber=CO-RD-I380,location.locationIdentifier=LT-1,inventoryType=PRODUCT,quantity=168,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=286860,status=Active,value=13104,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 6:product.partNumber=CO-RD-I380,location.locationIdentifier=LT-2,inventoryType=PRODUCT,quantity=189,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=2162419,status=Active,value=14742,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 7:product.partNumber=CO-DL-T316,location.locationIdentifier=LT-2,inventoryType=PRODUCT,quantity=270,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=9656094,status=Active,value=14850,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 8:product.partNumber=CO-DL-A230,location.locationIdentifier=LT-2,inventoryType=PRODUCT,quantity=125,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=6219859,status=Active,value=6375,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 9:product.partNumber=CO-DL-A239,location.locationIdentifier=LT-1,inventoryType=PRODUCT,quantity=103,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=3352121,status=Active,value=4944,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 
10:product.partNumber=CO-AM-B209,location.locationIdentifier=LT-2,inventoryType=PRODUCT,quantity=384,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=3024598,status=Active,value=46080,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 11:product.partNumber=CO-DL-A230,location.locationIdentifier=LT-1,inventoryType=PRODUCT,quantity=148,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=8276232,status=Active,value=7548,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 12:product.partNumber=CO-DL-A239,location.locationIdentifier=LT-2,inventoryType=PRODUCT,quantity=222,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=23233,status=Active,value=10656,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 13:product.partNumber=CO-DL-T316,location.locationIdentifier=LT-1,inventoryType=PRODUCT,quantity=50,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=6529379,status=Active,value=2750,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 14:product.partNumber=CO-RD-S282,location.locationIdentifier=LT-1,inventoryType=PRODUCT,quantity=352,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=3932441,status=Active,value=30976,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 15:product.partNumber=CO-RD-S282,location.locationIdentifier=LT-2,inventoryType=PRODUCT,quantity=388,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=1501513,status=Active,value=34144,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 16:product.partNumber=CO-RD-R296,location.locationIdentifier=LT-2,inventoryType=PRODUCT,quantity=125,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=3090203,status=Active,value=11625,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 17:product.partNumber=PS-PL-R186,location.locationIdentifier=LT-2,inventoryType=PRODUCT,quantity=38,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=7999926,status=Active,value=47500,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 18:product.partNumber=PS-PL-R186,location.locationIdentifier=LT-1,inventoryType=PRODUCT,quantity=39,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=5754848,status=Active,value=48750,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 19:product.partNumber=CO-RD-R296,location.locationIdentifier=LT-1,inventoryType=PRODUCT,quantity=162,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=6526123,status=Active,value=15066,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 
20:product.partNumber=CO-CO-N311,location.locationIdentifier=LT-2,inventoryType=PRODUCT,quantity=256,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=8095102,status=Active,value=64000,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 21:product.partNumber=CO-CO-N311,location.locationIdentifier=LT-1,inventoryType=PRODUCT,quantity=124,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=4709951,status=Active,value=31000,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 22:product.partNumber=CO-CO-V310,location.locationIdentifier=LT-2,inventoryType=PRODUCT,quantity=59,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=8601192,status=Active,value=5900,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 23:product.partNumber=CO-CO-V310,location.locationIdentifier=LT-1,inventoryType=PRODUCT,quantity=121,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=3952909,status=Active,value=12100,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 24:product.partNumber=PS-SL-C193,location.locationIdentifier=LT-1,inventoryType=PRODUCT,quantity=49,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=2450098,status=Active,value=61250,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 25:product.partNumber=CO-CO-M328,location.locationIdentifier=LT-1,inventoryType=PRODUCT,quantity=214,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=6340793,status=Active,value=26750,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 26:product.partNumber=CO-CO-M328,location.locationIdentifier=LT-2,inventoryType=PRODUCT,quantity=230,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=4564154,status=Active,value=28750,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\nContent - 27:product.partNumber=PS-SL-C193,location.locationIdentifier=LT-2,inventoryType=PRODUCT,quantity=41,quantityUnits=EA,expirationDate=2022-12-31T00:00:00,inventoryParentType=ONHAND,class=NEW,segment=INDUSTRIAL,lotCode=9412870,status=Active,value=51250,valueCurrency=USD,sourceLink=https://foo.com,storageDate=2022-01-01T00:00:00\n\n
'''
question = "give me details about the product whose partnumber is CO-AM-S185"

print(make_inference(context=context, question=question))
llama_7b_james/README.md
ADDED
@@ -0,0 +1,204 @@
---
library_name: peft
base_model: NousResearch/Llama-2-7b-chat-hf
---

# Model Card for Model ID

<!-- Provide a quick summary of what the model is/does. -->

## Model Details

### Model Description

<!-- Provide a longer summary of what this model is. -->

- **Developed by:** [More Information Needed]
- **Funded by [optional]:** [More Information Needed]
- **Shared by [optional]:** [More Information Needed]
- **Model type:** [More Information Needed]
- **Language(s) (NLP):** [More Information Needed]
- **License:** [More Information Needed]
- **Finetuned from model [optional]:** [More Information Needed]

### Model Sources [optional]

<!-- Provide the basic links for the model. -->

- **Repository:** [More Information Needed]
- **Paper [optional]:** [More Information Needed]
- **Demo [optional]:** [More Information Needed]

## Uses

<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->

### Direct Use

<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->

[More Information Needed]

### Downstream Use [optional]

<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->

[More Information Needed]

### Out-of-Scope Use

<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->

[More Information Needed]

## Bias, Risks, and Limitations

<!-- This section is meant to convey both technical and sociotechnical limitations. -->

[More Information Needed]

### Recommendations

<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->

Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.

## How to Get Started with the Model

Use the code below to get started with the model.

[More Information Needed]

## Training Details

### Training Data

<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->

[More Information Needed]

### Training Procedure

<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->

#### Preprocessing [optional]

[More Information Needed]

#### Training Hyperparameters

- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->

#### Speeds, Sizes, Times [optional]

<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->

[More Information Needed]

## Evaluation

<!-- This section describes the evaluation protocols and provides the results. -->

### Testing Data, Factors & Metrics

#### Testing Data

<!-- This should link to a Dataset Card if possible. -->

[More Information Needed]

#### Factors

<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->

[More Information Needed]

#### Metrics

<!-- These are the evaluation metrics being used, ideally with a description of why. -->

[More Information Needed]

### Results

[More Information Needed]

#### Summary

## Model Examination [optional]

<!-- Relevant interpretability work for the model goes here -->

[More Information Needed]

## Environmental Impact

<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->

Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).

- **Hardware Type:** [More Information Needed]
- **Hours used:** [More Information Needed]
- **Cloud Provider:** [More Information Needed]
- **Compute Region:** [More Information Needed]
- **Carbon Emitted:** [More Information Needed]

## Technical Specifications [optional]

### Model Architecture and Objective

[More Information Needed]

### Compute Infrastructure

[More Information Needed]

#### Hardware

[More Information Needed]

#### Software

[More Information Needed]

## Citation [optional]

<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->

**BibTeX:**

[More Information Needed]

**APA:**

[More Information Needed]

## Glossary [optional]

<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->

[More Information Needed]

## More Information [optional]

[More Information Needed]

## Model Card Authors [optional]

[More Information Needed]

## Model Card Contact

[More Information Needed]

### Framework versions

- PEFT 0.7.1
llama_7b_james/adapter_config.json
ADDED
@@ -0,0 +1,26 @@
{
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "NousResearch/Llama-2-7b-chat-hf",
  "bias": "none",
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 16,
  "lora_dropout": 0.1,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
  "r": 8,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "q_proj",
    "v_proj"
  ],
  "task_type": "CAUSAL_LM"
}
llama_7b_james/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0cfecdabcd528507e41cc28711e5f917d3abde045f173c769ba07dd90646b14a
size 16794200
llama_7b_james/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": "</s>",
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}
llama_7b_james/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
llama_7b_james/tokenizer_config.json
ADDED
@@ -0,0 +1,48 @@
{
  "add_bos_token": true,
  "add_eos_token": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "32000": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "eos_token": "</s>",
  "legacy": false,
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "</s>",
  "sp_model_kwargs": {},
  "tokenizer_class": "LlamaTokenizer",
  "unk_token": "<unk>",
  "use_default_system_prompt": false
}
llama_7b_james/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:621ff14e95b5620fce8fe1cf605259e115aabbc0c3b5a92d983b045b424f002e
size 4664
lora.py
ADDED
@@ -0,0 +1,121 @@
# LoRA exercise: fine-tune bigscience/bloom-3b on SQuAD v2 with a LoRA adapter.

from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, PeftModel, PeftConfig
from datasets import load_dataset
import bitsandbytes as bnb
import transformers
import torch.nn as nn
import torch

# Check if CUDA is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'


model = AutoModelForCausalLM.from_pretrained(
    'bigscience/bloom-3b',
    torch_dtype=torch.float16,
    device_map='auto'
)

tokenizer = AutoTokenizer.from_pretrained("bigscience/tokenizer")

# Print the model architecture.
print(model)

# Freeze the base-model parameters.
for param in model.parameters():
    param.requires_grad = False

    if param.ndim == 1:  # cast the small parameters (e.g. layer norms) to fp32 for stability
        param.data = param.data.to(torch.float32)


model.gradient_checkpointing_enable()
model.enable_input_require_grads()

# CastOutputToFloat ensures the output of model.lm_head is returned in float32.
class CastOutputToFloat(nn.Sequential):
    def forward(self, x):
        return super().forward(x).to(torch.float32)

model.lm_head = CastOutputToFloat(model.lm_head)


# Print trainable parameters.
def print_trainable_parameters(model):
    trainable_params = 0
    all_params = 0

    for _, param in model.named_parameters():
        all_params += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()

    print(f'Trainable Params:{trainable_params}, All Params:{all_params}, trainable % {100 * (trainable_params/all_params)}')


# Build the LoRA config.
config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=['query_key_value'],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the frozen base model with the LoRA adapter and compare the trainable-parameter count.
model = get_peft_model(model=model, peft_config=config)
print_trainable_parameters(model=model)


qa_dataset = load_dataset('squad_v2')


def create_prompt(context, question, answer):
    if len(answer['text']) < 1:
        result = "I don't know the answer"
    else:
        result = answer['text'][0]
    prompt_template = f"### CONTEXT\n{context}\n\n### QUESTION\n{question}\n\n### ANSWER\n{result}</s>"
    return prompt_template

mapped_dataset = qa_dataset.map(lambda samples: tokenizer(create_prompt(samples['context'], samples['question'], samples['answers'])))

# Trainer setup.
trainer = transformers.Trainer(
    model=model,
    train_dataset=mapped_dataset['train'],
    args=transformers.TrainingArguments(
        per_device_eval_batch_size=4,
        gradient_accumulation_steps=4,
        warmup_steps=100,
        max_steps=100,
        num_train_epochs=3,
        learning_rate=1e-3,
        fp16=True,
        logging_steps=1,
        output_dir='outputs'
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

# use_cache is incompatible with gradient checkpointing, so disable it during training.
model.config.use_cache = False
trainer.train()


# Upload to the Hugging Face Hub.
model_name = "bloom7b__finetune_sample"
HUGGING_FACE_USER_NAME = "james92"

model.push_to_hub(f"{HUGGING_FACE_USER_NAME}/{model_name}", use_auth_token=True)

print("Model is saved to Hugging Face")
lora_bloom_3b.py
ADDED
@@ -0,0 +1,121 @@
# LoRA exercise: fine-tune bigscience/bloom-3b on SQuAD v2 with a LoRA adapter.

from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, PeftModel, PeftConfig
from datasets import load_dataset
import bitsandbytes as bnb
import transformers
import torch.nn as nn
import torch

# Check if CUDA is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'


model = AutoModelForCausalLM.from_pretrained(
    'bigscience/bloom-3b',
    torch_dtype=torch.float16,
    device_map='auto'
)

tokenizer = AutoTokenizer.from_pretrained("bigscience/tokenizer")

# Print the model architecture.
print(model)

# Freeze the base-model parameters.
for param in model.parameters():
    param.requires_grad = False

    if param.ndim == 1:  # cast the small parameters (e.g. layer norms) to fp32 for stability
        param.data = param.data.to(torch.float32)


model.gradient_checkpointing_enable()
model.enable_input_require_grads()

# CastOutputToFloat ensures the output of model.lm_head is returned in float32.
class CastOutputToFloat(nn.Sequential):
    def forward(self, x):
        return super().forward(x).to(torch.float32)

model.lm_head = CastOutputToFloat(model.lm_head)


# Print trainable parameters.
def print_trainable_parameters(model):
    trainable_params = 0
    all_params = 0

    for _, param in model.named_parameters():
        all_params += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()

    print(f'Trainable Params:{trainable_params}, All Params:{all_params}, trainable % {100 * (trainable_params/all_params)}')


# Build the LoRA config.
config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=['query_key_value'],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the frozen base model with the LoRA adapter and compare the trainable-parameter count.
model = get_peft_model(model=model, peft_config=config)
print_trainable_parameters(model=model)


qa_dataset = load_dataset('squad_v2')


def create_prompt(context, question, answer):
    if len(answer['text']) < 1:
        result = "I don't know the answer"
    else:
        result = answer['text'][0]
    prompt_template = f"### CONTEXT\n{context}\n\n### QUESTION\n{question}\n\n### ANSWER\n{result}</s>"
    return prompt_template

mapped_dataset = qa_dataset.map(lambda samples: tokenizer(create_prompt(samples['context'], samples['question'], samples['answers'])))

# Trainer setup.
trainer = transformers.Trainer(
    model=model,
    train_dataset=mapped_dataset['train'],
    args=transformers.TrainingArguments(
        per_device_eval_batch_size=4,
        gradient_accumulation_steps=4,
        warmup_steps=100,
        max_steps=100,
        num_train_epochs=3,
        learning_rate=1e-3,
        fp16=True,
        logging_steps=1,
        output_dir='outputs'
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

# use_cache is incompatible with gradient checkpointing, so disable it during training.
model.config.use_cache = False
trainer.train()


# Upload to the Hugging Face Hub.
model_name = "bloom7b__finetune_sample"
HUGGING_FACE_USER_NAME = "james92"

model.push_to_hub(f"{HUGGING_FACE_USER_NAME}/{model_name}", use_auth_token=True)

print("Model is saved to Hugging Face")
lora_llama2_7b.py
ADDED
@@ -0,0 +1,109 @@
# To load the dataset
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
import torch


# Load the training dataset
data_name = "mlabonne/guanaco-llama2-1k"
training_data = load_dataset(data_name, split='train')

# Model and tokenizer names
base_model_name = "NousResearch/Llama-2-7b-chat-hf"


# Tokenizer
llama_tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
llama_tokenizer.pad_token = llama_tokenizer.eos_token
llama_tokenizer.padding_side = 'right'

# Quantization config (4-bit NF4 with fp16 compute)
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)

# Model
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=quant_config,
    device_map='auto'
)
base_model.config.use_cache = False
base_model.config.pretraining_tp = 1  # tensor parallelism rank

'''
Double quantization is a technique where weights are quantized twice with different quantization parameters,
potentially improving the accuracy of the quantized model. However, it may also increase computational complexity.
'''

'''
LoRA-specific parameters

Dropout rate (lora_dropout): the probability that each neuron's output is set to zero during training, used to prevent overfitting.

Rank (r): a measure of how the original weight matrices are decomposed into simpler, smaller matrices. This reduces
computational requirements and memory consumption. Lower ranks make the model faster but might sacrifice performance.
The original LoRA paper suggests starting with a rank of 8; QLoRA setups often use a higher rank such as 64.

lora_alpha: controls the scaling of the low-rank update, balancing the original weights against the low-rank approximation.
Higher values make the approximation more influential in the fine-tuning process, affecting both performance and computational cost.
'''

# LoRA config
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=8,
    bias='none',
    task_type='CAUSAL_LM'
)


# Training args
train_params = TrainingArguments(
    output_dir="./",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant"
)

# Trainer
fine_tuning = SFTTrainer(
    model=base_model,
    train_dataset=training_data,
    peft_config=peft_config,
    dataset_text_field="text",
    tokenizer=llama_tokenizer,
    args=train_params
)

# Call the train function
fine_tuning.train()

# Save the adapter locally
fine_tuning.save_model("llama_7b_james")

# Upload to the Hugging Face Hub
model_name = "llama7b__finetune_sample"
HUGGING_FACE_USER_NAME = "james92"

fine_tuning.push_to_hub(f"{HUGGING_FACE_USER_NAME}/{model_name}")

print("Model is saved to Hugging Face")
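As a follow-up to the script above, here is a minimal sketch of loading the adapter saved to llama_7b_james back onto the base model and merging it into the weights for standalone inference. It assumes the local directory name used by save_model above; the prompt is only illustrative, and the base model is loaded in fp16 because merging is simplest without 4-bit quantization.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_name = "NousResearch/Llama-2-7b-chat-hf"

# Load the base model in fp16 rather than 4-bit so the LoRA weights can be merged.
base = AutoModelForCausalLM.from_pretrained(
    base_model_name, torch_dtype=torch.float16, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Attach the adapter produced by lora_llama2_7b.py and fold it into the base weights.
model = PeftModel.from_pretrained(base, "llama_7b_james")
model = model.merge_and_unload()

# Illustrative prompt in the Llama-2 chat format.
prompt = "[INST] What is LoRA fine-tuning? [/INST]"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```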
readMe
ADDED
@@ -0,0 +1 @@
Experimenting with LoRA on LLMs
requirements.txt
ADDED
@@ -0,0 +1,9 @@
bitsandbytes
datasets
accelerate
loralib
transformers
peft
scipy
huggingface_hub
trl
special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": "</s>",
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
ADDED
@@ -0,0 +1,48 @@
{
  "add_bos_token": true,
  "add_eos_token": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "32000": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "eos_token": "</s>",
  "legacy": false,
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "</s>",
  "sp_model_kwargs": {},
  "tokenizer_class": "LlamaTokenizer",
  "unk_token": "<unk>",
  "use_default_system_prompt": false
}
training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:621ff14e95b5620fce8fe1cf605259e115aabbc0c3b5a92d983b045b424f002e
size 4664