pcuenq (HF staff) committed
Commit 36feb4a
1 Parent(s): a9d41fc

Upload files

Files changed (5)
  1. README.md +103 -0
  2. lora.bin +3 -0
  3. special_tokens_map.json +5 -0
  4. tokenizer.json +0 -0
  5. tokenizer_config.json +9 -0
README.md CHANGED
@@ -1,3 +1,106 @@
  ---
  license: apache-2.0
+ base_model: togethercomputer/RedPajama-INCITE-Base-3B-v1
+ datasets:
+ - johnrobinsn/alpaca-cleaned
+ tags:
+ - lora
+ - alpaca
+ - peft
+ - redpajama
  ---
+
+ # RedPajama-3B-instruct-lora
+
+ This is an instruction fine-tuned version of https://huggingface.co/togethercomputer/RedPajama-INCITE-Base-3B-v1, trained with LoRA using `int8` mixed-precision training.
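+
+ For reference, an `int8` + LoRA fine-tune of this kind is usually set up along the lines below. This is a minimal sketch, not the actual training script used for this model; the LoRA hyperparameters are taken from the inference example further down, and everything else is an assumption.
+
+ ```python
+ from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_int8_training
+ from transformers import AutoModelForCausalLM
+
+ base_model = "togethercomputer/RedPajama-INCITE-Base-3B-v1"
+ model = AutoModelForCausalLM.from_pretrained(base_model, load_in_8bit=True, device_map="auto")
+
+ # Freeze the int8 base weights and make the model gradient-safe;
+ # only the LoRA adapter matrices receive gradients during training.
+ model = prepare_model_for_int8_training(model)
+ lora_config = LoraConfig(
+     r=8,
+     lora_alpha=16,
+     target_modules=["query_key_value"],  # the fused attention projection in GPT-NeoX blocks
+     lora_dropout=0.05,
+     bias="none",
+     task_type=TaskType.CAUSAL_LM,
+ )
+ model = get_peft_model(model, lora_config)
+ model.print_trainable_parameters()  # LoRA trains only a small fraction of all weights
+ ```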
+
+ ## Training dataset
+
+ A cleaned version of the Alpaca dataset, from https://huggingface.co/datasets/johnrobinsn/alpaca-cleaned.
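+
+ The records follow the usual Alpaca schema (`instruction`, `input`, `output`); a quick way to inspect them, assuming the `datasets` library is installed:
+
+ ```python
+ from datasets import load_dataset
+
+ ds = load_dataset("johnrobinsn/alpaca-cleaned", split="train")
+ print(ds[0])  # expected fields: instruction, input, output
+ ```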
+
+ ## How to use
+
+ ```python
+ from huggingface_hub import model_info, hf_hub_download
+ from peft import LoraConfig, get_peft_model, set_peft_model_state_dict, TaskType
+ from textwrap import dedent
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+
+ model_id = "pcuenq/RedPajama-3B-instruct-lora"
+
+ # Load the base model in int8, resolving its id from this repo's card metadata
+ info = model_info(model_id)
+ base_model = info.cardData["base_model"]
+ model = AutoModelForCausalLM.from_pretrained(
+     base_model,
+     load_in_8bit=True,
+     device_map="auto",
+ )
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+ # Prepare for LoRA: this config must match the one used during fine-tuning
+ lora_config = LoraConfig(
+     r=8,
+     lora_alpha=16,
+     target_modules=["query_key_value"],
+     lora_dropout=0.05,
+     bias="none",
+     task_type=TaskType.CAUSAL_LM,
+ )
+ model = get_peft_model(model, lora_config)
+
+ # Download and apply the LoRA weights
+ lora_filename = hf_hub_download(repo_id=model_id, filename="lora.bin")
+ lora_dict = torch.load(lora_filename)
+ set_peft_model_state_dict(model, lora_dict)
+
+ # Build an Alpaca-style prompt and run inference
+ def generate_prompt(instruction, inputs=None):
+     if inputs is not None:
+         return dedent(
+             f"""\
+             Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+             ### Instruction:
+             {instruction}
+
+             ### Input:
+             {inputs}
+
+             ### Response:
+             """
+         )
+     else:
+         return dedent(
+             f"""\
+             Below is an instruction that describes a task. Write a response that appropriately completes the request.
+
+             ### Instruction:
+             {instruction}
+
+             ### Response:
+             """
+         )
+
+ prompt = generate_prompt("Has humankind ever set foot on the Moon?")
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+ input_length = inputs.input_ids.shape[1]
+ outputs = model.generate(
+     **inputs, max_new_tokens=50, do_sample=True, temperature=1.0, top_p=0.7, top_k=50, return_dict_in_generate=True
+ )
+ # Keep only the newly generated tokens, not the prompt
+ tokens = outputs.sequences[0, input_length:]
+
+ # Truncate at the first <eos> token, if one was generated
+ eos_pos = (tokens == tokenizer.eos_token_id).nonzero()
+ if eos_pos.numel() > 0:
+     tokens = tokens[:eos_pos[0].item()]
+
+ output_str = tokenizer.decode(tokens)
+ print(output_str)
+ ```
+
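+ Generation uses sampling (`do_sample=True`), so the output will vary between runs. For deterministic output, a greedy variant (sketch; same inputs as above):
+
+ ```python
+ outputs = model.generate(**inputs, max_new_tokens=50, do_sample=False, return_dict_in_generate=True)
+ ```
+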
lora.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1ba829adbc86d07bbfe8d76f88ea21e2092b69e926e327a4236638aa470b80a5
+ size 10504543
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "bos_token": "<|endoftext|>",
+   "eos_token": "<eos>",
+   "unk_token": "<|endoftext|>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render.
tokenizer_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": "<|endoftext|>",
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "<|endoftext|>",
+   "model_max_length": 2048,
+   "tokenizer_class": "GPTNeoXTokenizer",
+   "unk_token": "<|endoftext|>"
+ }