Pankaj Mathur committed
Commit · 74d1fbd
1 Parent(s): 58be4f8
Update README.md
README.md
CHANGED
@@ -21,7 +21,7 @@ We used DeepSpeed with Zero-3 approaches for parallel gpu training.

|||
|:-------------:|:-------------:|
-|*
+|*batch size*|16|
|*train_micro_batch_size_per_gpu*|2|
|*gradient_accumulation_steps*|2|
|*Learning rate*|2e-5|
@@ -38,28 +38,30 @@ Below shows an example on how to use OpenAlpaca
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

-#
-model_path =
+# change model_path between 3b,7b or 13b
+model_path = 'psmathur/alpaca_orca_open_llama_3b'
tokenizer = LlamaTokenizer.from_pretrained(model_path)
-model = LlamaForCausalLM.from_pretrained(
-
+model = LlamaForCausalLM.from_pretrained(
+    model_path, torch_dtype=torch.float16, device_map='auto',
+)
+# check more details here https://github.com/openlm-research/open_llama
+tokenizer.bos_token_id, tokenizer.eos_token_id = 1,2

# same prompt as provided by Orca Research Paper
-system =
-instruction =
-input =
+system = 'You are an AI assistant. User will you give you a task. Your goal is to complete the task as faithfully as you can. While performing the task think step-by-step and justify your steps.'
+instruction = 'Use the given data to calculate the median.'
+input = '[7, 3, 8, 2, 10]'

+prompt_input = f"### System:\n{system}\n\n#\n\n### User:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
+#prompt_no_input = f"### System:\n{system}\n\n#\n\n### User:\n{instruction}\n\n### Response:\n"

-prompt_no_input = f'.\n\n### Instruction:\n{instruction}\n\n### Response:'
tokens = tokenizer.encode(prompt_no_input)
-
tokens = torch.LongTensor(tokens).unsqueeze(0)
-
-
-
-
-
-length = len(tokens[0])
+tokens = tokens.to('cuda')
+
+instance = {'input_ids': tokens,'top_k': 50, 'top_p': 1.0, 'generate_len': 1024}
+# instance = {'input_ids': tokens,'top_k': 50, 'top_p': 1.0, 'temperature':0.7, 'generate_len': 1024}
+
with torch.no_grad():
    rest = model.generate(
        input_ids=tokens,
@@ -67,10 +69,12 @@ with torch.no_grad():
        use_cache=True,
        do_sample=True,
        top_p=instance['top_p'],
-        top_k=instance['top_k']
+        top_k=instance['top_k'],
+        # temperature=instance['temperature']
    )

output = rest[0][length:]
string = tokenizer.decode(output, skip_special_tokens=True)
-print(f'[!]
+print(f'[!] Response: {string}')
+
```
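The first hunk adds the missing batch-size row to the hyperparameter table that accompanies the README's DeepSpeed note ("We used DeepSpeed with Zero-3 approaches for parallel gpu training"). As a hedged sketch only, those table values would map into a DeepSpeed ZeRO-3 config roughly as below; the optimizer type, precision setting, and GPU count are assumptions, not values stated in this diff.

```python
# Hypothetical mapping of the README table into a DeepSpeed ZeRO-3 config dict.
# 16 = 2 (micro batch per GPU) x 2 (grad accumulation) x 4, so a 4-GPU run is implied.
ds_config = {
    "train_batch_size": 16,               # |*batch size*|16|
    "train_micro_batch_size_per_gpu": 2,  # |*train_micro_batch_size_per_gpu*|2|
    "gradient_accumulation_steps": 2,     # |*gradient_accumulation_steps*|2|
    "optimizer": {
        "type": "AdamW",                  # assumption; optimizer not named in the table
        "params": {"lr": 2e-5},           # |*Learning rate*|2e-5|
    },
    "bf16": {"enabled": True},            # assumption; precision not stated
    "zero_optimization": {"stage": 3},    # "Zero-3" from the hunk header
}
# A training script would typically pass this dict to
# deepspeed.initialize(model=model, model_parameters=model.parameters(), config=ds_config).
```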
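As committed, the updated snippet builds `prompt_input` but still encodes `prompt_no_input` (now commented out), and it slices the output with `length`, whose defining line was removed in this change. Below is a minimal, hedged sketch that reconciles those names so the example runs end to end; mapping `generate_len` to `max_new_tokens` and the CUDA device are assumptions.

```python
# Minimal sketch only: same model id and prompt format as the committed snippet,
# with the prompt variable and the `length` slice reconciled.
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

model_path = 'psmathur/alpaca_orca_open_llama_3b'
tokenizer = LlamaTokenizer.from_pretrained(model_path)
model = LlamaForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.float16, device_map='auto',
)
tokenizer.bos_token_id, tokenizer.eos_token_id = 1, 2

system = 'You are an AI assistant. User will you give you a task. Your goal is to complete the task as faithfully as you can. While performing the task think step-by-step and justify your steps.'
instruction = 'Use the given data to calculate the median.'
input_text = '[7, 3, 8, 2, 10]'  # renamed from `input` to avoid shadowing the builtin

prompt = f"### System:\n{system}\n\n#\n\n### User:\n{instruction}\n\n### Input:\n{input_text}\n\n### Response:\n"

tokens = torch.LongTensor(tokenizer.encode(prompt)).unsqueeze(0).to('cuda')
length = len(tokens[0])  # kept so the prompt can be stripped from the generated ids

with torch.no_grad():
    rest = model.generate(
        input_ids=tokens,
        max_new_tokens=1024,  # assumption: 'generate_len' caps the new tokens
        use_cache=True,
        do_sample=True,
        top_p=1.0,
        top_k=50,
    )

output = rest[0][length:]
print(f"[!] Response: {tokenizer.decode(output, skip_special_tokens=True)}")
```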