Pankaj Mathur committed
Commit · 74d1fbd
1 Parent(s): 58be4f8
Update README.md
README.md
CHANGED
@@ -21,7 +21,7 @@ We used DeepSpeed with Zero-3 approaches for parallel gpu training.

|||
|:-------------:|:-------------:|
-|*
+|*batch size*|16|
|*train_micro_batch_size_per_gpu*|2|
|*gradient_accumulation_steps*|2|
|*Learning rate*|2e-5|
@@ -38,28 +38,30 @@ Below shows an example on how to use OpenAlpaca
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

-#
-model_path =
+# change model_path between 3b,7b or 13b
+model_path = 'psmathur/alpaca_orca_open_llama_3b'
tokenizer = LlamaTokenizer.from_pretrained(model_path)
-model = LlamaForCausalLM.from_pretrained(
-
+model = LlamaForCausalLM.from_pretrained(
+    model_path, torch_dtype=torch.float16, device_map='auto',
+)
+# check more details here https://github.com/openlm-research/open_llama
+tokenizer.bos_token_id, tokenizer.eos_token_id = 1,2

# same prompt as provided by Orca Research Paper
-system =
-instruction =
-input =
+system = 'You are an AI assistant. User will you give you a task. Your goal is to complete the task as faithfully as you can. While performing the task think step-by-step and justify your steps.'
+instruction = 'Use the given data to calculate the median.'
+input = '[7, 3, 8, 2, 10]'

+prompt_input = f"### System:\n{system}\n\n#\n\n### User:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
+#prompt_no_input = f"### System:\n{system}\n\n#\n\n### User:\n{instruction}\n\n### Response:\n"

-prompt_no_input = f'.\n\n### Instruction:\n{instruction}\n\n### Response:'
tokens = tokenizer.encode(prompt_no_input)
-
tokens = torch.LongTensor(tokens).unsqueeze(0)
-
-
-
-
-
-length = len(tokens[0])
+tokens = tokens.to('cuda')
+
+instance = {'input_ids': tokens,'top_k': 50, 'top_p': 1.0, 'generate_len': 1024}
+# instance = {'input_ids': tokens,'top_k': 50, 'top_p': 1.0, 'temperature':0.7, 'generate_len': 1024}
+
with torch.no_grad():
    rest = model.generate(
        input_ids=tokens,
@@ -67,10 +69,12 @@ with torch.no_grad():
        use_cache=True,
        do_sample=True,
        top_p=instance['top_p'],
-        top_k=instance['top_k']
+        top_k=instance['top_k'],
+        # temperature=instance['temperature']
    )

output = rest[0][length:]
string = tokenizer.decode(output, skip_special_tokens=True)
-print(f'[!]
+print(f'[!] Response: {string}')
+
```
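The first hunk adds the missing batch-size row to the hyperparameter table that accompanies the README's DeepSpeed note ("We used DeepSpeed with Zero-3 approaches for parallel gpu training"). As a hedged sketch only, those table values would map into a DeepSpeed ZeRO-3 config roughly as below; the optimizer type, precision setting, and GPU count are assumptions, not values stated in this diff.

```python
# Hypothetical mapping of the README table into a DeepSpeed ZeRO-3 config dict.
# 16 = 2 (micro batch per GPU) x 2 (grad accumulation) x 4, so a 4-GPU run is implied.
ds_config = {
    "train_batch_size": 16,               # |*batch size*|16|
    "train_micro_batch_size_per_gpu": 2,  # |*train_micro_batch_size_per_gpu*|2|
    "gradient_accumulation_steps": 2,     # |*gradient_accumulation_steps*|2|
    "optimizer": {
        "type": "AdamW",                  # assumption; optimizer not named in the table
        "params": {"lr": 2e-5},           # |*Learning rate*|2e-5|
    },
    "bf16": {"enabled": True},            # assumption; precision not stated
    "zero_optimization": {"stage": 3},    # "Zero-3" from the hunk header
}
# A training script would typically pass this dict to
# deepspeed.initialize(model=model, model_parameters=model.parameters(), config=ds_config).
```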
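As committed, the updated snippet builds `prompt_input` but still encodes `prompt_no_input` (now commented out), and it slices the output with `length`, whose defining line was removed in this change. Below is a minimal, hedged sketch that reconciles those names so the example runs end to end; mapping `generate_len` to `max_new_tokens` and the CUDA device are assumptions.

```python
# Minimal sketch only: same model id and prompt format as the committed snippet,
# with the prompt variable and the `length` slice reconciled.
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

model_path = 'psmathur/alpaca_orca_open_llama_3b'
tokenizer = LlamaTokenizer.from_pretrained(model_path)
model = LlamaForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.float16, device_map='auto',
)
tokenizer.bos_token_id, tokenizer.eos_token_id = 1, 2

system = 'You are an AI assistant. User will you give you a task. Your goal is to complete the task as faithfully as you can. While performing the task think step-by-step and justify your steps.'
instruction = 'Use the given data to calculate the median.'
input_text = '[7, 3, 8, 2, 10]'  # renamed from `input` to avoid shadowing the builtin

prompt = f"### System:\n{system}\n\n#\n\n### User:\n{instruction}\n\n### Input:\n{input_text}\n\n### Response:\n"

tokens = torch.LongTensor(tokenizer.encode(prompt)).unsqueeze(0).to('cuda')
length = len(tokens[0])  # kept so the prompt can be stripped from the generated ids

with torch.no_grad():
    rest = model.generate(
        input_ids=tokens,
        max_new_tokens=1024,  # assumption: 'generate_len' caps the new tokens
        use_cache=True,
        do_sample=True,
        top_p=1.0,
        top_k=50,
    )

output = rest[0][length:]
print(f"[!] Response: {tokenizer.decode(output, skip_special_tokens=True)}")
```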