sanagnos committed on
Commit
63d455d
1 Parent(s): ab40e53

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +36 -0
README.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch import nn
3
+ from torch.nn import functional as F
4
+ import transformers
5
+
6
+ base_path = 'sanagnos/galactica-6.7b-finetuned'
7
+
8
+ model = transformers.OPTForCausalLM.from_pretrained(
9
+ base_path, load_in_8bit=True, device_map='auto', low_cpu_mem_usage=True,
10
+ torch_dtype=torch.float16, offload_state_dict=True
11
+ )
12
+
13
+ model.gradient_checkpointing_enable() # reduce number of stored activations
14
+ model.model.decoder.project_in = lambda x: x.requires_grad_(True)
15
+
16
+ class CastOutputToFloat(nn.Sequential):
17
+ def forward(self, x): return super().forward(x).to(torch.float32)
18
+
19
+ model.lm_head = CastOutputToFloat(model.lm_head)
20
+
21
+ tokenizer = transformers.AutoTokenizer.from_pretrained(base_path)
22
+
23
+ batch = tokenizer("<question>What are the symptoms of Alzheimer's disease?<answer>", return_tensors='pt')
24
+
25
+ with torch.cuda.amp.autocast():
26
+ out = model.generate(
27
+ input_ids=batch['input_ids'],
28
+ max_length=300,
29
+ do_sample=True,
30
+ top_k=40,
31
+ num_beams=1,
32
+ num_return_sequences=1,
33
+ eos_token_id=tokenizer.additional_special_tokens_ids[tokenizer.additional_special_tokens.index('<question>')]
34
+ )
35
+
36
+ message = tokenizer.decode(out[0, :-1]).replace('<question>', "User:\n").replace('<answer>', 'Assistant:\n')