lhl committed on
Commit
d387874
1 Parent(s): f00ac1d

try to fit on T4 (16GB VRAM)

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -31,7 +31,9 @@ model = AutoModelForCausalLM.from_pretrained(
31
  model_name,
32
  torch_dtype=torch.bfloat16,
33
  device_map="auto",
34
- load_in_8bit=True,
 
 
35
  )
36
  streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
37
 
 
31
  model_name,
32
  torch_dtype=torch.bfloat16,
33
  device_map="auto",
34
+ # load_in_8bit=True,
35
+ # try to make it fit on a T4?
36
+ load_in_4bit=True
37
  )
38
  streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
39