richardr1126 committed on
Commit
9b91b1c
1 Parent(s): cd27bbf

Testing T4

Browse files
Files changed (1) hide show
  1. app.py +7 -2
app.py CHANGED
@@ -6,6 +6,7 @@ from transformers import (
6
  StoppingCriteria,
7
  StoppingCriteriaList,
8
  TextIteratorStreamer,
 
9
  )
10
  import gradio as gr
11
  import torch
@@ -18,14 +19,18 @@ max_new_tokens = 1024
18
 
19
  print(f"Starting to load the model {model_name}")
20
 
 
 
 
 
 
21
  m = AutoModelForCausalLM.from_pretrained(
22
  model_name,
23
- llm_int8_enable_fp32_cpu_offload=True,
24
  device_map = {
25
  "transformer.h": 0,
26
  "": "cpu",
27
  },
28
- #load_in_8bit=True,
29
  )
30
 
31
  m.config.pad_token_id = m.config.eos_token_id
 
6
  StoppingCriteria,
7
  StoppingCriteriaList,
8
  TextIteratorStreamer,
9
+ BitsAndBytesConfig,
10
  )
11
  import gradio as gr
12
  import torch
 
19
 
20
  print(f"Starting to load the model {model_name}")
21
 
22
+ bnb_config = BitsAndBytesConfig(
23
+ llm_int8_enable_fp32_cpu_offload=True,
24
+ load_in_8bit=True,
25
+ )
26
+
27
  m = AutoModelForCausalLM.from_pretrained(
28
  model_name,
 
29
  device_map = {
30
  "transformer.h": 0,
31
  "": "cpu",
32
  },
33
+ quantization_config=bnb_config,
34
  )
35
 
36
  m.config.pad_token_id = m.config.eos_token_id