richardr1126 committed on
Commit 78715f1
1 Parent(s): 9b91b1c
Files changed (1)
  1. app.py +2 -11
app.py CHANGED
@@ -6,7 +6,6 @@ from transformers import (
     StoppingCriteria,
     StoppingCriteriaList,
     TextIteratorStreamer,
-    BitsAndBytesConfig,
 )
 import gradio as gr
 import torch
@@ -19,18 +18,10 @@ max_new_tokens = 1024
 
 print(f"Starting to load the model {model_name}")
 
-bnb_config = BitsAndBytesConfig(
-    llm_int8_enable_fp32_cpu_offload=True,
-    load_in_8bit=True,
-)
-
 m = AutoModelForCausalLM.from_pretrained(
     model_name,
-    device_map = {
-        "transformer.h": 0,
-        "": "cpu",
-    },
-    quantization_config=bnb_config,
+    device_map=0,
+    #load_in_8bit=True,
 )
 
 m.config.pad_token_id = m.config.eos_token_id
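
In effect, this commit removes the 8-bit BitsAndBytesConfig quantization with fp32 CPU offload and loads the unquantized model directly onto GPU 0, so the device must hold the full-precision weights. A minimal sketch of the resulting loading path, assuming a single CUDA device (model_name below is a hypothetical placeholder; app.py defines the real checkpoint earlier in the file):

from transformers import AutoModelForCausalLM

model_name = "your-org/your-model"  # hypothetical placeholder; the real value is set earlier in app.py

m = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=0,         # place the whole model on GPU 0 instead of splitting between GPU and CPU
    # load_in_8bit=True,  # 8-bit quantization left commented out, as in the commit
)
m.config.pad_token_id = m.config.eos_token_id  # reuse EOS as the padding token, as in app.py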