hysts HF staff committed on
Commit
5f1f01a
1 Parent(s): 79b912b

Not use 8 bit quantization

Browse files
Files changed (1) hide show
  1. app.py +2 -7
app.py CHANGED
@@ -5,12 +5,7 @@ from typing import Iterator
5
  import gradio as gr
6
  import spaces
7
  import torch
8
- from transformers import (
9
- AutoModelForCausalLM,
10
- BitsAndBytesConfig,
11
- GemmaTokenizerFast,
12
- TextIteratorStreamer,
13
- )
14
 
15
  DESCRIPTION = """\
16
  # Gemma 2 9B IT
@@ -33,7 +28,7 @@ tokenizer = GemmaTokenizerFast.from_pretrained(model_id)
33
  model = AutoModelForCausalLM.from_pretrained(
34
  model_id,
35
  device_map="auto",
36
- quantization_config=BitsAndBytesConfig(load_in_8bit=True),
37
  )
38
  model.config.sliding_window = 4096
39
  model.eval()
 
5
  import gradio as gr
6
  import spaces
7
  import torch
8
+ from transformers import AutoModelForCausalLM, GemmaTokenizerFast, TextIteratorStreamer
 
 
 
 
 
9
 
10
  DESCRIPTION = """\
11
  # Gemma 2 9B IT
 
28
  model = AutoModelForCausalLM.from_pretrained(
29
  model_id,
30
  device_map="auto",
31
+ torch_dtype=torch.bfloat16,
32
  )
33
  model.config.sliding_window = 4096
34
  model.eval()