Deepan13 commited on
Commit
ea68e91
·
1 Parent(s): d4417c8

Add quantized CodeLlama demo app

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -1,11 +1,14 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
3
 
4
- model_id = "codellama/CodeLlama-7b-hf"
 
 
5
 
6
  tokenizer = AutoTokenizer.from_pretrained(model_id)
7
  model = AutoModelForCausalLM.from_pretrained(
8
  model_id,
 
9
  device_map="auto"
10
  )
11
 
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
3
 
4
+ model_id = "TheBloke/CodeLlama-7B-GPTQ" # Example 4-bit quantized model
5
+
6
+ bnb_config = BitsAndBytesConfig(load_in_4bit=True, device_map="auto")
7
 
8
  tokenizer = AutoTokenizer.from_pretrained(model_id)
9
  model = AutoModelForCausalLM.from_pretrained(
10
  model_id,
11
+ quantization_config=bnb_config,
12
  device_map="auto"
13
  )
14