rajeshthangaraj1 committed
Commit
5617cf0
1 Parent(s): 56d25d5

Update app.py

Files changed (1)
  1. app.py +2 -13
app.py CHANGED
@@ -1,6 +1,6 @@
 import os
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline,BitsAndBytesConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import gradio as gr
 
 
@@ -10,25 +10,14 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 os.environ['HF_TOKEN']=os.environ.get('HF_TOKEN')
 os.environ['HUGGINGFACEHUB_API_TOKEN']=os.environ.get('HF_TOKEN')
 
-# Configure quantization for CPU
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.bfloat16,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4"
-)
 
 # Load the model with quantization
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     torch_dtype=torch.bfloat16,
-    device_map="auto",
-    quantization_config=quantization_config
+    device_map="auto"
 )
 
-# Set the device to CPU
-device = "cpu"
-# model.to(device)
 
 # Define the function for the Gradio interface
 def chat_with_phi(message):
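
For reference, a minimal, self-contained sketch of the model-loading path as it stands after this commit. The actual MODEL_ID value is defined earlier in app.py and is not visible in this diff, so the Phi-3 identifier below is an assumption for illustration only; device_map="auto" also assumes the accelerate package is installed alongside transformers.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical model ID for illustration; the real value is set earlier in
# app.py, outside this diff (chat_with_phi() suggests a Phi-family model).
MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Post-commit load: no BitsAndBytesConfig. Weights are kept in bfloat16 and
# device placement is delegated to accelerate via device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)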