rajeshthangaraj1 committed
Commit • 5617cf0
Parent(s): 56d25d5
Update app.py
app.py CHANGED
@@ -1,6 +1,6 @@
 import os
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import gradio as gr
 
 
@@ -10,25 +10,14 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 os.environ['HF_TOKEN']=os.environ.get('HF_TOKEN')
 os.environ['HUGGINGFACEHUB_API_TOKEN']=os.environ.get('HF_TOKEN')
 
-# Configure quantization for CPU
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.bfloat16,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4"
-)
 
 # Load the model with quantization
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     torch_dtype=torch.bfloat16,
-    device_map="auto"
-    quantization_config=quantization_config
+    device_map="auto"
 )
 
-# Set the device to CPU
-device = "cpu"
-# model.to(device)
 
 # Define the function for the Gradio interface
 def chat_with_phi(message):
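Net effect of the commit: the 4-bit quantization block is dropped and the model is loaded in bfloat16 with device_map="auto" only. For reference, the model-loading portion of app.py after this commit should reduce to roughly the sketch below; the MODEL_ID value shown is a hypothetical placeholder, since its real definition sits in unchanged lines outside this diff.

import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr

# Hypothetical placeholder: the real MODEL_ID is defined in lines not shown in this diff.
MODEL_ID = "microsoft/phi-2"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

os.environ['HF_TOKEN'] = os.environ.get('HF_TOKEN')
os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.environ.get('HF_TOKEN')

# Load the model without quantization; device_map="auto" lets accelerate
# decide weight placement (plain CPU on a free Space).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)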
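For context, the removed hunk contained two defects: BitsAndBytesConfig never appears in the import line shown in this diff, and the missing comma between device_map="auto" and quantization_config=quantization_config is a SyntaxError. If 4-bit quantization were wanted again on GPU hardware, a corrected version would look roughly like this (an illustrative sketch, assuming bitsandbytes is installed; bitsandbytes 4-bit loading generally requires a CUDA GPU, so it would not help on a CPU-only Space).

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,                     # defined earlier in app.py
    torch_dtype=torch.bfloat16,
    device_map="auto",            # this comma is what the removed code was missing
    quantization_config=quantization_config
)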