madansa7 committed on
Commit
3716d98
·
verified ·
1 Parent(s): 18a25e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -2
app.py CHANGED
@@ -6,13 +6,12 @@ from transformers import pipeline
6
  print(f"GPU available: {torch.cuda.is_available()}")
7
  print(f"GPU name: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")
8
 
9
- # Initialize pipeline with GPU acceleration
10
  pipe = pipeline(
11
  "text-generation",
12
  model="agentica-org/DeepCoder-14B-Preview",
13
  device="cuda" if torch.cuda.is_available() else "cpu",
14
  torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
15
- model_kwargs={"load_in_4bit": True} # Quantization for memory efficiency
16
  )
17
 
18
  def chat(message, history):
 
6
  print(f"GPU available: {torch.cuda.is_available()}")
7
  print(f"GPU name: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")
8
 
9
+ # Initialize pipeline WITHOUT 4-bit quantization
10
  pipe = pipeline(
11
  "text-generation",
12
  model="agentica-org/DeepCoder-14B-Preview",
13
  device="cuda" if torch.cuda.is_available() else "cpu",
14
  torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
 
15
  )
16
 
17
  def chat(message, history):