Remostart committed on
Commit
0e96995
·
verified ·
1 Parent(s): c4a6465

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -9
app.py CHANGED
@@ -1,21 +1,15 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
4
  from spaces import GPU
5
 
6
  # Load model & tokenizer
7
  MODEL_NAME = "ubiodee/plutus_llm"
8
- quantization_config = BitsAndBytesConfig(
9
- load_in_8bit=True,
10
- bnb_8bit_compute_dtype=torch.float16,
11
- bnb_8bit_use_double_quant=True
12
- )
13
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
14
  model = AutoModelForCausalLM.from_pretrained(
15
  MODEL_NAME,
16
- quantization_config=quantization_config,
17
- device_map="auto",
18
- torch_dtype=torch.float16
19
  )
20
 
21
  # Set padding token
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
  from spaces import GPU
5
 
6
  # Load model & tokenizer
7
  MODEL_NAME = "ubiodee/plutus_llm"
 
 
 
 
 
8
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
9
  model = AutoModelForCausalLM.from_pretrained(
10
  MODEL_NAME,
11
+ torch_dtype=torch.float16, # Use FP16 to reduce memory usage
12
+ device_map="auto" # Automatically distribute across CPU/GPU
 
13
  )
14
 
15
  # Set padding token