vitellinho committed
Commit 4b29833
Parent: d2ec443

Update app.py

Files changed (1):
  1. app.py +8 -13
app.py CHANGED
@@ -16,18 +16,13 @@ MODEL_ID1 = "microsoft/Phi-3.5-mini-instruct"
 MODEL_LIST1 = ["microsoft/Phi-3.5-mini-instruct"]
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-# Only enable the quantization config when a GPU is available
-if device == "cuda":
-    quantization_config = BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_compute_dtype=torch.bfloat16,
-        bnb_4bit_use_double_quant=True,
-        bnb_4bit_quant_type="nf4"
-    )
-else:
-    quantization_config = None  # no quantization on CPU
+device = "cuda" if torch.cuda.is_available() else "cpu"  # for GPU usage or "cpu" for CPU usage / But you need GPU :)
+
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.bfloat16,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4")
 
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID1)
 model = AutoModelForCausalLM.from_pretrained(
@@ -280,4 +275,4 @@ with gr.Blocks(css=CSS, theme="small_and_pretty") as demo:
     gr.HTML(footer)
 
 # Launch the combined app
-demo.launch(debug=True)
+demo.launch(debug=True)
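
For orientation, since the first hunk cuts off inside the `from_pretrained` call: below is a minimal, self-contained sketch of how a 4-bit NF4 config like the one added above is typically passed to the model load. The `device_map="auto"` argument is an assumption for illustration, not taken from app.py; note that bitsandbytes 4-bit loading requires a CUDA GPU, which is what the deleted `if device == "cuda"` guard used to check.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_ID1 = "microsoft/Phi-3.5-mini-instruct"

# 4-bit NF4 quantization: weights are stored in 4 bits, matmuls are computed
# in bfloat16, and the quantization constants are themselves quantized again
# (double quantization) to save a little more memory.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID1)
# device_map="auto" is assumed here, not part of the diff; 4-bit loading
# needs a CUDA device, so this call fails on CPU-only machines.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID1,
    quantization_config=quantization_config,
    device_map="auto",
)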