nova committed
Commit b439d72 · verified · 1 Parent(s): a155f45

Update app.py

Files changed (1)
app.py +9 -3
app.py CHANGED
@@ -2,8 +2,13 @@ import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 from threading import Thread
-# Model Configuration: Qwen 2.5 1.5B (Much Smarter, still runs on Free CPU)
-MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
+# Model Configuration: Qwen 2.5 0.5B (ULTRA FAST - Low RAM/CPU)
+MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
+# Resource Optimization (Save Cores & RAM)
+import os
+os.environ["OMP_NUM_THREADS"] = "4"  # Limit threads to avoid CPU contention
+os.environ["MKL_NUM_THREADS"] = "4"
+torch.set_num_threads(4)
 # Check Device
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"🚀 Loading {MODEL_ID} on {device}...")
@@ -13,7 +18,8 @@ try:
         MODEL_ID,
         dtype=torch.float16 if device == "cuda" else torch.float32,
         device_map="auto",
-        trust_remote_code=True
+        trust_remote_code=True,
+        low_cpu_mem_usage=True  # Optimize RAM loading
     )
 except Exception as e:
     print(f"❌ Error loading model: {e}")