pradeeparul2 committed (verified)
Commit: 6388a60 · Parent: 9506633

Update app.py

Files changed (1): app.py (+12 −2)
app.py CHANGED
@@ -1,10 +1,19 @@
+import os
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 
+# Redirect HF cache to /tmp (ephemeral, unlimited)
+os.environ['HF_HOME'] = '/tmp/hf_home'
+os.environ['TRANSFORMERS_CACHE'] = '/tmp/hf_cache'
+
 model_name = "Qwen/Qwen2.5-Coder-14B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=True, device_map="auto")
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    load_in_4bit=True,  # Quantization for T4 GPU (~9-10GB VRAM)
+    device_map="auto"
+)
 
 def chat(message, history):
     messages = [{"role": "user", "content": message}]
@@ -17,4 +26,5 @@ def chat(message, history):
     return history, ""
 
 demo = gr.ChatInterface(chat)
-demo.launch()
+if __name__ == "__main__":
+    demo.launch()
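
A note on the touched APIs: huggingface_hub and transformers generally resolve HF_HOME and TRANSFORMERS_CACHE when they are first imported, so these variables usually need to be set before the transformers import to take effect; TRANSFORMERS_CACHE is also deprecated in recent transformers releases (HF_HOME alone covers it), and passing load_in_4bit=True directly to from_pretrained is deprecated in favor of quantization_config. Below is a minimal sketch of an equivalent setup under those assumptions (bnb_4bit_compute_dtype is an illustrative choice, not part of the commit; 4-bit loading requires the bitsandbytes package):

import os
os.environ["HF_HOME"] = "/tmp/hf_home"  # set before importing transformers; replaces TRANSFORMERS_CACHE

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit quantization expressed through BitsAndBytesConfig instead of the
# deprecated load_in_4bit kwarg on from_pretrained
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # assumption: fp16 compute suits a T4
)
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-Coder-14B-Instruct",
    quantization_config=bnb_config,
    device_map="auto",
)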