GhostScientist committed on
Commit
5a4b365
·
verified ·
1 Parent(s): 57b9ad8

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +13 -20
README.md CHANGED
@@ -9,7 +9,7 @@ app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
  short_description: Code assistant powered by fine-tuned Qwen 2.5 Coder
12
- suggested_hardware: t4-small
13
  ---
14
 
15
  # Qwen 2.5 Coder Assistant
 
9
  pinned: false
10
  license: apache-2.0
11
  short_description: Code assistant powered by fine-tuned Qwen 2.5 Coder
12
+ suggested_hardware: zero-a10g
13
  ---
14
 
15
  # Qwen 2.5 Coder Assistant
app.py CHANGED
@@ -1,19 +1,21 @@
1
  import gradio as gr
 
2
  import torch
3
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
4
- from threading import Thread
5
 
6
  MODEL_ID = "GhostScientist/qwen25-coder-1.5b-codealpaca-sft"
7
 
8
- # Load model and tokenizer
9
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 
10
  model = AutoModelForCausalLM.from_pretrained(
11
  MODEL_ID,
12
  torch_dtype=torch.float16,
13
- device_map="auto",
14
  )
15
 
16
- def respond(message, history, system_message, max_tokens, temperature, top_p):
 
17
  """Generate response using the fine-tuned Qwen coder model."""
18
  messages = [{"role": "system", "content": system_message}]
19
 
@@ -33,12 +35,9 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
33
  )
34
  inputs = tokenizer([text], return_tensors="pt").to(model.device)
35
 
36
- # Set up streaming
37
- streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
38
-
39
- generation_kwargs = dict(
40
  **inputs,
41
- streamer=streamer,
42
  max_new_tokens=int(max_tokens),
43
  temperature=temperature,
44
  top_p=top_p,
@@ -46,15 +45,9 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
46
  pad_token_id=tokenizer.eos_token_id,
47
  )
48
 
49
- # Run generation in a thread
50
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
51
- thread.start()
52
-
53
- # Stream the response
54
- response = ""
55
- for new_text in streamer:
56
- response += new_text
57
- yield response
58
 
59
 
60
  SYSTEM_PROMPT = """You are an expert coding assistant. You help users write, debug, explain, and improve code.
@@ -70,7 +63,7 @@ EXAMPLES = [
70
  ]
71
 
72
  demo = gr.ChatInterface(
73
- respond,
74
  title="Qwen 2.5 Coder Assistant",
75
  description="""A fine-tuned Qwen 2.5 Coder 1.5B model for code assistance.
76
  Ask me to write code, explain concepts, debug issues, or help with any programming task!
 
1
  import gradio as gr
2
+ import spaces
3
  import torch
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer
 
5
 
6
  MODEL_ID = "GhostScientist/qwen25-coder-1.5b-codealpaca-sft"
7
 
8
+ # Load tokenizer at startup
9
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
10
+
11
+ # Load model at startup (will be moved to GPU when @spaces.GPU is called)
12
  model = AutoModelForCausalLM.from_pretrained(
13
  MODEL_ID,
14
  torch_dtype=torch.float16,
 
15
  )
16
 
17
+ @spaces.GPU
18
+ def generate_response(message, history, system_message, max_tokens, temperature, top_p):
19
  """Generate response using the fine-tuned Qwen coder model."""
20
  messages = [{"role": "system", "content": system_message}]
21
 
 
35
  )
36
  inputs = tokenizer([text], return_tensors="pt").to(model.device)
37
 
38
+ # Generate response
39
+ outputs = model.generate(
 
 
40
  **inputs,
 
41
  max_new_tokens=int(max_tokens),
42
  temperature=temperature,
43
  top_p=top_p,
 
45
  pad_token_id=tokenizer.eos_token_id,
46
  )
47
 
48
+ # Decode only the new tokens
49
+ response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
50
+ return response
 
 
 
 
 
 
51
 
52
 
53
  SYSTEM_PROMPT = """You are an expert coding assistant. You help users write, debug, explain, and improve code.
 
63
  ]
64
 
65
  demo = gr.ChatInterface(
66
+ generate_response,
67
  title="Qwen 2.5 Coder Assistant",
68
  description="""A fine-tuned Qwen 2.5 Coder 1.5B model for code assistance.
69
  Ask me to write code, explain concepts, debug issues, or help with any programming task!