michsethowusu committed · verified
Commit dd830d8 · Parent(s): d967065

Update app.py

Files changed (1): app.py +11 -38
app.py CHANGED
@@ -1,52 +1,25 @@
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-from peft import PeftModel
 from threading import Thread
 
 # ------------------------------------------------------------------
 # 1. Model setup
 # ------------------------------------------------------------------
-BASE_MODEL = "unsloth/Qwen3-4B-Instruct-2507"
-LORA_MODEL = "michsethowusu/opani-coder"
+MODEL_ID = "michsethowusu/opani-coder_1b-merged-16bit"
 
 print("Loading tokenizer…")
-tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
-print("Loading base model…")
-base_model = AutoModelForCausalLM.from_pretrained(
-    BASE_MODEL,
+print("Loading model…")
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
     torch_dtype=torch.float16,
     device_map="auto",
     low_cpu_mem_usage=True,
     trust_remote_code=True
 )
 
-print("Loading LoRA adapters…")
-try:
-    # Try loading with the correct device_map handling
-    model = PeftModel.from_pretrained(
-        base_model,
-        LORA_MODEL,
-        device_map="auto",
-        torch_dtype=torch.float16
-    )
-    print("Merging LoRA adapters…")
-    model = model.merge_and_unload()
-except Exception as e:
-    print(f"Error loading LoRA model: {e}")
-    print("Attempting alternative loading method…")
-    # Alternative: Load the model differently
-    from peft import AutoPeftModelForCausalLM
-    model = AutoPeftModelForCausalLM.from_pretrained(
-        LORA_MODEL,
-        torch_dtype=torch.float16,
-        device_map="auto",
-        low_cpu_mem_usage=True,
-        trust_remote_code=True
-    )
-    model = model.merge_and_unload()
-
 print("Model ready!")
 
 # ------------------------------------------------------------------
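The try/except LoRA plumbing disappears because merging now happens once, offline, instead of on every Space start-up. A minimal sketch of that offline step, assuming a PEFT adapter and a Llama 3.2 1B base (the base repo name and adapter path below are placeholders for illustration, not taken from this diff; the app's new description only says "Llama 3.2 1B"):

```python
# One-time offline merge (sketch): bake the LoRA adapters into the base
# weights, then save a single fp16 checkpoint for the Space to load.
# NOTE: the base repo name and adapter path are assumptions for illustration.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE = "meta-llama/Llama-3.2-1B-Instruct"   # assumed base model
ADAPTERS = "path/to/opani-coder-lora"        # hypothetical local adapter path

base = AutoModelForCausalLM.from_pretrained(BASE, torch_dtype=torch.float16)
merged = PeftModel.from_pretrained(base, ADAPTERS).merge_and_unload()

# Save under the name the app now loads from the Hub
merged.save_pretrained("opani-coder_1b-merged-16bit")
AutoTokenizer.from_pretrained(BASE).save_pretrained("opani-coder_1b-merged-16bit")
```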
@@ -119,8 +92,8 @@ def bot_respond(history, temperature, top_p, top_k, max_tokens):
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
-        # 🇬🇭 Opani Coder
-        A fine-tuned Qwen3-4B model for coding assistance in Twi.
+        # 🇬🇭 Opani Coder 1B
+        A fine-tuned Llama 3.2 1B model (16-bit) for coding assistance in Twi.
         Ask me anything about programming, and I'll help you out!
         """
     )
@@ -128,7 +101,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     chatbot = gr.Chatbot(
         height=500,
         label="Chat History",
-        type="messages",  # NEW FORMAT
+        type="messages",
         avatar_images=(None, "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png"),
     )
 
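The only change in this hunk is dropping the `# NEW FORMAT` comment; `type="messages"` itself stays. For reference, that setting makes `gr.Chatbot` exchange history as OpenAI-style role/content dicts rather than the legacy list of `[user, bot]` pairs. A minimal sketch of the shape `bot_respond` then receives (contents are illustrative):

```python
# With type="messages", Gradio chat history is a list of role/content dicts:
history = [
    {"role": "user", "content": "How do I reverse a string in Python?"},
    {"role": "assistant", "content": 'Use slicing: "abc"[::-1].'},
]
```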
@@ -144,7 +117,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     with gr.Accordion("⚙️ Generation Parameters", open=False):
         gr.Markdown("*Adjust these settings to control the response style*")
         temperature = gr.Slider(0.1, 2.0, 0.7, step=0.1, label="Temperature")
-        top_p = gr.Slider(0.1, 1.0, 0.8, step=0.05, label="Top P")
+        top_p = gr.Slider(0.1, 1.0, 0.9, step=0.05, label="Top P")
         top_k = gr.Slider(1, 100, 20, step=1, label="Top K")
         max_tokens = gr.Slider(64, 2048, 512, step=64, label="Max Tokens")
 
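These sliders map directly onto the standard sampling arguments of `model.generate`. A sketch of how they typically reach the model via the `TextIteratorStreamer` and `Thread` imports at the top of app.py (the function name and wiring are illustrative, since `bot_respond`'s body is not part of this diff):

```python
# Illustrative streaming wiring; not the actual bot_respond body from app.py.
from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(prompt, temperature=0.7, top_p=0.9, top_k=20, max_tokens=512):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    Thread(
        target=model.generate,
        kwargs=dict(
            **inputs,
            streamer=streamer,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            max_new_tokens=max_tokens,
        ),
    ).start()
    for piece in streamer:  # yields decoded text chunks as they are generated
        yield piece
```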
@@ -195,8 +168,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         - **Code generation**: temperature 0.5-0.7
 
         ### 📝 About This Model
-        Fine-tuned Qwen3-4B with Unsloth for coding assistance in Twi.
-        **Model**: [michsethowusu/opani-coder](https://huggingface.co/michsethowusu/opani-coder)
+        Fine-tuned Llama 3.2 1B (16-bit full model) for coding assistance in Twi.
+        **Model**: [michsethowusu/opani-coder_1b-merged-16bit](https://huggingface.co/michsethowusu/opani-coder_1b-merged-16bit)
         """
     )
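After this change the Space's model setup boils down to a single `from_pretrained` call, so the checkpoint can be smoke-tested the same way outside Gradio. A quick local check (sketch; assumes the merged repo ships its own chat template):

```python
# Standalone smoke test of the merged checkpoint (no Gradio needed).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "michsethowusu/opani-coder_1b-merged-16bit"
tok = AutoTokenizer.from_pretrained(MODEL_ID)
mdl = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, torch_dtype=torch.float16, device_map="auto"
)

msgs = [{"role": "user", "content": "Write a Python function that reverses a string."}]
ids = tok.apply_chat_template(
    msgs, add_generation_prompt=True, return_tensors="pt"
).to(mdl.device)
out = mdl.generate(
    ids, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.9, top_k=20
)
print(tok.decode(out[0][ids.shape[-1]:], skip_special_tokens=True))
```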