michsethowusu committed
Commit d97f5cf · verified · 1 Parent(s): d435fb6

Upload 2 files

Files changed (2)
  1. app.py +220 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,220 @@
+ import gradio as gr
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+ from peft import PeftModel
+ from threading import Thread
+
+ # Your model configuration
+ BASE_MODEL = "unsloth/Qwen3-4B-Instruct-2507"
+ LORA_MODEL = "michsethowusu/twi_code_assistant"
+
+ print("Loading base model...")
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+ base_model = AutoModelForCausalLM.from_pretrained(
+     BASE_MODEL,
+     torch_dtype=torch.float16,
+     device_map="auto",
+     low_cpu_mem_usage=True
+ )
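+ # Note: float16 with device_map="auto" assumes GPU hardware (accelerate
+ # handles placement); a CPU-only Space would likely need torch.float32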
+
+ print("Loading LoRA adapters...")
+ model = PeftModel.from_pretrained(base_model, LORA_MODEL)
+ model = model.merge_and_unload()  # Merge for faster inference
+ print("Model ready!")
+
+ def generate_response(message, history, temperature, top_p, top_k, max_tokens):
+     """Generate response from the model with streaming"""
+
+     # With a Chatbot of type="messages", history already holds
+     # {"role", "content"} dicts, so just append the new user turn
+     messages = list(history) + [{"role": "user", "content": message}]
+
+     # Apply chat template
+     text = tokenizer.apply_chat_template(
+         messages,
+         tokenize=False,
+         add_generation_prompt=True
+     )
+
+     # Tokenize
+     inputs = tokenizer(text, return_tensors="pt").to(model.device)
+
+     # Setup streaming
+     streamer = TextIteratorStreamer(
+         tokenizer,
+         skip_prompt=True,
+         skip_special_tokens=True
+     )
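+     # skip_prompt=True keeps the echoed input prompt out of the stream;
+     # skip_special_tokens=True drops chat-template tokens from the output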
+
+     # Generation kwargs
+     generation_kwargs = {
+         **inputs,
+         "max_new_tokens": max_tokens,
+         "temperature": temperature,
+         "top_p": top_p,
+         "top_k": top_k,
+         "do_sample": True,
+         "streamer": streamer,
+     }
+
+     # Start generation in separate thread
+     thread = Thread(target=model.generate, kwargs=generation_kwargs)
+     thread.start()
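+     # model.generate blocks until generation completes, so it runs in a
+     # worker thread while this thread drains the streamer incrementally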
+
+     # Stream the response
+     partial_message = ""
+     for new_text in streamer:
+         partial_message += new_text
+         yield partial_message
+
+     thread.join()
+
+ # Create Gradio interface
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown(
+         """
+         # 🇬🇭 Twi Code Assistant
+
+         A fine-tuned Qwen3-4B model specialized for coding assistance in a Twi-language context.
+         Ask me anything about programming, and I'll help you out!
+         """
+     )
+
+     chatbot = gr.Chatbot(
+         height=500,
+         label="Chat History",
+         type="messages",
+         avatar_images=(None, "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png")
+     )
+
+     with gr.Row():
+         msg = gr.Textbox(
+             label="Your Message",
+             placeholder="Ask me a coding question...",
+             scale=4,
+             lines=2
+         )
+         submit = gr.Button("Send 🚀", scale=1, variant="primary")
+
+     with gr.Accordion("⚙️ Generation Parameters", open=False):
+         gr.Markdown("*Adjust these settings to control the response style*")
+
+         temperature = gr.Slider(
+             minimum=0.1,
+             maximum=2.0,
+             value=0.7,
+             step=0.1,
+             label="Temperature",
+             info="Higher = more creative, Lower = more focused"
+         )
+         top_p = gr.Slider(
+             minimum=0.1,
+             maximum=1.0,
+             value=0.8,
+             step=0.05,
+             label="Top P",
+             info="Nucleus sampling threshold"
+         )
+         top_k = gr.Slider(
+             minimum=1,
+             maximum=100,
+             value=20,
+             step=1,
+             label="Top K",
+             info="Number of top tokens to consider"
+         )
+         max_tokens = gr.Slider(
+             minimum=64,
+             maximum=2048,
+             value=512,
+             step=64,
+             label="Max Tokens",
+             info="Maximum length of response"
+         )
+
+     with gr.Row():
+         clear = gr.Button("🗑️ Clear Chat")
+
+     # Example prompts
+     gr.Examples(
+         examples=[
+             ["How do I create a Python function?"],
+             ["Explain what a for loop does"],
+             ["Write a simple calculator program"],
+             ["What's the difference between a list and a tuple?"],
+             ["Help me debug this code"],
+         ],
+         inputs=msg,
+         label="Example Questions"
+     )
+
+     # Event handlers
+     def user_submit(user_message, history):
+         # Append the user turn in the {"role", "content"} format expected
+         # by a Chatbot with type="messages", and clear the textbox
+         return "", history + [{"role": "user", "content": user_message}]
+
+     def bot_respond(history, temperature, top_p, top_k, max_tokens):
+         user_message = history[-1]["content"]
+         history_context = history[:-1]
+
+         history.append({"role": "assistant", "content": ""})
+         for response in generate_response(
+             user_message,
+             history_context,
+             temperature,
+             top_p,
+             top_k,
+             max_tokens
+         ):
+             history[-1]["content"] = response
+             yield history
+
+     # Connect events
+     msg.submit(
+         user_submit,
+         [msg, chatbot],
+         [msg, chatbot],
+         queue=False
+     ).then(
+         bot_respond,
+         [chatbot, temperature, top_p, top_k, max_tokens],
+         chatbot
+     )
+
+     submit.click(
+         user_submit,
+         [msg, chatbot],
+         [msg, chatbot],
+         queue=False
+     ).then(
+         bot_respond,
+         [chatbot, temperature, top_p, top_k, max_tokens],
+         chatbot
+     )
+
+     clear.click(lambda: None, None, chatbot, queue=False)
+
+     gr.Markdown(
+         """
+         ---
+         ### 💡 Tips for Best Results:
+         - **Factual/Technical questions**: Use temperature 0.3-0.5
+         - **Creative coding solutions**: Use temperature 0.7-1.0
+         - **Code generation**: Use temperature 0.5-0.7
+
+         ### 📝 About This Model
+         This is a fine-tuned Qwen3-4B model trained with Unsloth for efficient coding assistance.
+
+         **Model**: [michsethowusu/twi_code_assistant](https://huggingface.co/michsethowusu/twi_code_assistant)
+         """
+     )
+
+ if __name__ == "__main__":
+     demo.queue().launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ transformers>=4.55.4
+ torch>=2.0.0
+ gradio>=4.0.0
+ accelerate>=0.20.0
+ peft>=0.7.0
+ sentencepiece
+ protobuf
+ bitsandbytes