MaxBlumenfeld committed
Commit 0fb2bdc · 1 Parent(s): bb6a531

trying with just base model

Files changed (1)
  1. app.py +152 -103
app.py CHANGED
@@ -1,124 +1,173 @@
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM, LlamaConfig
-import gradio as gr


-# Model IDs from Hugging Face Hub
-base_model_id = "HuggingFaceTB/SmolLM2-135M"
-instruct_model_id = "MaxBlumenfeld/smollm2-135m-bootleg-instruct-01"

-# Load tokenizer
-base_tokenizer = AutoTokenizer.from_pretrained(base_model_id)

-# Load models with explicit LLaMA architecture
-base_model = LlamaForCausalLM.from_pretrained(base_model_id)
-instruct_model = LlamaForCausalLM.from_pretrained(instruct_model_id)

-def generate_response(model, tokenizer, message, temperature=0.5, max_length=200, system_prompt="", is_instruct=False):
-    # Prepare input based on model type
-    if is_instruct:
-        if system_prompt:
-            full_prompt = f"{system_prompt}\n\nHuman: {message}\nAssistant:"
-        else:
-            full_prompt = f"Human: {message}\nAssistant:"
-    else:
-        # For base model, use simpler prompt format
-        full_prompt = message

-    inputs = tokenizer(full_prompt, return_tensors="pt")

-    with torch.no_grad():
-        outputs = model.generate(
-            inputs.input_ids,
-            max_length=max_length,
-            do_sample=True,
-            temperature=temperature,
-            top_k=50,
-            top_p=0.95,
-            num_return_sequences=1,
-            pad_token_id=tokenizer.eos_token_id  # Add padding token
-        )

-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

-    if is_instruct:
-        try:
-            response = response.split("Assistant:")[-1].strip()
-        except:
-            pass
-    else:
-        response = response[len(full_prompt):].strip()

-    return response

-def chat(message, temperature, max_length, system_prompt):
-    # Generate responses from both models
-    base_response = generate_response(
-        base_model,
-        base_tokenizer,
-        message,
-        temperature,
-        max_length,
-        system_prompt,
-        is_instruct=False
-    )

-    instruct_response = generate_response(
-        instruct_model,
-        base_tokenizer,
-        message,
-        temperature,
-        max_length,
-        system_prompt,
-        is_instruct=True
-    )

-    return base_response, instruct_response

-# Create Gradio interface
-with gr.Blocks() as demo:
-    gr.Markdown("# SmolLM2-135M Comparison Demo")
-    gr.Markdown("Compare responses between base and fine-tuned versions of SmolLM2-135M")

-    with gr.Row():
-        with gr.Column():
-            message_input = gr.Textbox(label="Input Message")
-            system_prompt = gr.Textbox(
-                label="System Prompt (Optional)",
-                placeholder="Set context or personality for the model",
-                lines=3
-            )

-        with gr.Column():
-            temperature = gr.Slider(
-                minimum=0.1,
-                maximum=2.0,
-                value=0.5,
-                label="Temperature"
-            )
-            max_length = gr.Slider(
-                minimum=50,
-                maximum=500,
-                value=200,
-                step=10,
-                label="Max Length"
-            )

-    with gr.Row():
-        with gr.Column():
-            gr.Markdown("### Base Model Response")
-            base_output = gr.Textbox(label="Base Model (SmolLM2-135M)", lines=5)

-        with gr.Column():
-            gr.Markdown("### Bootleg Instruct Model Response")
-            instruct_output = gr.Textbox(label="Fine-tuned Model", lines=5)

-    submit_btn = gr.Button("Generate Responses")
-    submit_btn.click(
-        fn=chat,
-        inputs=[message_input, temperature, max_length, system_prompt],
-        outputs=[base_output, instruct_output]
-    )

 if __name__ == "__main__":
-    demo.launch()
+# import torch
+# from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM, LlamaConfig
+# import gradio as gr


+# # Model IDs from Hugging Face Hub
+# base_model_id = "HuggingFaceTB/SmolLM2-135M"
+# instruct_model_id = "MaxBlumenfeld/smollm2-135m-bootleg-instruct-01"

+# # Load tokenizer
+# base_tokenizer = AutoTokenizer.from_pretrained(base_model_id)

+# # Load models with explicit LLaMA architecture
+# base_model = LlamaForCausalLM.from_pretrained(base_model_id)
+# instruct_model = LlamaForCausalLM.from_pretrained(instruct_model_id)

+# def generate_response(model, tokenizer, message, temperature=0.5, max_length=200, system_prompt="", is_instruct=False):
+#     # Prepare input based on model type
+#     if is_instruct:
+#         if system_prompt:
+#             full_prompt = f"{system_prompt}\n\nHuman: {message}\nAssistant:"
+#         else:
+#             full_prompt = f"Human: {message}\nAssistant:"
+#     else:
+#         # For base model, use simpler prompt format
+#         full_prompt = message

+#     inputs = tokenizer(full_prompt, return_tensors="pt")

+#     with torch.no_grad():
+#         outputs = model.generate(
+#             inputs.input_ids,
+#             max_length=max_length,
+#             do_sample=True,
+#             temperature=temperature,
+#             top_k=50,
+#             top_p=0.95,
+#             num_return_sequences=1,
+#             pad_token_id=tokenizer.eos_token_id  # Add padding token
+#         )

+#     response = tokenizer.decode(outputs[0], skip_special_tokens=True)

+#     if is_instruct:
+#         try:
+#             response = response.split("Assistant:")[-1].strip()
+#         except:
+#             pass
+#     else:
+#         response = response[len(full_prompt):].strip()

+#     return response

+# def chat(message, temperature, max_length, system_prompt):
+#     # Generate responses from both models
+#     base_response = generate_response(
+#         base_model,
+#         base_tokenizer,
+#         message,
+#         temperature,
+#         max_length,
+#         system_prompt,
+#         is_instruct=False
+#     )

+#     instruct_response = generate_response(
+#         instruct_model,
+#         base_tokenizer,
+#         message,
+#         temperature,
+#         max_length,
+#         system_prompt,
+#         is_instruct=True
+#     )

+#     return base_response, instruct_response

+# # Create Gradio interface
+# with gr.Blocks() as demo:
+#     gr.Markdown("# SmolLM2-135M Comparison Demo")
+#     gr.Markdown("Compare responses between base and fine-tuned versions of SmolLM2-135M")

+#     with gr.Row():
+#         with gr.Column():
+#             message_input = gr.Textbox(label="Input Message")
+#             system_prompt = gr.Textbox(
+#                 label="System Prompt (Optional)",
+#                 placeholder="Set context or personality for the model",
+#                 lines=3
+#             )

+#         with gr.Column():
+#             temperature = gr.Slider(
+#                 minimum=0.1,
+#                 maximum=2.0,
+#                 value=0.5,
+#                 label="Temperature"
+#             )
+#             max_length = gr.Slider(
+#                 minimum=50,
+#                 maximum=500,
+#                 value=200,
+#                 step=10,
+#                 label="Max Length"
+#             )

+#     with gr.Row():
+#         with gr.Column():
+#             gr.Markdown("### Base Model Response")
+#             base_output = gr.Textbox(label="Base Model (SmolLM2-135M)", lines=5)

+#         with gr.Column():
+#             gr.Markdown("### Bootleg Instruct Model Response")
+#             instruct_output = gr.Textbox(label="Fine-tuned Model", lines=5)

+#     submit_btn = gr.Button("Generate Responses")
+#     submit_btn.click(
+#         fn=chat,
+#         inputs=[message_input, temperature, max_length, system_prompt],
+#         outputs=[base_output, instruct_output]
+#     )
+
+# if __name__ == "__main__":
+#     demo.launch()
+
+
+
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+import gradio as gr
+
+model_id = "MaxBlumenfeld/smollm2-135m"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id)
+
+def generate_response(message, temperature=0.7, max_length=200):
+    prompt = f"Human: {message}\nAssistant:"
+    inputs = tokenizer(prompt, return_tensors="pt")
+
+    with torch.no_grad():
+        outputs = model.generate(
+            inputs.input_ids,
+            max_length=max_length,
+            temperature=temperature,
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id
+        )
+
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return response.split("Assistant:")[-1].strip()
+
+with gr.Blocks() as demo:
+    gr.Markdown("# SmolLM2 Bootleg Instruct Chat")
+
+    with gr.Row():
+        with gr.Column():
+            message = gr.Textbox(label="Message")
+            temp = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature")
+            max_len = gr.Slider(minimum=50, maximum=500, value=200, label="Max Length")
+            submit = gr.Button("Send")
+
+        with gr.Column():
+            output = gr.Textbox(label="Response")
+
+    submit.click(
+        generate_response,
+        inputs=[message, temp, max_len],
+        outputs=output
+    )

 if __name__ == "__main__":
+    demo.launch()
+
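For a quick local check of the new single-model version, a minimal sketch along these lines could exercise generate_response directly without launching the Gradio UI. It assumes the file above is saved as app.py in the working directory and that transformers, torch, and gradio are installed; the prompt string is just an arbitrary example.

# Minimal smoke test: importing app runs the module-level tokenizer/model loading,
# but not demo.launch(), which stays behind the __main__ guard.
import app

# Call the generation helper directly, bypassing the Gradio interface.
reply = app.generate_response("What is the capital of France?", temperature=0.7, max_length=200)
print(reply)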