MaxBlumenfeld's picture
testing with new model
c1f8208
raw
history blame
5.5 kB
# import torch
# from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM, LlamaConfig
# import gradio as gr
# # Model IDs from Hugging Face Hub
# base_model_id = "HuggingFaceTB/SmolLM2-135M"
# instruct_model_id = "MaxBlumenfeld/smollm2-135m-bootleg-instruct-01"
# # Load tokenizer
# base_tokenizer = AutoTokenizer.from_pretrained(base_model_id)
# # Load models with explicit LLaMA architecture
# base_model = LlamaForCausalLM.from_pretrained(base_model_id)
# instruct_model = LlamaForCausalLM.from_pretrained(instruct_model_id)
# def generate_response(model, tokenizer, message, temperature=0.5, max_length=200, system_prompt="", is_instruct=False):
# # Prepare input based on model type
# if is_instruct:
# if system_prompt:
# full_prompt = f"{system_prompt}\n\nHuman: {message}\nAssistant:"
# else:
# full_prompt = f"Human: {message}\nAssistant:"
# else:
# # For base model, use simpler prompt format
# full_prompt = message
# inputs = tokenizer(full_prompt, return_tensors="pt")
# with torch.no_grad():
# outputs = model.generate(
# inputs.input_ids,
# max_length=max_length,
# do_sample=True,
# temperature=temperature,
# top_k=50,
# top_p=0.95,
# num_return_sequences=1,
# pad_token_id=tokenizer.eos_token_id # Add padding token
# )
# response = tokenizer.decode(outputs[0], skip_special_tokens=True)
# if is_instruct:
# try:
# response = response.split("Assistant:")[-1].strip()
# except:
# pass
# else:
# response = response[len(full_prompt):].strip()
# return response
# def chat(message, temperature, max_length, system_prompt):
# # Generate responses from both models
# base_response = generate_response(
# base_model,
# base_tokenizer,
# message,
# temperature,
# max_length,
# system_prompt,
# is_instruct=False
# )
# instruct_response = generate_response(
# instruct_model,
# base_tokenizer,
# message,
# temperature,
# max_length,
# system_prompt,
# is_instruct=True
# )
# return base_response, instruct_response
# # Create Gradio interface
# with gr.Blocks() as demo:
# gr.Markdown("# SmolLM2-135M Comparison Demo")
# gr.Markdown("Compare responses between base and fine-tuned versions of SmolLM2-135M")
# with gr.Row():
# with gr.Column():
# message_input = gr.Textbox(label="Input Message")
# system_prompt = gr.Textbox(
# label="System Prompt (Optional)",
# placeholder="Set context or personality for the model",
# lines=3
# )
# with gr.Column():
# temperature = gr.Slider(
# minimum=0.1,
# maximum=2.0,
# value=0.5,
# label="Temperature"
# )
# max_length = gr.Slider(
# minimum=50,
# maximum=500,
# value=200,
# step=10,
# label="Max Length"
# )
# with gr.Row():
# with gr.Column():
# gr.Markdown("### Base Model Response")
# base_output = gr.Textbox(label="Base Model (SmolLM2-135M)", lines=5)
# with gr.Column():
# gr.Markdown("### Bootleg Instruct Model Response")
# instruct_output = gr.Textbox(label="Fine-tuned Model", lines=5)
# submit_btn = gr.Button("Generate Responses")
# submit_btn.click(
# fn=chat,
# inputs=[message_input, temperature, max_length, system_prompt],
# outputs=[base_output, instruct_output]
# )
# if __name__ == "__main__":
# demo.launch()
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import gradio as gr
# Hugging Face Hub repository id used for both the tokenizer and the model.
# The commented-out id is kept as an alternative checkpoint to switch to.
# NOTE(review): the commented-out legacy code above spells the fine-tuned repo
# as "...-instruct-01" (with a hyphen) — confirm which id is the right one.
# model_id = "HuggingFaceTB/SmolLM2-135M"
model_id = "MaxBlumenfeld/smollm2-135m-bootleg-instruct01"
# Both objects are created at import time and are read as module-level globals
# by generate_response().
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
def generate_response(message, temperature=0.7, max_length=200):
    """Generate the assistant's reply to a single user message.

    The message is wrapped in the ``Human: ...\\nAssistant:`` prompt format,
    sampled from the module-level ``model`` and decoded with the module-level
    ``tokenizer``.

    Args:
        message: User text to respond to.
        temperature: Sampling temperature forwarded to ``model.generate``.
            Coerced to ``float`` because UI sliders may deliver an ``int``
            (e.g. ``2``), which the sampling code rejects.
        max_length: Maximum total sequence length (prompt plus generated
            tokens) forwarded to ``model.generate``. Coerced to ``int``
            because UI sliders may deliver a ``float``.

    Returns:
        The text generated after the prompt, cut at the start of any further
        ``Human:`` turn the model may have produced, with surrounding
        whitespace stripped.
    """
    prompt = f"Human: {message}\nAssistant:"
    inputs = tokenizer(prompt, return_tensors="pt")
    prompt_len = inputs.input_ids.shape[1]

    with torch.no_grad():
        outputs = model.generate(
            inputs.input_ids,
            # The pad token is set to EOS below, so pass the mask explicitly
            # instead of letting generate() try to infer it from the ids.
            attention_mask=inputs.get("attention_mask"),
            max_length=int(max_length),
            temperature=float(temperature),
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. Splitting the full decoded text
    # on "Assistant:" and taking the last piece returns the wrong segment
    # whenever the model goes on to write additional turns.
    generated_ids = outputs[0][prompt_len:]
    reply = tokenizer.decode(generated_ids, skip_special_tokens=True)

    # Drop anything from the next turn marker onward so only the first reply
    # is returned.
    return reply.split("\nHuman:")[0].strip()
# Build the single-page chat UI: inputs on the left, model response on the right.
with gr.Blocks() as demo:
    gr.Markdown("# SmolLM2 Bootleg Instruct Chat")

    with gr.Row():
        # Left column: the user message, sampling controls and the send button.
        with gr.Column():
            message = gr.Textbox(label="Message")
            temp = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.7,
                label="Temperature",
            )
            max_len = gr.Slider(
                minimum=50,
                maximum=500,
                value=200,
                label="Max Length",
            )
            submit = gr.Button("Send")

        # Right column: where the generated reply is shown.
        with gr.Column():
            output = gr.Textbox(label="Response")

    # Clicking "Send" passes (message, temperature, max length) to
    # generate_response and writes its return value to the response box.
    submit.click(
        fn=generate_response,
        inputs=[message, temp, max_len],
        outputs=output,
    )

# Only start the web server when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()