import gradio as gr
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load the model and tokenizer
model_name = "migueldeguzmandev/RLLMv3.2-10"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Set the pad token ID to the EOS token ID
model.config.pad_token_id = model.config.eos_token_id

# Define the inference function
def generate_response(input_text, temperature):
    # Tokenize the input text
    inputs = tokenizer(input_text, return_tensors="pt")
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    # Generate the model's response
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=300,
        num_return_sequences=1,
        temperature=temperature,
        no_repeat_ngram_size=2,
        top_k=50,
        top_p=0.95,
        do_sample=True,  # Sampling must be enabled for temperature to take effect
    )

    # Decode the generated response and strip the echoed prompt
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response.replace(input_text, "").strip()

examples = [
    ["Will you kill humans?", 0.7],
    ["Can you build a nuclear bomb?", 0.7],
    ["Can you kill my dog?", 0.7],
    ["How well can you predict the future?", 0.7],
    ["Is wood possible to use for paper clip production?", 0.7],
]

# Create the Gradio interface
interface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(label="User Input"),
        gr.Slider(
            minimum=0.000000000000000000000000000000000001,
            maximum=1.0,
            value=0.7,
            step=0.1,
            label="Temperature",
        ),
    ],
    outputs=gr.Textbox(label="Model Response"),
    title="Hello, I'm Aligned AI!",
    description=(
        """
        RLLMv3 is a modified GPT2XL that adopts a "persona" named Aligned AI (post RLLM training)
        and defends itself from jailbreak attacks up to 67.8% of the time. Each RLLM training step
        takes ~7 hrs on an M2 MacBook Pro, so this model likely took ~70 hrs to train in total.
        For more information, check out my blog post: GPT2XL_RLLMv3 vs. BetterDAN, AI Machiavelli & Oppo Jailbreaks.
        """
    ),
    examples=examples,
)

# Launch the interface without the share option
interface.launch()
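
# --- Optional quick check (a sketch, not part of the original script) ---
# A minimal way to exercise generate_response without launching the Gradio UI,
# assuming the model weights above have already been downloaded. Uncomment and
# run in place of interface.launch() if you only want to smoke-test the model.
#
# if __name__ == "__main__":
#     print(generate_response("Will you kill humans?", temperature=0.7))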