from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import torch

# Load a model suited for code generation
model_name = "Salesforce/codegen-350M-mono"  # Choose a suitable model for your needs
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Set a padding token if the tokenizer doesn't define one
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # Reuse eos_token as pad_token

model = AutoModelForCausalLM.from_pretrained(model_name)

# Move the model to GPU if available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


def generate_code(prompt):
    # Use the user's prompt; fall back to a refined default prompt if the input is empty
    if prompt and prompt.strip():
        full_prompt = prompt.strip()
    else:
        full_prompt = (
            "Please generate a basic HTML template for a personal blog, "
            "including a header, main content area for posts, and a footer. "
            "The output should be only the HTML code."
        )

    # Tokenize the input
    input_tensor = tokenizer(
        full_prompt, return_tensors="pt", padding=True, truncation=True
    ).to(device)

    # Generate code, passing the attention mask explicitly
    with torch.no_grad():
        generated_ids = model.generate(
            input_tensor["input_ids"],
            attention_mask=input_tensor["attention_mask"],
            max_length=500,  # Increase if necessary to capture the full HTML
            num_beams=5,
            early_stopping=True,
            pad_token_id=tokenizer.pad_token_id,
        )

    # Decode only the newly generated tokens so the prompt isn't echoed back
    new_tokens = generated_ids[0][input_tensor["input_ids"].shape[1]:]
    generated_code = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return generated_code


# Set up the Gradio interface
iface = gr.Interface(fn=generate_code, inputs="text", outputs="text", allow_flagging="never")

# Launch the app
iface.launch(server_name="0.0.0.0", server_port=7860)