from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import torch

# Load a model suited for code generation
model_name = "Salesforce/codegen-350M-mono"  # Choose a suitable model for your needs
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Set a padding token if the tokenizer doesn't define one
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # Reuse eos_token as pad_token

model = AutoModelForCausalLM.from_pretrained(model_name)

# Move the model to GPU if available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


def generate_code(prompt):
    # Use the user's prompt; fall back to a refined default prompt if the input is empty
    if prompt and prompt.strip():
        full_prompt = prompt.strip()
    else:
        full_prompt = (
            "Please generate a basic HTML template for a personal blog, "
            "including a header, main content area for posts, and a footer. "
            "The output should be only the HTML code."
        )

    # Tokenize the input
    input_tensor = tokenizer(
        full_prompt, return_tensors="pt", padding=True, truncation=True
    ).to(device)

    # Generate code, passing the attention mask explicitly
    with torch.no_grad():
        generated_ids = model.generate(
            input_tensor["input_ids"],
            attention_mask=input_tensor["attention_mask"],
            max_length=500,  # Increase if necessary to capture the full HTML
            num_beams=5,
            early_stopping=True,
            pad_token_id=tokenizer.pad_token_id,
        )

    # Decode only the newly generated tokens so the prompt isn't echoed back
    new_tokens = generated_ids[0][input_tensor["input_ids"].shape[1]:]
    generated_code = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return generated_code


# Set up the Gradio interface
iface = gr.Interface(fn=generate_code, inputs="text", outputs="text", allow_flagging="never")

# Launch the app
iface.launch(server_name="0.0.0.0", server_port=7860)