import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

print("Loading model and tokenizer...")

# Initialize model and tokenizer
model_id = "htigenai/finetune_test"  # your model ID
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    load_in_8bit=True  # 8-bit quantization (requires bitsandbytes) to reduce memory usage
)

# Some causal LM tokenizers ship without a pad token; fall back to EOS so
# generate() does not fail when a pad_token_id is required.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token


def generate_text(prompt):
    """Generate text based on the input prompt."""
    try:
        # Tokenize the input and move it to the model's device
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        # Generate with sampling
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            temperature=0.7,
            top_p=0.95,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id
        )

        # Decode and return the generated text
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error during generation: {str(e)}"


# Create the Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=3, placeholder="Enter your prompt here..."),
    outputs=gr.Textbox(),
    title="Text Generation Model",
    description="Enter a prompt and get AI-generated text",
    examples=[
        ["What are your thoughts about cats?"],
        ["Write a short story about a magical forest"],
        ["Explain quantum computing to a 5-year-old"],
    ]
)

# Launch the interface
iface.launch()