"""Gradio app that serves the Qwen2.5-7B model loaded from the Hugging Face Hub."""

import gradio as gr

# Load the model only once at startup; both predict() and the launched UI
# reuse this single object instead of calling gr.load() a second time.
# NOTE(review): the original second load used the placeholder "model_path",
# which gr.load cannot resolve (no "{src}/" prefix and no `src=` argument).
# It is assumed the same Qwen model was intended -- confirm.
model = gr.load("models/Qwen/Qwen2.5-7B")


def predict(input_data):
    """Run the loaded model on ``input_data`` and return its output.

    Args:
        input_data: Value forwarded unchanged to the loaded model.
            Presumably a text prompt for this model -- TODO confirm.

    Returns:
        Whatever the loaded model returns for ``input_data``.
    """
    return model(input_data)


if __name__ == "__main__":
    # Launch last: launch() blocks when run as a script, so everything the
    # module defines must already exist before the server starts.
    model.launch()