import os
import subprocess

import gradio as gr
import spaces  # required for the @spaces.GPU decorator below; was missing
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM

# Install required package. Merge the flag into the existing environment
# instead of replacing it, so pip can still resolve PATH and friends.
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
)

# Authenticate with the Hugging Face Hub (needed for gated checkpoints such
# as meta-llama/* and google/gemma-*).
hf_token = os.getenv("HF_TOKEN")
login(token=hf_token, add_to_git_credential=True)


# Function to get the model summary
@spaces.GPU
def get_model_summary(model_name):
    """Load a model and return its printed module tree."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = AutoModelForCausalLM.from_pretrained(
        model_name, trust_remote_code=True
    ).to(device)
    return str(model)


# Create the Gradio Blocks interface
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            textbox = gr.Textbox(label="Model Name")
            examples = gr.Examples(
                examples=[
                    ["google/gemma-7b"],
                    ["microsoft/Phi-3-mini-4k-instruct"],
                    ["meta-llama/Meta-Llama-3-8B"],
                    ["mistralai/Mistral-7B-Instruct-v0.3"],
                    ["vikhyatk/moondream2"],
                    ["microsoft/Phi-3-vision-128k-instruct"],
                    ["openbmb/MiniCPM-Llama3-V-2_5"],
                    ["google/paligemma-3b-mix-224"],
                    ["HuggingFaceM4/idefics2-8b-chatty"],
                    ["mistralai/Codestral-22B-v0.1"],
                ],
                inputs=textbox,
            )
            submit_button = gr.Button("Submit")
        with gr.Column():
            output = gr.Textbox(label="Output", lines=20)

    submit_button.click(fn=get_model_summary, inputs=textbox, outputs=output)

# Launch the interface
demo.launch()
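
# A lighter-weight variant (a sketch, not part of the original app; the
# function name get_model_summary_meta is hypothetical): only the printed
# module tree is needed, so the weights never have to be downloaded or
# materialized on the GPU. accelerate's init_empty_weights instantiates the
# architecture on the meta device from the config alone.
#
# from accelerate import init_empty_weights
# from transformers import AutoConfig
#
# def get_model_summary_meta(model_name):
#     # Fetch only the config, then build an empty (meta-device) model from it.
#     config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
#     with init_empty_weights():
#         model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
#     return str(model)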