"""Gradio demo: ask a free-form question about an uploaded video.

The script loads the InternLM-XComposer-2.5 checkpoint once at import time,
exposes a single ``process_video`` handler through a ``gr.Interface`` and
launches the web UI.
"""

from typing import Optional

import gradio as gr
import spaces
import torch
from huggingface_hub import hf_hub_download  # noqa: F401  (unused here; kept from the original file)
from transformers import AutoModel, AutoTokenizer

# Download the model and tokenizer
model_name = 'internlm/internlm-xcomposer2d5-7b'
model = AutoModel.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).eval()
# The handler below runs under CUDA autocast inside a @spaces.GPU worker, so
# the weights have to live on the GPU as well. Guarded so that a CPU-only
# machine can still import the module.
if torch.cuda.is_available():
    model = model.cuda()
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model.tokenizer = tokenizer


@spaces.GPU
def process_video(video_path: Optional[str], query: str) -> str:
    """Answer ``query`` about the video stored at ``video_path``.

    Args:
        video_path: Filesystem path handed over by the ``gr.Video`` input;
            ``None`` when the user has not uploaded anything.
        query: The user's question about the video.

    Returns:
        The first element of the ``(response, history)`` pair returned by
        ``model.chat``.

    Raises:
        gr.Error: If no video was supplied or the query is blank.
    """
    # Fail early with a readable UI message instead of an opaque traceback
    # from deep inside the model code.
    if not video_path:
        raise gr.Error("Please upload a video first.")
    if not query or not query.strip():
        raise gr.Error("Please enter a query.")

    # Scoped no_grad: inference only, without permanently flipping the
    # autograd switch for the calling thread.
    with torch.no_grad(), torch.autocast(device_type='cuda', dtype=torch.float16):
        response, _ = model.chat(
            tokenizer,
            query,
            [video_path],
            do_sample=False,
            num_beams=3,
            use_meta=True,
        )
    return response


iface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Textbox(label="Enter your query"),
    ],
    outputs=gr.Textbox(label="Response"),
    title="Video Analysis with InternLM-XComposer",
    description="Upload a video and ask a question about it.",
)

iface.launch(debug=True)