import time

import gradio as gr
from transformers import CLIPModel, CLIPProcessor


def get_zero_shot_classification_tab():
    """Build and return the "Zero-Shot Classification" Gradio tab.

    Eagerly loads two CLIP checkpoints (the general-purpose OpenAI model and
    the fashion-specialized FashionCLIP) so inference requests don't pay the
    model-loading cost. Returns the constructed ``gr.TabItem``.
    """
    openai_model_name = "openai/clip-vit-large-patch14"
    openai_model = CLIPModel.from_pretrained(openai_model_name)
    openai_processor = CLIPProcessor.from_pretrained(openai_model_name)

    patrickjohncyh_model_name = "patrickjohncyh/fashion-clip"
    patrickjohncyh_model = CLIPModel.from_pretrained(patrickjohncyh_model_name)
    patrickjohncyh_processor = CLIPProcessor.from_pretrained(patrickjohncyh_model_name)

    # Dropdown value -> (model, processor); both pairs stay resident in memory.
    model_map = {
        openai_model_name: (openai_model, openai_processor),
        patrickjohncyh_model_name: (patrickjohncyh_model, patrickjohncyh_processor),
    }

    def gradio_process(model_name, image, text):
        """Run zero-shot classification of `image` against comma-separated labels.

        Returns a two-element list: a newline-joined "label - probability"
        report and the wall-clock seconds spent on inference.
        """
        model, processor = model_map[model_name]
        labels = text.split(", ")

        start = time.time()
        inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)
        outputs = model(**inputs)
        # Softmax over the label axis gives one probability per candidate label.
        probs = outputs.logits_per_image.softmax(dim=1)[0]
        time_spent = time.time() - start

        result = "\n".join(
            f"{label} - {prob.item():.4f}" for label, prob in zip(labels, probs)
        )
        return [result, time_spent]

    with gr.TabItem("Zero-Shot Classification") as zero_shot_image_classification_tab:
        gr.Markdown("# Zero-Shot Image Classification")

        with gr.Row():
            with gr.Column():
                # Input components
                input_image = gr.Image(label="Upload Image", type="pil")
                input_text = gr.Textbox(label="Labels (comma separated)")
                model_selector = gr.Dropdown(
                    [openai_model_name, patrickjohncyh_model_name],
                    label="Select Model",
                )

                # Process button
                process_btn = gr.Button("Classify")

            with gr.Column():
                # Output components
                elapsed_result = gr.Textbox(label="Seconds elapsed", lines=1)
                output_text = gr.Textbox(label="Classification")

        # Connect the input components to the processing function
        process_btn.click(
            fn=gradio_process,
            inputs=[model_selector, input_image, input_text],
            outputs=[output_text, elapsed_result],
        )

    return zero_shot_image_classification_tab