|
import gradio as gr |
|
import torch |
|
from torchvision import transforms |
|
from PIL import Image |
|
from transformers import pipeline |
|
import os |
|
from ultralytics import YOLO |
|
|
|
|
|
# YOLO weights are loaded once, when the module is first executed, so every
# request reuses the same in-memory model.
image_model = YOLO("quality_best.pt")

# Resize-then-tensorize preprocessing pipeline.
# NOTE(review): nothing in the visible code applies this transform (the YOLO
# call receives the raw image) -- confirm whether it is still needed.
image_transforms = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor()]
)
|
|
|
def predict_image(image):
    """Label an uploaded image as poor or good quality.

    Runs the module-level ``image_model`` on ``image`` and maps the class
    index of the first returned box to a human-readable label.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.

    Returns:
        A quality label, or a short message when there is no image, no
        detection, or the class index has no matching label.
    """
    if image is None:
        return "No image uploaded"

    detections = image_model(image)[0].boxes
    if detections is None or len(detections) == 0:
        return "No objects detected"

    quality_labels = ("Poor Quality", "Good Quality")
    # Only the first box is consulted; any further boxes are ignored.
    class_index = int(detections.cls[0].item())

    if class_index < len(quality_labels):
        return quality_labels[class_index]
    return "Unknown class detected"
|
|
|
|
|
|
|
# Access token read from the "space-token" environment variable; this is None
# when the variable is not set.
HUGGINGFACE_TOKEN = os.environ.get("space-token")

# Text-generation pipeline, built once when the module is executed and shared
# by every call to assess_text.
llm = pipeline(
    task="text-generation",
    model="google/gemma-3n-e2b-it",
    token=HUGGINGFACE_TOKEN,
)
|
|
|
def assess_text(prompt):
    """Generate an LLM assessment for a free-text description.

    Args:
        prompt: The user's description. ``None``, empty, and
            whitespace-only values are all treated as "nothing provided".

    Returns:
        The ``generated_text`` field of the first pipeline result, coerced to
        ``str``, or ``"No description provided."`` when there is no usable
        prompt.
    """
    # The description field is labelled optional in the UI, so the value may
    # be missing altogether; check for None before calling str methods on it.
    if prompt is None or not prompt.strip():
        return "No description provided."

    # NOTE(review): for a plain string prompt the text-generation pipeline
    # presumably returns the prompt followed by its continuation
    # (return_full_text defaults to True) -- confirm the echo is wanted.
    output = llm(prompt, max_new_tokens=150)
    return str(output[0]["generated_text"])
|
|
|
|
|
|
|
def combined_assessment(image, description):
    """Run the image and text assessments and return both results.

    Args:
        image: Image forwarded to ``predict_image``.
        description: Text forwarded to ``assess_text``.

    Returns:
        A 2-tuple ``(image_result, text_result)`` with each element coerced
        to ``str``.
    """
    # The image check runs first, then the text check.
    verdicts = (predict_image(image), assess_text(description))
    return tuple(str(verdict) for verdict in verdicts)
|
|
|
|
|
# Gradio form with two inputs (image, text) and two text outputs, wired to
# combined_assessment.
demo = gr.Interface(
    title="Cocoa Bean Quality Checker (Image + LLM)",
    description="Upload a cocoa bean image and/or provide a text description. This app uses a trained image classifier and Gemma LLM to assess quality.",
    fn=combined_assessment,
    inputs=[
        gr.Image(type="pil", label="Upload Cocoa Bean Image"),
        gr.Textbox(label="Optional Description (e.g., color, smell, texture)"),
    ],
    outputs=[
        gr.Textbox(label="Image-Based Prediction"),
        gr.Textbox(label="Gemma LLM Assessment"),
    ],
)

# The app is launched as soon as the module is executed.
demo.launch()
|
|