File size: 2,814 Bytes
bc183a3
a3e1670
 
 
 
3f47689
a3e1670
 
 
bc183a3
 
 
 
a3e1670
36af0e0
3f47689
a3e1670
3f47689
 
 
 
a3e1670
 
3f47689
 
a3e1670
 
 
 
b694991
a3e1670
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3f47689
 
a3e1670
3f47689
a000b38
a3e1670
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc82cf6
7b95f06
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import gradio as gr
import torch
import numpy as np
from transformers import ViTForImageClassification, ViTModel, ViTImageProcessor
from PIL import Image
import PIL
import io
from sklearn.preprocessing import LabelEncoder
import json

def greet(name):
    """Return a friendly greeting for *name*."""
    return f"Hello {name}!!"


async def test2(file, top_k: int = 5):
    """Gradio handler: classify an uploaded image and return predictions as JSON.

    Args:
        file: the input image as delivered by Gradio's "image" input
              (a numpy array — TODO confirm against the Interface config below).
        top_k: number of top predictions to include in the result.

    Returns:
        A JSON string of the form {"predictions": {1: {...}, ..., top_k: {...}}}.
    """
    # Preprocess the raw image into a model-ready tensor, then rank classes.
    image_tensor = preprocess_image(file)
    predictions = predict(image_tensor, top_k)
    return json.dumps({"predictions": predictions})

# --- Module-level setup: label encoder, image processor, and classifier ---

# Restore the fitted label encoder from disk; 'encoder.npy' must hold the
# class-name array saved from training (allow_pickle needed for object dtype).
encoder = LabelEncoder()
encoder.classes_ = np.load('encoder.npy', allow_pickle=True)

# Pretrained ViT backbone (downloaded from the Hugging Face hub on first run).
pretrained_model = ViTModel.from_pretrained('pillIdentifierAI/pillIdentifier')
# Processor that resizes and normalizes images to the ViT input format.
# do_rescale=False: inputs are assumed already scaled — TODO confirm the
# Gradio image values match what this expects.
feature_extractor = ViTImageProcessor(
    image_size=224,
    do_resize=True,
    do_normalize=True,
    do_rescale=False,
    image_mean=[0.5, 0.5, 0.5],
    image_std=[0.5, 0.5, 0.5],
)


# Build a classification head on top of the pretrained backbone.
config = pretrained_model.config
config.num_labels = 2112  # Change this to the appropriate number of classes
model = ViTForImageClassification(config)
# Replace the randomly-initialized backbone with the pretrained weights.
model.vit = pretrained_model

# Inference only: disable dropout / batch-norm updates.
model.eval()

def preprocess_image(image):
    """Convert a raw input image into a normalized ViT pixel tensor.

    Args:
        image: image data convertible to a uint8 numpy array
               (e.g. the array Gradio's "image" input provides).

    Returns:
        A float32 torch tensor of pixel values (no batch dimension).
    """
    # Normalize to an RGB PIL image so the processor gets 3 channels.
    image = Image.fromarray(np.uint8(image))
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # The processor handles resizing and mean/std normalization.
    inputs = feature_extractor(images=[image])
    return torch.tensor(inputs['pixel_values'][0], dtype=torch.float32)

def predict(image_tensor, top_k=5):
    """Run the classifier and return the top_k labels with probabilities.

    Args:
        image_tensor: preprocessed pixel tensor (C, H, W) from preprocess_image.
        top_k: number of top-ranked classes to return.

    Returns:
        Dict mapping rank (1..top_k) to {'label': str, 'probability': float}.
    """
    # Ensure the model is in evaluation mode (idempotent; also set at import).
    model.eval()

    with torch.no_grad():
        outputs = model(pixel_values=image_tensor.unsqueeze(0))  # Add batch dimension
        # Bug fix: the original reported raw logits under the key
        # 'probability'. Apply softmax so the values are true probabilities.
        probs = torch.softmax(outputs.logits, dim=1).numpy()

    # Indices of the top_k classes, highest probability first. Ranking by
    # probability is identical to ranking by logit (softmax is monotonic).
    top_indices = np.argsort(probs, axis=1)[:, ::-1][:, :top_k]

    # Decode class indices back to label names via the fitted encoder.
    result = {}
    for rank, idx in enumerate(top_indices[0], start=1):
        class_name = encoder.inverse_transform([idx])[0]
        result[rank] = {'label': str(class_name), 'probability': float(probs[0][idx])}

    return result

# Wire the async handler into a simple Gradio UI: one image input, one text
# output (the JSON string test2 returns), and launch with a public share link.
iface = gr.Interface(fn=test2, inputs="image", outputs="text")
iface.launch(share=True)