# Captured from a Hugging Face Space page (Space status at capture time: Sleeping).
import tensorflow as tf

# Hide every GPU from TensorFlow so the model in this module runs on the CPU.
# Kept directly after the TensorFlow import, ahead of every other import,
# so it takes effect before anything else can touch the TensorFlow runtime.
tf.config.set_visible_devices([], 'GPU')
# Alternative kept for reference: stay on the GPU but enable memory growth.
# gpu_devices = tf.config.experimental.list_physical_devices('GPU')
# if gpu_devices:
#     tf.config.experimental.set_memory_growth(gpu_devices[0], True)
# else:
#     print(f"TensorFlow device: {gpu_devices}")

import os

import numpy as np
import keras
from PIL import Image
from keras_cv_attention_models import beit
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from typing import Tuple
#from huggingface_hub import snapshot_download
from labels import lookup_140
def get_triplet_model_beit(input_shape = (600, 600, 3),
                           embedding_units = 256,
                           embedding_depth = 2,
                           n_classes = 19,
                           backbone_name = 'Beit'):
    """Build a BEiT-based model with an embedding head and a logits head.

    Args:
        input_shape: Input shape handed to ``beit.BeitBasePatch16``.
        embedding_units: Width of every Dense layer in the embedding head.
        embedding_depth: Number of Dense layers in the embedding head. All
            but the last use ReLU; the last one is linear.
        n_classes: Number of units in the logits head.
        backbone_name: Accepted for interface compatibility; this function
            does not read it and always builds ``BeitBasePatch16``.

    Returns:
        A compiled ``tf.keras.Model`` with two outputs, in this order:
        the L2-normalised embedding and the raw (activation-free) logits.
    """
    backbone = beit.BeitBasePatch16(input_shape=input_shape, pretrained="imagenet21k-ft1k")
    # Cut the backbone at its second-to-last layer; presumably this drops the
    # pretrained classification layer and exposes pooled features - confirm
    # against the keras_cv_attention_models BEiT definition.
    feature_extractor = tf.keras.Model(backbone.input, backbone.layers[-2].output)
    features = feature_extractor.output

    # The embedding Dense layers are created BEFORE the logits Dense layer so
    # that Keras' auto-generated layer names stay in their original order.
    embedding = features
    for depth_idx in range(embedding_depth):
        is_last = depth_idx == embedding_depth - 1
        embedding = Dense(embedding_units,
                          activation="linear" if is_last else "relu")(embedding)
    # Unit-length embeddings along the last axis.
    embedding = tf.nn.l2_normalize(embedding, -1, epsilon=1e-5)

    logits = Dense(n_classes)(features)

    model = tf.keras.Model(feature_extractor.input, [embedding, logits])
    # NOTE(review): 'cce' is forwarded to compile() unchanged and is applied to
    # both outputs; confirm the installed Keras resolves this loss name before
    # using the model for training (inference does not depend on it).
    model.compile(loss='cce', metrics=['accuracy'])
    #model.summary()
    return model
# Image side lengths in pixels. Neither constant is referenced by the
# functions in this section; they are kept for any external users.
load_size = 600
crop_size = 600
def _clever_crop(img: tf.Tensor,
                 target_size: Tuple[int, int]=(128,128),
                 grayscale: bool=False
                 ) -> Tuple[tf.Tensor, object]:
    """Tile an elongated image along its short side, then resize it.

    When the longer side exceeds 1.2x the shorter side, the image is
    concatenated with copies of itself along the shorter dimension, and an
    image whose first axis is the shorter one is additionally rotated by
    90 degrees. The result is then resized to ``target_size``.

    Assumes ``img`` is a single image whose first two axes are the spatial
    ones (rows, columns) and that the function runs eagerly, since tensor
    values are used in Python ``if``/``int()`` - TODO confirm with callers.

    Args:
        img: Image to process.
        target_size: Output size passed to ``tf.image.resize``.
        grayscale: If True, convert to grayscale and back to three channels.

    Returns:
        A ``(image, repeating)`` pair. ``image`` is the resized tensor.
        ``repeating`` is ``floor(longer / shorter)`` as a float tensor when
        the image was tiled, or the Python int ``0`` when it was not.
    """
    dim0 = tf.shape(img)[0]
    dim1 = tf.shape(img)[1]
    maxside = tf.math.maximum(dim0, dim1)
    minside = tf.math.minimum(dim0, dim1)
    ratio = tf.math.divide(maxside, minside)

    tiled = img
    repeating = 0
    if ratio > 1.2:
        repeating = tf.math.floor(ratio)
        # The original plus `repeating` extra copies, joined in one concat.
        # NOTE(review): floor(ratio) + 1 copies overshoots a square for many
        # ratios - confirm this is the intended amount of tiling.
        copies = [img] * (int(repeating) + 1)
        if tf.math.equal(dim1, minside):
            tiled = tf.concat(copies, axis=1)
        if tf.math.equal(dim0, minside):
            tiled = tf.concat(copies, axis=0)
            tiled = tf.image.rot90(tiled)

    img = tf.image.resize(tiled, target_size)
    if grayscale:
        # Round-trip through one channel so the output keeps three channels.
        img = tf.image.rgb_to_grayscale(img)
        img = tf.image.grayscale_to_rgb(img)
    return img, repeating
def preprocess(img,size=384):
    """Divide pixel values by 255 and resize the image to ``size`` x ``size``.

    Args:
        img: Array-like image; converted to ``float32`` before scaling.
            Presumably holds 0-255 pixel values - verify against callers.
        size: Side length of the square output.

    Returns:
        The resized image as a ``float32`` NumPy array.
    """
    scaled = np.array(img, np.float32) / 255.0
    resized = tf.image.resize(scaled, (size, size))
    return np.array(resized, np.float32)
def select_top_n(preds,n=10):
    """Return the indices of the ``n`` largest entries of ``preds``.

    Args:
        preds: 1-D array-like of scores.
        n: How many indices to return; expected to be non-negative. When it
            exceeds ``len(preds)``, every index is returned.

    Returns:
        NumPy array of indices ordered from highest to lowest score.
    """
    # Reverse first, then take the leading n. Slicing with [-n:] would return
    # the WHOLE array for n == 0 because -0 == 0.
    descending = np.argsort(preds)[::-1]
    return descending[:n]
def parse_results(top_n,logits):
    """Map selected class indices to ``{label: score}``.

    Args:
        top_n: Iterable of class indices; each is used as a key into
            ``lookup_140`` and as an index into ``logits``.
        logits: Indexable collection of per-class scores. The visible caller
            passes softmax probabilities here despite the parameter name.

    Returns:
        Dict from label to the score as a Python ``float``, in the order of
        ``top_n``. If two indices share a label, the later one wins.
    """
    return {lookup_140[idx]: float(logits[idx]) for idx in top_n}
def inference_resnet_embedding_beit(x,model,size=576,n_classes=142,n_top=10):
    """Return the embedding the model produces for a single image.

    Args:
        x: Input image, forwarded to ``_clever_crop``.
        model: Model whose ``predict`` returns ``[embeddings, logits]``.
        size: Side length used for both cropping and preprocessing.
        n_classes: Unused; kept so the signature matches
            ``inference_resnet_finer_beit``.
        n_top: Unused; kept for the same reason.

    Returns:
        The first output of ``model.predict`` for the only item in the batch.
    """
    cropped, _ = _clever_crop(x, (size, size))
    batch = np.array([preprocess(cropped, size=size)])
    embeddings = model.predict(batch)[0]
    return embeddings[0]
def inference_resnet_finer_beit(x,model,size=576,n_classes=142,n_top=10):
    """Classify a single image and return its top-scoring labels.

    Args:
        x: Input image, forwarded to ``_clever_crop``.
        model: Model whose ``predict`` returns ``[embeddings, logits]``.
        size: Side length used for both cropping and preprocessing.
        n_classes: Unused; kept for interface compatibility.
        n_top: Number of labels to return.

    Returns:
        Dict mapping each of the ``n_top`` best labels to its softmax
        probability, ordered from most to least probable.
    """
    cropped, _ = _clever_crop(x, (size, size))
    batch = np.array([preprocess(cropped, size=size)])
    class_logits = model.predict(batch)[1][0]
    # .numpy() already yields a host-side array, so the deprecated
    # EagerTensor.cpu() hop is not needed.
    probabilities = tf.nn.softmax(class_logits).numpy()
    top_n = select_top_n(probabilities, n=n_top)
    return parse_results(top_n, probabilities)