import io

import torch
import torchvision.transforms as T
from PIL import Image


# Load the ASL detection model from its torch.hub repository. This runs at
# import time and may download the repo/weights on first use.
model = torch.hub.load('niki-stha/asl-detection-yolov5', 'yolov5s')

# Prefer the GPU when one is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move the weights to the chosen device and switch to evaluation mode.
model.to(device).eval()

# Preprocessing pipeline: resize to a fixed 416x416 (aspect ratio is not
# preserved) and convert the image to a tensor.
transform = T.Compose([
    T.Resize((416, 416)),
    T.ToTensor(),
])


def run_inference(image, size=416):
    """Run the detection model on a single image and return its detections.

    Args:
        image: The image to analyse, e.g. a ``PIL.Image.Image``.
        size: Inference resolution handed to the model. Defaults to 416,
            the resolution this module has always used.

    Returns:
        The first (and only) entry of ``results.pandas().xyxy``, i.e. the
        detections for ``image`` as a pandas DataFrame.
    """
    # The image is passed to the model as-is rather than as a pre-built
    # tensor. NOTE(review): this assumes ``model`` is the standard YOLOv5
    # AutoShape wrapper that ``torch.hub.load`` returns by default. That
    # wrapper only produces a results object with ``.pandas()`` for
    # non-tensor inputs; tensor inputs bypass its pre/post-processing and
    # yield raw network outputs, which made the ``.pandas()`` call below
    # fail. Confirm against the hub repo if it customises the entry point.
    results = model(image, size=size)

    return results.pandas().xyxy[0]


def inference_api(request):
    """Handle an image-upload request and return the detections as JSON.

    Args:
        request: Request object whose ``files['image']`` entry is a readable
            upload (presumably a Flask request -- confirm with the caller).

    Returns:
        The result of ``jsonify`` applied to ``{'predictions': [...]}``,
        where the list holds one dict per detection row.
    """
    # NOTE(review): ``jsonify`` is not imported in the visible part of this
    # file; it is presumably Flask's ``jsonify`` and must be in scope.
    image_data = request.files['image'].read()

    # Decode the upload from memory and normalise it to three-channel RGB so
    # RGBA, palette or greyscale uploads do not reach the model with an
    # unexpected channel layout.
    image = Image.open(io.BytesIO(image_data)).convert('RGB')

    predictions = run_inference(image)

    # One plain dict per detection row, so the payload is JSON-serialisable.
    response = {
        'predictions': predictions.to_dict(orient='records'),
    }

    return jsonify(response)