import io

import torch
from PIL import Image
from flask import jsonify

# Load the YOLOv5 model from torch.hub.
# Note: the repo must expose a hubconf.py entry point named 'yolov5s'; if it only
# ships trained weights, load them through the ultralytics repo instead, e.g.
# torch.hub.load('ultralytics/yolov5', 'custom', path='best.pt')
model = torch.hub.load('niki-stha/asl-detection-yolov5', 'yolov5s')

# Set the device (GPU if available, otherwise CPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device).eval()

# Inference function
def run_inference(image):
    # YOLOv5's AutoShape wrapper accepts a PIL image directly and handles
    # resizing, letterboxing and normalization internally, so no manual
    # torchvision transform is needed; `size` sets the inference resolution.
    results = model(image, size=416)

    # Post-process the results into a pandas DataFrame of detections
    # (You can customize this part based on your specific requirements)
    predictions = results.pandas().xyxy[0]

    return predictions

# Example API endpoint (Flask-style; adapt this to your API framework)
def inference_api(request):
    # Read the uploaded image from the multipart form field named 'image'
    image_data = request.files['image'].read()
    image = Image.open(io.BytesIO(image_data)).convert('RGB')

    # Run inference
    predictions = run_inference(image)

    # Convert the predictions DataFrame to a JSON-serializable structure
    response = {
        'predictions': predictions.to_dict(orient='records')
    }

    return jsonify(response)
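
# --- Example wiring (a minimal sketch, assuming Flask) ---
# This shows one way to expose inference_api() over HTTP. Flask, the '/predict'
# route, and port 5000 are illustrative assumptions, not part of the original
# snippet; with Flask the handler is called with the global `request` object.
from flask import Flask, request

app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    # Delegate to the handler defined above
    return inference_api(request)

if __name__ == '__main__':
    # Example client call (hypothetical image file name):
    #   curl -X POST -F image=@sample.jpg http://localhost:5000/predict
    app.run(host='0.0.0.0', port=5000)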