File size: 3,098 Bytes
a1d71d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import os
import logging
from tempfile import TemporaryFile

import cv2
import numpy as np
from PIL import Image

import tator
import inference


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# With no handler configured anywhere, INFO records fall through to logging's
# last-resort handler, which only emits WARNING and above. basicConfig()
# installs a root stream handler and does nothing if one already exists.
logging.basicConfig()

# Read environment variables that are provided from TATOR
host = os.getenv('HOST')
token = os.getenv('TOKEN')

# PROJECT_ID and MEDIA_IDS are mandatory: fail early with a message naming the
# missing variable instead of an opaque TypeError/AttributeError on None.
_missing_env = [name for name in ('PROJECT_ID', 'MEDIA_IDS')
                if not os.getenv(name)]
if _missing_env:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env))

project_id = int(os.environ['PROJECT_ID'])
# MEDIA_IDS is a comma-separated list of integers; blank tokens (e.g. from a
# trailing comma) are ignored.
media_ids = [int(id_) for id_ in os.environ['MEDIA_IDS'].split(',')
             if id_.strip()]
# Run inference on every Nth frame; N must be positive because it is used as
# a modulus.
frames_per_inference = int(os.getenv('FRAMES_PER_INFERENCE', 30))
if frames_per_inference < 1:
    raise ValueError(
        f"FRAMES_PER_INFERENCE must be >= 1, got {frames_per_inference}")

# Set up the TATOR API.
api = tator.get_api(host, token)

# Iterate through each video: download it, run inference on every Nth frame,
# and upload the resulting box localizations.
for media_id in media_ids:

    # Download video.
    media = api.get_media(media_id)
    logger.info(f"Downloading {media.name}...")
    # basename() keeps the download inside /tmp even if the media name
    # contains path separators.
    out_path = os.path.join("/tmp", os.path.basename(media.name))
    for progress in tator.util.download_media(api, media, out_path):
        logger.info(f"Download progress: {progress}%")

    # Do inference on each video.
    logger.info(f"Doing inference on {media.name}...")
    localizations = []
    vid = cv2.VideoCapture(out_path)
    if not vid.isOpened():
        logger.warning(f"Could not open {out_path}; no frames will be read.")
    frame_number = 0
    # Guard the modulus below against a zero/negative stride.
    stride = max(1, frames_per_inference)

    try:
        # Read *every* frame from the video, break when at the end.
        while True:
            ret, frame = vid.read()
            if not ret:
                break

            # Only every Nth frame is encoded and sent to the model.
            if frame_number % stride == 0:
                frame_height, frame_width = frame.shape[:2]

                # The temporary file is closed (and removed) as soon as the
                # detections for this frame have been collected.
                with TemporaryFile(suffix='.jpg') as framefile:
                    # OpenCV decodes to BGR; PIL expects RGB channel order.
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    # The format is given explicitly because an anonymous
                    # temporary file has no filename for PIL to infer it from.
                    Image.fromarray(rgb_frame).save(framefile, format='JPEG')
                    # Rewind so the consumer reads from the start of the data.
                    framefile.seek(0)

                    # Predictions expose a pandas dataframe of detections.
                    predictions = inference.run_inference(framefile)

                    for _, r in predictions.pandas().xyxy[0].iterrows():
                        x, y = float(r['xmin']), float(r['ymin'])
                        x2, y2 = float(r['xmax']), float(r['ymax'])

                        # A fresh dict per detection; reusing one dict would
                        # make every list entry alias the last detection.
                        # Box values are divided by the frame size because
                        # Tator documents localization coordinates as
                        # normalized (0-1) rather than pixels.
                        localizations.append({
                            'media_id': media_id,
                            # NOTE(review): Tator localization specs normally
                            # carry the integer ID of a localization type;
                            # left as None as in the original - confirm the
                            # correct type ID for this project.
                            'type': None,
                            'frame': frame_number,
                            'x': x / frame_width,
                            'y': y / frame_height,
                            'width': (x2 - x) / frame_width,
                            'height': (y2 - y) / frame_height,
                            'class_category': r['name'],
                            'confidence': float(r['confidence']),
                        })

            frame_number += 1
    finally:
        # End interaction with video properly, even if inference fails.
        vid.release()

    logger.info(f"Uploading object detections on {media.name}...")

    # Create the localizations in the video.
    num_created = 0
    for response in tator.util.chunked_create(api.create_localization_list,
                                              project_id,
                                              localization_spec=localizations):
        num_created += len(response.id)

    # Output pretty logging information.
    logger.info(f"Successfully created {num_created} localizations on "
                f"{media.name}!")

    logger.info("-------------------------------------------------")

logger.info(f"Completed inference on {len(media_ids)} files.")