chiyoi committed
Commit 139dd3e
1 Parent(s): 72d3c6b
.gitattributes CHANGED
@@ -7,6 +7,7 @@
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
+*.keras filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -4,7 +4,10 @@

 # data
 assets
-out
+output

 # python
 __pycache__
+
+# gradio
+flagged
app.py CHANGED
@@ -1,7 +1,39 @@
 import gradio as gr
+import tensorflow as tf
+from moviepy.editor import VideoFileClip
+from ultralytics import YOLO

-def greet(name):
-    return "Hello " + name + "!!"
+from core.data import ClassMapping
+from core.model import load_classifier
+from core.inference import FrameProcessor

-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
+print("Tensorflow version " + tf.__version__)
+
+print('Load classifier.')
+classifier_path = 'weights/classifier-7.keras'
+classifier = load_classifier(classifier_path)
+
+print('Load detector.')
+detector_path = 'weights/yolov8n.pt'
+detector = YOLO(detector_path)
+
+def fn(video: gr.Video):
+    print('Process video.')
+    output = f'Marked-{str(video)}'
+    clip = VideoFileClip(video)
+    data_dir = 'storage/dataset'
+    id_to_name, _ = ClassMapping(data_dir)
+    process_frame = FrameProcessor(detector, classifier, id_to_name)
+    clip = clip.fl_image(process_frame)
+    clip.write_videofile(output, fps=clip.fps, audio_codec='aac', logger=None)
+    return video
+
+inputs = gr.Video(sources=['upload'], label='Input Video')
+outputs = gr.Video(interactive=False, label='Aeroplane Position and Action Marked')
+
+iface = gr.Interface(
+    fn=fn,
+    inputs=inputs,
+    outputs=outputs,
+)
 iface.launch()
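For reference, a minimal sketch of exercising the same pipeline outside Gradio, useful as a local smoke test. The sample clip and output paths are assumptions; the weights, dataset directory, and processing calls mirror app.py above.

from moviepy.editor import VideoFileClip
from ultralytics import YOLO

from core.data import ClassMapping
from core.inference import FrameProcessor
from core.model import load_classifier

# Load the same weights and class mapping the Space uses.
classifier = load_classifier('weights/classifier-7.keras')
detector = YOLO('weights/yolov8n.pt')
id_to_name, _ = ClassMapping('storage/dataset')

# Apply the stateful per-frame processor to a hypothetical local clip.
clip = VideoFileClip('assets/sample.mp4')
clip = clip.fl_image(FrameProcessor(detector, classifier, id_to_name))
clip.write_videofile('marked-sample.mp4', fps=clip.fps, audio_codec='aac')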
core/data.py ADDED
@@ -0,0 +1,77 @@
+from pathlib import Path
+import random
+from typing import Literal
+import cv2
+import numpy as np
+import tensorflow as tf
+
+training_ratio = 0.7
+validation_ratio = 0.02
+num_frames = 8
+frame_step = 15
+frame_size = (224, 224)
+
+def format_frame(frame):
+    frame = tf.image.convert_image_dtype(frame, tf.float32)
+    frame = tf.image.resize_with_pad(frame, *frame_size)
+    return frame
+
+def pick_frames(video: str):
+    capture = cv2.VideoCapture(video)
+    if not capture.isOpened(): raise ValueError('Video file could not be opened.')
+    total_frames = capture.get(cv2.CAP_PROP_FRAME_COUNT)
+    need_frames = 1 + (num_frames - 1) * frame_step
+    if need_frames <= total_frames:
+        start = random.randint(0, total_frames - need_frames + 1)
+        capture.set(cv2.CAP_PROP_POS_FRAMES, start)
+    frames = []
+    for _ in range(num_frames):
+        for _ in range(frame_step):
+            ok, frame = capture.read()
+        if ok: frames.append(format_frame(frame))
+        else: frames.append(np.zeros(frame_size + (3,)))
+    capture.release()
+    frames = np.array(frames)
+    frames = frames[..., [2, 1, 0]]
+    return frames
+
+def Data(data_dir: str):
+    data_dir = Path(data_dir)
+    return {
+        'training': {
+            a.name: (lambda ps: ps[:int(len(ps) * training_ratio)])([x for x in a.iterdir()])
+            for a in data_dir.iterdir()
+        },
+        'validation': {
+            a.name: (lambda ps: ps[
+                int(len(ps) * training_ratio) :
+                int(len(ps) * (training_ratio + validation_ratio))
+            ])([x for x in a.iterdir()])
+            for a in data_dir.iterdir()
+        },
+    }
+
+def ClassMapping(data_dir: str):
+    data_dir = Path(data_dir)
+    id_to_name = sorted([x.name for x in data_dir.iterdir()])
+    name_to_id = {
+        name: i
+        for i, name in enumerate(id_to_name)
+    }
+    return (id_to_name, name_to_id)
+
+def FrameGenerator(data_dir: str, split: Literal['training', 'validation']):
+    _, name_to_id = ClassMapping(data_dir)
+    data = Data(data_dir)
+    def generator():
+        pairs = [
+            (video, class_name)
+            for class_name, videos in data[split].items()
+            for video in videos
+        ]
+        random.shuffle(pairs)
+        for video, class_name in pairs:
+            frames = pick_frames(video)
+            label = name_to_id[class_name]
+            yield frames, label
+    return generator
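For reference, FrameGenerator returns a zero-argument generator factory, so it can be handed straight to tf.data, following the pattern of the removed playgrounds/load_video.py. The dataset path and batch size below are assumptions.

import tensorflow as tf
from core.data import FrameGenerator

output_signature = (
    tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.float32),  # (frames, height, width, rgb)
    tf.TensorSpec(shape=(), dtype=tf.int16),                       # class id
)
train_ds = tf.data.Dataset.from_generator(
    FrameGenerator('storage/dataset', 'training'),
    output_signature=output_signature,
).batch(8)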
core/inference.py ADDED
@@ -0,0 +1,91 @@
+from imgviz import instances2rgb
+import tensorflow as tf
+import numpy as np
+
+from core.data import format_frame
+
+# detections: (classes: list of class_name, boxes: list of [x1, y1, x2, y2])
+# actions: list of f'{action_name}: {confidence}'
+
+detect_object_frame_steps = 5
+classify_action_frame_steps = 15
+classify_action_num_frames = 8
+
+def detect_object(detector, frame):
+    result = detector(frame, classes=4, verbose=False)[0]
+    classes = result.boxes.cls.numpy()
+    boxes = result.boxes.xyxy.numpy()
+    predictions = [
+        (result.names[classes[i]].capitalize(), boxes[i])
+        for i in range(len(classes))
+    ]
+    detections = (
+        [result.names[i].capitalize() for i in classes],
+        boxes,
+    )
+    return detections
+
+def classify_action(classifier, frames, id_to_name):
+    actions = []
+    frames = np.array(frames)
+    frames = frames[..., [2, 1, 0]]
+    frames = tf.expand_dims(frames, 0)
+    output = classifier(frames, training=False)
+    confidences = tf.nn.softmax(output).numpy()[0]
+    for (class_id, confidence) in enumerate(confidences):
+        other_class_id = 2
+        if confidence > 0.3 and class_id != other_class_id:
+            actions.append(f'{id_to_name[class_id]}: {np.round(confidence, 2)}')
+    return actions
+
+def draw_boxes(frame, detections, actions):
+    (classes, boxes) = detections
+    max_area = 0
+    max_area_id = 0
+    for i, box in enumerate(boxes):
+        area = (box[3] - box[1]) * (box[2] - box[0])
+        if area > max_area:
+            max_area = area
+            max_area_id = i
+    labels = [0 for _ in classes]
+    colormap = [(0x39, 0xc5, 0xbb)]
+    line_width = 2
+    captions = [
+        f'{class_name}\n' + '\n'.join(actions if i == max_area_id else [])
+        for (i, class_name) in enumerate(classes)
+    ]
+    bboxes = [
+        [box[1], box[0], box[3], box[2]]
+        for box in boxes
+    ]
+    frame = instances2rgb(
+        frame,
+        labels=labels,
+        captions=captions,
+        bboxes=bboxes,
+        colormap=colormap,
+        font_size=20,
+        line_width=line_width,
+    )
+    return frame
+
+def FrameProcessor(detector, classifier, id_to_name):
+    current_frame = 0
+    frames = []
+    actions = []
+    detections = ([], [])
+    def process_frame(frame):
+        nonlocal current_frame, frames, actions, detections
+        current_frame += 1
+        if current_frame % classify_action_frame_steps == 0:
+            frames.append(format_frame(frame))
+        if current_frame % detect_object_frame_steps == 0:
+            print(f'Detect object: Frame {current_frame}')
+            detections = detect_object(detector, frame)
+        if len(frames) == classify_action_num_frames:
+            print(f'Classify action: Until frame {current_frame}')
+            actions = classify_action(classifier, frames, id_to_name)
+            frames = []
+        frame = draw_boxes(frame, detections, actions)
+        return frame
+    return process_frame
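For reference, detect_object and draw_boxes can also be run on a single still image. This sketch assumes `detector` is already loaded as in app.py; the image paths are hypothetical.

import cv2
from core.inference import detect_object, draw_boxes

frame = cv2.imread('assets/sample.png')[..., ::-1].copy()  # BGR -> RGB, as moviepy supplies
detections = detect_object(detector, frame)                # ([class names], [xyxy boxes])
marked = draw_boxes(frame, detections, actions=[])         # no action captions for a single frame
cv2.imwrite('marked-sample.png', marked[..., ::-1])        # back to BGR for OpenCV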
core/model.py ADDED
@@ -0,0 +1,44 @@
+import tensorflow as tf
+from tensorflow import keras
+from official.projects.movinet.modeling import movinet
+from official.projects.movinet.modeling import movinet_model
+
+model_id = 'a1'
+num_classes = 6
+num_frames = 8
+resolution = 224
+
+batch_size = 32
+learning_rate = 0.001
+backbone_trainable = True
+
+def build_classifier_with_pretrained_weights(checkpoint_dir: str):
+    backbone = movinet.Movinet(model_id=model_id)
+    backbone.trainable = backbone_trainable
+    model = movinet_model.MovinetClassifier(backbone=backbone, num_classes=600)
+    checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
+    checkpoint = tf.train.Checkpoint(model=model)
+    status = checkpoint.restore(checkpoint_path)
+    status.assert_existing_objects_matched()
+    model = movinet_model.MovinetClassifier(
+        backbone=backbone,
+        num_classes=num_classes,
+    )
+    model.build([batch_size, num_frames, resolution, resolution, 3])
+    return model
+
+def load_classifier(weights_path: str):
+    backbone = movinet.Movinet(model_id=model_id)
+    model = movinet_model.MovinetClassifier(
+        backbone=backbone,
+        num_classes=num_classes,
+    )
+    model.build([1, num_frames, resolution, resolution, 3])
+    model.load_weights(weights_path)
+    return model
+
+def compile_classifier(model):
+    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
+    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
+    return model
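For reference, a rough fine-tuning sketch combining these helpers with core.data.FrameGenerator. The checkpoint directory, dataset path, batch size, and epoch count are assumptions; the commit itself only ships the already-trained weights.

import tensorflow as tf
from core.data import FrameGenerator
from core.model import build_classifier_with_pretrained_weights, compile_classifier

# Assumes a MoViNet-A1 base checkpoint was downloaded and extracted here.
model = build_classifier_with_pretrained_weights('storage/movinet_a1_base')
model = compile_classifier(model)

output_signature = (
    tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.float32),
    tf.TensorSpec(shape=(), dtype=tf.int16),
)
train_ds = tf.data.Dataset.from_generator(
    FrameGenerator('storage/dataset', 'training'),
    output_signature=output_signature,
).batch(8)

model.fit(train_ds, epochs=2)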
movinet/data.py DELETED
@@ -1,79 +0,0 @@
-from pathlib import Path
-import random
-from typing import Literal
-import cv2
-import numpy as np
-import tensorflow as tf
-
-TRAINING_RATIO = 0.1
-VALIDATION_RATIO = 0.01
-
-def format_frames(frame, output_size):
-    frame = tf.image.convert_image_dtype(frame, tf.float32)
-    frame = tf.image.resize_with_pad(frame, *output_size)
-    return frame
-
-def frames_from_video_file(video_path: str, n_frames: int, output_size=(256, 256), frame_step=15):
-    capture = cv2.VideoCapture(video_path)
-    if not capture.isOpened(): raise ValueError('Video file could not be opened.')
-    total_frames = capture.get(cv2.CAP_PROP_FRAME_COUNT)
-    need_frames = 1 + (n_frames - 1) * frame_step
-
-    if need_frames <= total_frames:
-        start = random.randint(0, total_frames - need_frames + 1)
-        capture.set(cv2.CAP_PROP_POS_FRAMES, start)
-
-    frames = []
-    for _ in range(n_frames - 1):
-        for _ in range(frame_step):
-            ok, frame = capture.read()
-        if ok:
-            frames.append(format_frames(frame, output_size))
-        else:
-            frames.append(np.zeros((output_size[0], output_size[1], 3)))
-    capture.release()
-
-    frames = np.array(frames)
-    frames = frames[..., [2, 1, 0]]
-    return frames
-
-def Data(data_dir: Path):
-    return {
-        'training': {
-            a.name: (lambda ps: ps[:int(len(ps) * TRAINING_RATIO)])([x for x in a.iterdir()])
-            for a in data_dir.iterdir()
-        },
-        'validation': {
-            a.name: (lambda ps: ps[
-                int(len(ps) * TRAINING_RATIO) :
-                int(len(ps) * (TRAINING_RATIO + VALIDATION_RATIO))
-            ])([x for x in a.iterdir()])
-            for a in data_dir.iterdir()
-        },
-    }
-
-def frame_generator(data_dir: Path, n_frames: int, split: Literal['training', 'validation']):
-    class_names = sorted([x.name for x in data_dir.iterdir()])
-    class_ids_for_name = {
-        name: i
-        for i, name in enumerate(class_names)
-    }
-    data = Data(data_dir)
-
-    def generator():
-        pairs = [
-            (path, name)
-            for name, paths in data[split].items()
-            for path in paths
-        ]
-        random.shuffle(pairs)
-        for path, name in pairs:
-            video_frames = frames_from_video_file(str(path), n_frames)
-            label = class_ids_for_name[name]
-            yield video_frames, label
-    return generator
-
-def total_steps(data_dir: Path):
-    data = Data(data_dir)
-    size = lambda d: sum([len(x) for x in d.values()])
-    return size(data['training']), size(data['validation'])
playgrounds/load_video.py DELETED
@@ -1,112 +0,0 @@
-import random
-from typing import *
-import numpy as np
-import tensorflow as tf
-import cv2
-from pathlib import Path
-
-SPLIT_RATIO = 0.7
-BATCH_SIZE = 8
-NUM_FRAMES = 8
-
-def main():
-    data_dir = Path('assets/dataset')
-    output_signature = (
-        tf.TensorSpec(shape = (None, None, None, 3), dtype = tf.float32),
-        tf.TensorSpec(shape = (), dtype = tf.int16),
-    )
-    train_ds = tf.data.Dataset.from_generator(frame_generator(data_dir, NUM_FRAMES, 'training'), output_signature=output_signature)
-    train_ds = train_ds.batch(BATCH_SIZE)
-
-
-def format_frames(frame, output_size):
-    """
-    Pad and resize an image from a video.
-
-    Args:
-        frame: Image that needs to resized and padded.
-        output_size: Pixel size of the output frame image.
-
-    Return:
-        Formatted frame with padding of specified output size.
-    """
-    frame = tf.image.convert_image_dtype(frame, tf.float32)
-    frame = tf.image.resize_with_pad(frame, *output_size)
-    return frame
-
-
-def frames_from_video_file(video_path, n_frames, output_size=(224, 224), frame_step=15):
-    """
-    Creates frames from each video file present for each category.
-
-    Args:
-        video_path: File path to the video.
-        n_frames: Number of frames to be created per video file.
-        output_size: Pixel size of the output frame image.
-
-    Return:
-        An NumPy array of frames in the shape of (n_frames, height, width, channels).
-    """
-    # Read each video frame by frame
-    result = []
-    src = cv2.VideoCapture(str(video_path))
-
-    video_length = src.get(cv2.CAP_PROP_FRAME_COUNT)
-
-    need_length = 1 + (n_frames - 1) * frame_step
-
-    if need_length > video_length:
-        start = 0
-    else:
-        max_start = video_length - need_length
-        start = random.randint(0, max_start + 1)
-
-    src.set(cv2.CAP_PROP_POS_FRAMES, start)
-    # ret is a boolean indicating whether read was successful, frame is the image itself
-    ok, frame = src.read()
-    if not ok:
-        raise ValueError('read video not success')
-    result.append(format_frames(frame, output_size))
-
-    for _ in range(n_frames - 1):
-        for _ in range(frame_step):
-            ok, frame = src.read()
-        if ok:
-            frame = format_frames(frame, output_size)
-            result.append(frame)
-        else:
-            result.append(np.zeros_like(result[0]))
-    src.release()
-    result = np.array(result)[..., [2, 1, 0]]
-
-    return result
-
-def frame_generator(data_dir: Path, n_frames: int, split: Literal['training', 'validation']):
-    class_names = sorted([x.name for x in data_dir.iterdir()])
-    class_ids_for_name = {
-        name: i
-        for i, name in enumerate(class_names)
-    }
-    data = {
-        'training': {
-            a.name: (lambda ps: ps[:int(len(ps) * SPLIT_RATIO)])([x for x in a.iterdir()])
-            for a in data_dir.iterdir()
-        },
-        'validation': {
-            a.name: (lambda ps: ps[int(len(ps) * SPLIT_RATIO):])([x for x in a.iterdir()])
-            for a in data_dir.iterdir()
-        },
-    }
-
-    def generator():
-        pairs = [
-            (path, name)
-            for name, paths in data[split].items()
-            for path in paths
-        ]
-        random.shuffle(pairs)
-        for path, name in pairs:
-            video_frames = frames_from_video_file(path, n_frames)
-            label = class_ids_for_name[name]  # Encode labels
-            yield video_frames, label
-    return generator
playgrounds/movinet.py DELETED
@@ -1,80 +0,0 @@
-import tensorflow as tf
-import numpy as np
-import tensorflow_hub as hub
-import keras
-
-labels_path = keras.utils.get_file(
-    fname='labels.txt',
-    origin='https://raw.githubusercontent.com/tensorflow/models/f8af2291cced43fc9f1d9b41ddbf772ae7b0d7d2/official/projects/movinet/files/kinetics_600_labels.txt'
-)
-
-with open(labels_path, 'r', encoding='utf-8') as file:
-    lines = file.read().splitlines()
-
-KINETICS_600_LABELS = np.array([line.strip() for line in lines])
-KINETICS_600_LABELS[:20]
-
-def main():
-    jumping_jack_path = 'assets/jumping_pack.gif'
-    jumping_jack = load_gif(jumping_jack_path)
-
-    id = 'a2'
-    mode = 'base'
-    version = '3'
-    hub_url = f'https://tfhub.dev/tensorflow/movinet/{id}/{mode}/kinetics-600/classification/{version}'
-    model = hub.load(hub_url)
-    sig = model.signatures['serving_default']
-    print('Model loaded.')
-
-    sig(image=jumping_jack[tf.newaxis, :1])
-    logits = sig(image=jumping_jack[tf.newaxis, ...])
-    logits = logits['classifier_head'][0]
-    probs = tf.nn.softmax(logits, axis=-1)
-    for label, p in get_top_k(probs):
-        print(f'{label:20s}: {p:.3f}')
-
-def get_top_k(probs, k=5, label_map=KINETICS_600_LABELS):
-    """Outputs the top k model labels and probabilities on the given video.
-
-    Args:
-        probs: probability tensor of shape (num_frames, num_classes) that represents
-            the probability of each class on each frame.
-        k: the number of top predictions to select.
-        label_map: a list of labels to map logit indices to label strings.
-
-    Returns:
-        a tuple of the top-k labels and probabilities.
-    """
-    # Sort predictions to find top_k
-    top_predictions = tf.argsort(probs, axis=-1, direction='DESCENDING')[:k]
-    # collect the labels of top_k predictions
-    top_labels = tf.gather(label_map, top_predictions, axis=-1)
-    # decode labels
-    top_labels = [label.decode('utf8') for label in top_labels.numpy()]
-    # top_k probabilities of the predictions
-    top_probs = tf.gather(probs, top_predictions, axis=-1).numpy()
-    return tuple(zip(top_labels, top_probs))
-
-def load_gif(file_path, image_size=(224, 224)):
-    """Loads a gif file into a TF tensor.
-
-    Use images resized to match what's expected by your model.
-    The model pages say the "A2" models expect 224 x 224 images at 5 fps
-
-    Args:
-        file_path: path to the location of a gif file.
-        image_size: a tuple of target size.
-
-    Returns:
-        a video of the gif file
-    """
-    # Load a gif file, convert it to a TF tensor
-    raw = tf.io.read_file(file_path)
-    video = tf.io.decode_gif(raw)
-    # Resize the video
-    video = tf.image.resize(video, image_size)
-    # change dtype to a float32
-    # Hub models always want images normalized to [0,1]
-    # ref: https://www.tensorflow.org/hub/common_signatures/images#input
-    video = tf.cast(video, tf.float32) / 255.
-    return video
playgrounds/verify_metal.py DELETED
@@ -1,14 +0,0 @@
-import tensorflow as tf
-
-cifar = tf.keras.datasets.cifar100
-(x_train, y_train), (x_test, y_test) = cifar.load_data()
-model = tf.keras.applications.ResNet50(
-    include_top=True,
-    weights=None,
-    input_shape=(32, 32, 3),
-    classes=100,
-)
-
-loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
-model.compile(optimizer="adam", loss=loss_fn, metrics=["accuracy"])
-model.fit(x_train, y_train, epochs=5, batch_size=64)
playgrounds/yolo.py DELETED
@@ -1,40 +0,0 @@
-import keras
-import keras_cv
-import numpy as np
-import tensorflow as tf
-
-from playgrounds.load_video import frames_from_video_file
-
-def main():
-    pretrained_model = keras_cv.models.YOLOV8Detector.from_preset(
-        "yolo_v8_m_pascalvoc", bounding_box_format="xywh"
-    )
-    print('Model loaded.')
-
-    inference_resizing = keras_cv.layers.Resizing(
-        640, 640, pad_to_aspect_ratio=True, bounding_box_format="xywh"
-    )
-
-    class_ids = [
-        "Aeroplane", "Bicycle", "Bird", "Boat", "Bottle", "Bus", "Car", "Cat", "Chair", "Cow", "Dining Table",
-        "Dog", "Horse", "Motorbike", "Person", "Potted Plant", "Sheep", "Sofa", "Train", "Tvmonitor", "Total",
-    ]
-    class_mapping = {i: c for (i, c) in enumerate(class_ids)}
-
-    # raw = tf.io.read_file('assets/IMG_9528.gif')
-    # video = tf.io.decode_gif(raw)
-    video = frames_from_video_file('assets/dataset/Flying/2kNjmM8BnD0_230.0_238.0.mp4', 3, (640,640))
-    image = video[0]
-    image = (image*255).astype(np.uint8)
-    file = tf.io.encode_png(image)
-    tf.io.write_file('out/t.png', file)
-    # image = keras.utils.load_img('assets/nick-morales-BwYcH78rcpI-unsplash.jpg')
-    # image = np.array(image)
-
-    image_batch = inference_resizing([image])
-
-    y_pred = pretrained_model.predict(image_batch)
-    classes = y_pred['classes']
-    boxes = y_pred["boxes"]
-    print(f'Classes: {classes}')
-    print(f'Boxes: {boxes}')
requirements.txt CHANGED
@@ -1,3 +1,7 @@
-gradio
 tensorflow
+numpy
 opencv-python
+tf-models-official
+ultralytics
+imgviz
+moviepy
weights/classifier-7.keras ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13a9436ec0971fe72b53f03d9dd57b89a7c48a4cb82380e14b298c3e2d712f50
+size 25261904
weights/yolov8n.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31e20dde3def09e2cf938c7be6fe23d9150bbbe503982af13345706515f2ef95
+size 6534387