import gradio as gr
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import cv2
# Define constants
IMG_SIZE = 224
MAX_SEQ_LENGTH = 30
NUM_FEATURES = 2048
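
# IMG_SIZE is the square frame size fed to the extractor; NUM_FEATURES (2048)
# matches the length of InceptionV3's average-pooled output; MAX_SEQ_LENGTH
# caps how many frames the sequence model sees per video.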

# Load the trained model
model_filepath = "lstm_model.h5"  # Replace with the actual path
loaded_model = keras.models.load_model(model_filepath)

train_df = pd.DataFrame({
    "tag": ["BabyCrawling", "CricketShot"],
})
label_processor = keras.layers.StringLookup(
    num_oov_indices=0, vocabulary=np.unique(train_df["tag"])
)
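
# StringLookup fixes the class order (np.unique sorts alphabetically), so
# get_vocabulary() later maps prediction indices back to class names.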

def crop_center_square(frame):
    """Crop the largest centered square from a frame."""
    y, x = frame.shape[0:2]
    min_dim = min(y, x)
    start_x = (x // 2) - (min_dim // 2)
    start_y = (y // 2) - (min_dim // 2)
    return frame[start_y : start_y + min_dim, start_x : start_x + min_dim]

def load_video(path, max_frames=0, resize=(IMG_SIZE, IMG_SIZE)):
    """Read a video, center-crop each frame to a square, resize, and convert BGR -> RGB.

    max_frames=0 reads every frame in the clip.
    """
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = crop_center_square(frame)
            frame = cv2.resize(frame, resize)
            frame = frame[:, :, [2, 1, 0]]  # OpenCV decodes BGR; reorder to RGB
            frames.append(frame)
            if len(frames) == max_frames:
                break
    finally:
        cap.release()
    return np.array(frames)
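
# Example (hypothetical clip name): load_video("some_clip.mp4") returns an
# array of shape (num_frames, IMG_SIZE, IMG_SIZE, 3) holding RGB uint8 frames.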

# Build the frame-level feature extractor
def build_feature_extractor():
    """InceptionV3 (ImageNet weights, average-pooled) with its preprocessing baked in."""
    feature_extractor = keras.applications.InceptionV3(
        weights="imagenet",
        include_top=False,
        pooling="avg",
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
    )
    preprocess_input = keras.applications.inception_v3.preprocess_input

    inputs = keras.Input((IMG_SIZE, IMG_SIZE, 3))
    preprocessed = preprocess_input(inputs)
    outputs = feature_extractor(preprocessed)
    return keras.Model(inputs, outputs, name="feature_extractor")
feature_extractor = build_feature_extractor()
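
# Sanity check: one frame in, one (1, NUM_FEATURES) vector out, e.g.
#   feature_extractor.predict(np.zeros((1, IMG_SIZE, IMG_SIZE, 3))).shape == (1, 2048)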

def prepare_single_video(frames):
    """Run every frame through the feature extractor and build the padding mask."""
    frames = frames[None, ...]  # add a batch axis: (1, num_frames, H, W, 3)
    frame_mask = np.zeros(shape=(1, MAX_SEQ_LENGTH), dtype="bool")
    frame_features = np.zeros(shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32")
    for i, batch in enumerate(frames):
        video_length = batch.shape[0]
        length = min(MAX_SEQ_LENGTH, video_length)
        for j in range(length):
            # verbose=0 silences the per-frame progress bar
            frame_features[i, j, :] = feature_extractor.predict(batch[None, j, :], verbose=0)
        frame_mask[i, :length] = 1  # 1 = not masked (real frame), 0 = masked (padding)
    return frame_features, frame_mask
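
# Clips shorter than MAX_SEQ_LENGTH leave zero rows in frame_features; the
# mask marks those timesteps as padding so the sequence model ignores them.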

def sequence_prediction(video_file):
    """Classify an uploaded video and return the predicted class name."""
    class_vocab = label_processor.get_vocabulary()
    # Load the video frames
    frames = load_video(video_file)
    # Extract per-frame features and the padding mask
    frame_features, frame_mask = prepare_single_video(frames)
    # Make predictions using the loaded model
    probabilities = loaded_model.predict([frame_features, frame_mask])[0]
    # Get the predicted label
    predicted_label = class_vocab[np.argmax(probabilities)]
    return predicted_label
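
# Quick sanity check with one of the bundled example clips:
#   sequence_prediction("video-1.mp4")  # -> "BabyCrawling" or "CricketShot"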

example_list = [
    ["video-1.mp4"],
    ["video-2.mp4"],
]

# Gradio interface
iface = gr.Interface(
    fn=sequence_prediction,
    inputs=gr.Video(label="Upload a video file"),
    outputs="text",
    examples=example_list,
)

# Launch the Gradio app
iface.launch()