import cv2
import numpy as np
import gradio as gr
from tensorflow.keras.models import load_model

# Define constants
SEQUENCE_LENGTH = 20  # Number of frames to extract per video
IMAGE_HEIGHT = 64     # Height of each frame
IMAGE_WIDTH = 64      # Width of each frame
CLASSES_LIST = ["Archery", "BabyCrawling", "Balance_Beam", "EyeMakeup", "LipStick"]

# Load the trained LRCN model
loaded_model = load_model(r"LRCN_model.h5")


def frames_extraction(video_reader):
    """Extract SEQUENCE_LENGTH evenly spaced, resized, normalized frames from a video."""
    frames_list = []
    video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
    # Sample frames evenly across the video; skip at least 1 frame between samples
    skip_frames_window = max(int(video_frames_count / SEQUENCE_LENGTH), 1)

    for frame_counter in range(SEQUENCE_LENGTH):
        video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)
        success, frame = video_reader.read()
        if not success:
            break
        # cv2.resize expects the target size as (width, height)
        resized_frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))
        normalized_frame = resized_frame / 255  # scale pixel values from 0-255 to 0-1
        frames_list.append(normalized_frame)

    video_reader.release()
    return frames_list


# Function to classify the extracted frames
def classify_video(features):
    predicted_labels = np.argmax(loaded_model.predict(features), axis=1)
    # Return the label for the first (and only) video in the batch
    return CLASSES_LIST[predicted_labels[0]]


# Prediction function called by the Gradio interface
def predict_video(video_file):
    video_reader = cv2.VideoCapture(video_file)
    frames = frames_extraction(video_reader)

    if len(frames) != SEQUENCE_LENGTH:
        return "Could not extract enough frames from the video."

    # Batch of one video, shape: (1, SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 3)
    features = np.asarray([frames])
    return classify_video(features)


# Gradio interface definition
iface = gr.Interface(
    fn=predict_video,
    inputs=gr.Video(),
    outputs="text",
    title="Action Recognition with LSTM",
    description="Upload a video and get the predicted action class."
)

# Launch the Gradio interface
iface.launch()
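
# Optional sanity check: a minimal sketch of calling the prediction function
# directly, without the Gradio UI. "sample_clip.mp4" is a hypothetical path;
# replace it with any short video on disk. Uncomment to run instead of (or
# before) iface.launch() above, since launch() blocks the script.
# print(predict_video("sample_clip.mp4"))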