"""Display a video with per-frame binary thresholding, then write an audio
track trimmed/padded to the video's duration.

Reads 'video_file.mp4' and 'audio_file.wav' from the working directory and
writes 'output_file.wav'. Press 'q' in the display window to stop playback.
"""

import wave

import cv2
import gradio as gr  # NOTE(review): unused here — presumably for a UI elsewhere; confirm before removing
import librosa
import numpy as np

# --- Video playback with thresholding -----------------------------------
cap = cv2.VideoCapture('video_file.mp4')
if not cap.isOpened():
    raise RuntimeError("Could not open 'video_file.mp4'")

# Capture timing metadata BEFORE the read loop so it is available even if
# playback is interrupted. NOTE: cv2.VideoCapture decodes frames only and
# cannot probe or extract the audio stream (there is no CAP_PROP_AUDIO_STATUS).
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0  # fall back if the container lacks fps
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

while True:
    ret, frame = cap.read()
    if not ret:
        # End of stream (or decode error) — stop cleanly instead of
        # crashing on cvtColor(None, ...).
        break

    # Grayscale + fixed binary threshold at 100.
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)

    cv2.imshow("Thresholded Image", thresh)

    # 'q' quits playback early.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

# --- Audio: fit the track to the video's duration ------------------------
# librosa.load returns a float32 mono signal in [-1, 1] and its sample rate.
audio, sr = librosa.load('audio_file.wav')

# Duration implied by the video's frame count and frame rate.
duration_s = frame_count / fps if fps > 0 else 0.0
n_samples = int(round(duration_s * sr))

# Trim the audio to the video length, zero-padding if it is shorter.
new_audio = audio[:n_samples]
if len(new_audio) < n_samples:
    new_audio = np.pad(new_audio, (0, n_samples - len(new_audio)))

# Write a 16-bit PCM mono WAV with the stdlib (librosa has no `save`;
# its old `output.write_wav` was removed in librosa 0.8).
pcm = (np.clip(new_audio, -1.0, 1.0) * 32767).astype(np.int16)
with wave.open('output_file.wav', 'wb') as wf:
    wf.setnchannels(1)
    wf.setsampwidth(2)   # 2 bytes = 16-bit samples
    wf.setframerate(sr)
    wf.writeframes(pcm.tobytes())