# credify/app/utils/forgery_video_utils.py
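"""Video helpers for forgery analysis: audio extraction, speech detection,
frame sampling, and size/duration-constrained re-encoding.

Inputs and outputs are exchanged with Firebase via app.utils.file_utils;
media handling is done in memory with PyAV.
"""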
import io
import logging
import traceback
from typing import List, Optional

import av
import librosa
import numpy as np
from PIL import Image

from app.utils.file_utils import get_file_content, upload_file_to_firebase


async def extract_audio(firebase_filename: str) -> Optional[str]:
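    """Extract the audio track of a stored video as 16-bit PCM WAV.

    Returns the Firebase filename of the uploaded WAV file, or None if the
    video has no audio stream, the extracted audio is under ~1 KB, or
    extraction fails.
    """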
    try:
        video_content = get_file_content(firebase_filename)
        input_container = av.open(io.BytesIO(video_content))

        audio_stream = next((s for s in input_container.streams if s.type == 'audio'), None)
        if audio_stream is None:
            logging.warning(f"No audio stream found in {firebase_filename}")
            return None

        logging.info(f"Audio stream found: {audio_stream}")
        logging.info(f"Audio codec: {audio_stream.codec_context.name}")
        logging.info(f"Audio sample rate: {audio_stream.rate}")
        logging.info(f"Audio bit rate: {audio_stream.bit_rate}")

        output_buffer = io.BytesIO()
        output_container = av.open(output_buffer, mode='w', format='wav')
        output_stream = output_container.add_stream('pcm_s16le', rate=audio_stream.rate)

        frame_count = 0
        for frame in input_container.decode(audio_stream):
            frame_count += 1
            for packet in output_stream.encode(frame):
                output_container.mux(packet)

        logging.info(f"Processed {frame_count} audio frames")

        # Flush the stream
        for packet in output_stream.encode(None):
            output_container.mux(packet)

        output_container.close()

        audio_content = output_buffer.getvalue()
        audio_size = len(audio_content)
        logging.info(f"Extracted audio size: {audio_size} bytes")

        if audio_size < 1024:  # Check if audio content is too small (less than 1KB)
            logging.warning(f"Extracted audio is too short for {firebase_filename}")
            return None

        audio_filename = f"{firebase_filename}_audio.wav"
        await upload_file_to_firebase(audio_content, audio_filename)
        logging.info(f"Audio extracted and uploaded: {audio_filename}")
        return audio_filename
    except Exception as e:
        logging.error(f"Error extracting audio: {str(e)}")
        logging.error(traceback.format_exc())
        return None


def detect_speech(audio_content: bytes) -> bool:
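    """Heuristically decide whether WAV audio contains speech.

    Uses librosa's per-frame RMS energy: the clip counts as speech when more
    than 10% of frames exceed a fixed energy threshold (0.01).
    """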
    try:
        y, sr = librosa.load(io.BytesIO(audio_content), sr=None)
        logging.info(f"Loaded audio with sample rate: {sr}, length: {len(y)}")

        # Calculate the root mean square energy per frame
        rms = librosa.feature.rms(y=y)[0]

        # Calculate the percentage of frames with energy above a threshold
        threshold = 0.01  # Adjust this value based on your needs
        speech_frames = np.sum(rms > threshold)
        speech_percentage = speech_frames / len(rms)
        logging.info(f"Speech detection: {speech_percentage:.2%} of frames above threshold")

        # If more than 10% of frames have energy above the threshold, consider it speech
        is_speech = speech_percentage > 0.1
        logging.info(f"Speech detected: {is_speech}")
        return is_speech
    except Exception as e:
        logging.error(f"Error detecting speech: {str(e)}")
        logging.error(traceback.format_exc())
        return False


async def extract_frames(firebase_filename: str, max_frames: int = 20) -> List[str]:
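    """Sample up to max_frames frames evenly across the video.

    Each sampled frame is saved as a JPEG and uploaded to Firebase; the list
    of uploaded frame filenames is returned (possibly shorter than max_frames
    if decoding fails partway through).
    """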
    frames = []
    video_content = get_file_content(firebase_filename)
    try:
        with av.open(io.BytesIO(video_content)) as container:
            video_stream = container.streams.video[0]
            duration = float(video_stream.duration * video_stream.time_base)
            frame_interval = duration / max_frames

            for i in range(max_frames):
                # Container-level seeks use av.time_base (microsecond) units
                container.seek(int(i * frame_interval * av.time_base))
                for frame in container.decode(video=0):
                    frame_rgb = frame.to_rgb().to_ndarray()
                    frame_image = Image.fromarray(frame_rgb)
                    frame_filename = f"{firebase_filename}_frame_{i}.jpg"

                    frame_byte_arr = io.BytesIO()
                    frame_image.save(frame_byte_arr, format='JPEG')
                    frame_bytes = frame_byte_arr.getvalue()

                    await upload_file_to_firebase(frame_bytes, frame_filename)
                    frames.append(frame_filename)
                    break  # Only take the first frame after seeking
    except Exception as e:
        logging.error(f"Error extracting frames: {str(e)}")
        logging.error(traceback.format_exc())
    return frames


async def compress_and_process_video(firebase_filename: str, target_size_mb: int = 50, max_duration: int = 60) -> str:
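    """Re-encode a stored video to H.264/AAC MP4 under a target size.

    The video is capped at max_duration seconds, scaled to fit within
    1280x720 with even dimensions, and encoded at a bitrate derived from
    target_size_mb. The compressed file is uploaded to Firebase and its
    filename returned; exceptions are logged and re-raised.
    """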
    video_content = get_file_content(firebase_filename)
    try:
        input_container = av.open(io.BytesIO(video_content))
        video_stream = input_container.streams.video[0]
        audio_stream = next((s for s in input_container.streams if s.type == 'audio'), None)

        # Get video information
        width = video_stream.width
        height = video_stream.height
        duration = float(video_stream.duration * video_stream.time_base)
        duration = min(duration, max_duration)
        frame_rate = video_stream.average_rate

        # Calculate target bitrate
        target_size_bits = target_size_mb * 8 * 1024 * 1024
        target_bitrate = int(target_size_bits / duration)

        # Adjust dimensions, preserving aspect ratio within 1280x720
        if width > height:
            new_width = min(width, 1280)
            new_height = int((new_width / width) * height)
        else:
            new_height = min(height, 720)
            new_width = int((new_height / height) * width)

        # Ensure even dimensions (required for yuv420p)
        new_width = new_width - (new_width % 2)
        new_height = new_height - (new_height % 2)

        output_buffer = io.BytesIO()
        output_container = av.open(output_buffer, mode='w', format='mp4')

        output_video_stream = output_container.add_stream('libx264', rate=frame_rate)
        output_video_stream.width = new_width
        output_video_stream.height = new_height
        output_video_stream.pix_fmt = 'yuv420p'
        output_video_stream.bit_rate = target_bitrate

        if audio_stream:
            output_audio_stream = output_container.add_stream('aac', rate=audio_stream.rate)
            # 128k bitrate for audio, or lower if the original is lower
            output_audio_stream.bit_rate = min(128000, audio_stream.bit_rate or 128000)

        for packet in input_container.demux((video_stream, audio_stream) if audio_stream else (video_stream,)):
            if packet.dts is None:
                continue

            if packet.stream.type == 'video':
                for frame in packet.decode():
                    if frame.time > duration:
                        break
                    new_frame = frame.reformat(width=new_width, height=new_height, format='yuv420p')
                    for out_packet in output_video_stream.encode(new_frame):
                        output_container.mux(out_packet)
            elif packet.stream.type == 'audio' and audio_stream:
                for frame in packet.decode():
                    if frame.time > duration:
                        break
                    for out_packet in output_audio_stream.encode(frame):
                        output_container.mux(out_packet)

        # Flush the encoders
        for out_packet in output_video_stream.encode(None):
            output_container.mux(out_packet)
        if audio_stream:
            for out_packet in output_audio_stream.encode(None):
                output_container.mux(out_packet)

        # Close the output container
        output_container.close()

        # Get the compressed content and upload it
        compressed_content = output_buffer.getvalue()
        output_filename = f"{firebase_filename}_compressed.mp4"
        await upload_file_to_firebase(compressed_content, output_filename)
        logging.info(f"Compressed video uploaded to Firebase: {output_filename}")
        return output_filename
    except Exception as e:
        logging.error(f"Error compressing and processing video: {str(e)}")
        logging.error(traceback.format_exc())
        raise