from PIL import Image
import cv2
import torch
from RealESRGAN import RealESRGAN
import tempfile
import numpy as np
import moviepy.editor as mpy
from tqdm import tqdm
import os

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def infer_image(img: Image.Image, size_modifier: int) -> Image.Image:
    if img is None:
        raise Exception("Image not uploaded")

    width, height = img.size
    if width >= 5000 or height >= 5000:
        raise Exception("The image is too large.")

    model = RealESRGAN(device, scale=size_modifier)
    model.load_weights(f'weights/RealESRGAN_x{size_modifier}.pth', download=False)

    result = model.predict(img.convert('RGB'))
    print(f"Image size ({device}): {size_modifier} ... OK")
    return result


def infer_video(video_filepath: str, size_modifier: int) -> str:
    model = RealESRGAN(device, scale=size_modifier)
    model.load_weights(f'weights/RealESRGAN_x{size_modifier}.pth', download=False)

    # Create a VideoCapture object for the input video file
    cap = cv2.VideoCapture(video_filepath)

    # Create a temporary file for the upscaled (still silent) video
    tmpfile = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
    vid_output = tmpfile.name
    tmpfile.close()

    # Create a VideoWriter whose frame size is the input size scaled by size_modifier
    vid_writer = cv2.VideoWriter(
        vid_output,
        fourcc=cv2.VideoWriter_fourcc(*'mp4v'),
        fps=cap.get(cv2.CAP_PROP_FPS),
        frameSize=(
            int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) * size_modifier,
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) * size_modifier,
        ),
    )

    # Upscale each frame of the video and write it to the output video
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    for _ in tqdm(range(n_frames)):
        # Read the next frame
        ret, frame = cap.read()
        if not ret:
            break

        # OpenCV reads frames as BGR; the model expects an RGB PIL image
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = Image.fromarray(frame)
        upscaled_frame = model.predict(frame.convert('RGB'))

        # Convert the upscaled frame back to BGR and write it to the output video
        upscaled_frame = np.array(upscaled_frame)
        upscaled_frame = cv2.cvtColor(upscaled_frame, cv2.COLOR_RGB2BGR)
        vid_writer.write(upscaled_frame)

    # Release the VideoCapture and VideoWriter objects
    cap.release()
    vid_writer.release()

    # Re-attach the original audio track to the upscaled video
    output_clip = mpy.VideoFileClip(vid_output)
    output_clip = output_clip.set_audio(mpy.AudioFileClip(video_filepath))

    # Write the final video next to the input file
    output_path = os.path.join(
        os.path.dirname(video_filepath),
        f'output_{os.path.basename(video_filepath)}',
    )
    output_clip.write_videofile(output_path)

    return output_path
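

# A minimal usage sketch. It assumes the RealESRGAN weights have already been
# downloaded to weights/RealESRGAN_x4.pth, and 'input.jpg' / 'input.mp4' are
# hypothetical local files standing in for real inputs.
if __name__ == '__main__':
    # Upscale a single image 4x and save the result
    image = Image.open('input.jpg')  # hypothetical input path
    upscaled = infer_image(image, size_modifier=4)
    upscaled.save('input_upscaled.jpg')

    # Upscale a video 4x; the returned path points at the re-muxed output
    output = infer_video('input.mp4', size_modifier=4)  # hypothetical input path
    print(f'Upscaled video written to {output}')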