from PIL import Image
import cv2
import torch
from RealESRGAN import RealESRGAN
import tempfile
import os
import numpy as np
import moviepy.editor as mpy
from tqdm import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
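# (the GPU is used when available; inference also runs on CPU, just far slower)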

def infer_image(img: Image.Image, size_modifier: int) -> Image.Image:
    if img is None:
        raise ValueError("Image not uploaded")
    
    width, height = img.size
    
    if width >= 5000 or height >= 5000:
        raise ValueError(f"The image is too large: {width}x{height} (each side must be under 5000 px).")

    model = RealESRGAN(device, scale=size_modifier)
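    # Weights must already exist at weights/RealESRGAN_x{size_modifier}.pth,
    # since download=False disables fetching them at call time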
    model.load_weights(f'weights/RealESRGAN_x{size_modifier}.pth', download=False)

    result = model.predict(img.convert('RGB'))
    print(f"Upscaled image x{size_modifier} on {device} ... OK")
    return result

def infer_video(video_filepath: str, size_modifier: int) -> str:
    model = RealESRGAN(device, scale=size_modifier)
    model.load_weights(f'weights/RealESRGAN_x{size_modifier}.pth', download=False)
    
    # Audio is not touched during frame processing; the original track is
    # re-attached via moviepy once the upscaled video has been written.

    # Create a VideoCapture object for the video file
    cap = cv2.VideoCapture(video_filepath)

    # Create a temporary file for the output video
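    # (delete=False keeps the file on disk after close() so cv2.VideoWriter can reopen the path)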
    tmpfile = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
    vid_output = tmpfile.name
    tmpfile.close()

    # Create a VideoWriter object for the output video
    vid_writer = cv2.VideoWriter(
        vid_output,
        fourcc=cv2.VideoWriter_fourcc(*'mp4v'),
        fps=cap.get(cv2.CAP_PROP_FPS),
        frameSize=(
            int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) * size_modifier,
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) * size_modifier,
        ),
    )

    # Process each frame of the video and write it to the output video
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
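    # Note: CAP_PROP_FRAME_COUNT can be an estimate for some containers; the
    # ret check below stops the loop if the stream ends early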
    for _ in tqdm(range(n_frames)):
        # Read the next frame
        ret, frame = cap.read()
        if not ret:
            break

        # Convert the frame to RGB and feed it to the model
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = Image.fromarray(frame)
        upscaled_frame = model.predict(frame.convert('RGB'))

        # Convert the upscaled frame back to BGR and write it to the output video
        upscaled_frame = np.array(upscaled_frame)
        upscaled_frame = cv2.cvtColor(upscaled_frame, cv2.COLOR_RGB2BGR)
        vid_writer.write(upscaled_frame)

    # Release the VideoCapture and VideoWriter objects
    cap.release()
    vid_writer.release()

    # Create a new VideoFileClip object from the output video
    output_clip = mpy.VideoFileClip(vid_output)

    # Add the original audio track back to the upscaled video
    # (AudioFileClip's fps parameter is the audio sample rate, so passing the video fps was a bug)
    output_clip = output_clip.set_audio(mpy.AudioFileClip(video_filepath))

    # Save the output video under a name derived from the input file; using the
    # basename avoids building an invalid path when the input lives in another directory
    output_path = f'output_{os.path.basename(video_filepath)}'
    output_clip.write_videofile(output_path)

    return output_path
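
# Minimal usage sketch (an assumption, not part of the original wiring: the
# functions above are presumably exposed through a web UI). 'input.jpg' and
# 'input.mp4' are hypothetical file names; weights/RealESRGAN_x4.pth must
# already be downloaded.
if __name__ == '__main__':
    image = Image.open('input.jpg')
    infer_image(image, size_modifier=4).save('input_x4.png')

    result_path = infer_video('input.mp4', size_modifier=4)
    print(f'Upscaled video written to {result_path}')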