from keypoints_extraction import predict_pose
from calculate_measures import calculate_all_measures
from calculate_masks import calculate_seg_mask
from select_body_shape import select_body_shape

import os
# Runtime dependencies installed at start-up, written as pip requirement
# specifiers and installed in this order.
_PIP_REQUIREMENTS = (
    "xtcocotools>=1.12",
    "mmengine>=0.6.0",
    "mmcv>=2.0.0rc4,<2.1.0",
    "mmdet>=3.0.0,<4.0.0",
    "mmpose",
)

# NOTE(review): these installs run after the project-module imports at the top
# of the file; if any of those modules import the packages below at import
# time, the installs happen too late on a fresh environment -- confirm.
for _requirement in _PIP_REQUIREMENTS:
    # Every specifier is single-quoted: left unquoted, the shell parses ">" and
    # "<" as I/O redirections, so "xtcocotools>=1.12" would install an
    # unconstrained xtcocotools and redirect pip's output to a file "=1.12".
    # As before, a failed install is not treated as fatal (exit status ignored).
    os.system(f"pip install '{_requirement}'")

import gradio as gr

def generate_output(front_img_path, side_img_path):
    """Run the body-shape pipeline on a front-view and a side-view input.

    Steps, in order: pose prediction for both views, segmentation masks for
    both views, measure calculation, and finally body-shape selection.

    Args:
        front_img_path: Front-view input, forwarded unchanged to
            ``predict_pose`` and ``calculate_seg_mask``.
            NOTE(review): the Gradio inputs in this file are declared with
            ``type='pil'``, so despite the name this is presumably a PIL
            image rather than a file path -- confirm.
        side_img_path: Side-view input, handled the same way as the front one.

    Returns:
        The value returned by ``select_body_shape`` for the computed measures.
    """
    # TODO: These file names will need to be unique in case of multiple
    # requests at once, and they will need to be deleted after the function
    # is done.
    front_pose = predict_pose(front_img_path, "front.jpg")
    side_pose = predict_pose(side_img_path, "side.jpg")

    # Each pose result is indexed as [0] -> image, [1] -> keypoint data.
    # Open question: should the image be created separately? It is odd to
    # receive it from pose prediction only to hand it to the measure step.
    front_image, front_keypoints = front_pose[0], front_pose[1]
    side_image, side_keypoints = side_pose[0], side_pose[1]

    front_mask = calculate_seg_mask(front_img_path)
    # TODO: Is this the correct mask? The original code has an unused
    # 'get_rcnn_mask' function whose name implies an rcnn mask, but the code
    # actually requests a seg mask.
    side_mask = calculate_seg_mask(side_img_path)

    measures = calculate_all_measures(
        front_image,
        side_image,
        front_keypoints,
        side_keypoints,
        front_mask,
        side_mask,
    )

    # TODO: Normalise the measures somehow? Unclear whether this is needed
    # when the input is a single person; for now they pass through unchanged.
    normalised_measures = measures

    return select_body_shape(normalised_measures)

# Title passed to the Gradio interface.
title = "ShopByShape"

# Two image inputs, both delivered to the handler as PIL images.
input_image_front = gr.inputs.Image(type='pil', label="Front Image")
input_image_side = gr.inputs.Image(type='pil', label="Side Image")

# Single text output holding the selected body shape.
output_body_shape = gr.outputs.Textbox(label="Body Shape")

# Additional per-view outputs, currently disabled:
# output_image_front = gr.outputs.Image(type="pil", label="Front Output Image")
# output_text_front = gr.outputs.Textbox(label="Front Output Text")
# output_image_side = gr.outputs.Image(type="pil", label="Side Output Image")
# output_text_side = gr.outputs.Textbox(label="Side Output Text")

# Wire generate_output to the components above and start the app.
iface = gr.Interface(
    fn=generate_output,
    inputs=[input_image_front, input_image_side],
    outputs=[output_body_shape],
    title=title,
)
iface.launch()