import cv2
import streamlit as st
import tempfile
import base64
import os
from dotenv import load_dotenv
from openai import OpenAI
import assemblyai as aai

# Load environment variables and configure the API clients
load_dotenv()
aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


def main():
    st.title('Insightly Video Content Moderation')

    # Video upload section
    uploaded_video = st.file_uploader('Upload a video', type=["mp4", "avi", "mov"])

    if uploaded_video is not None:
        # Save the upload to a temp file so OpenCV and AssemblyAI can read it from disk
        tfile = tempfile.NamedTemporaryFile(delete=False)
        tfile.write(uploaded_video.read())
        video_file_path = tfile.name
        tfile.close()

        # Process the video and display frames in a grid layout
        base64_frames = video_to_base64_frames(video_file_path)
        display_frame_grid(base64_frames[::30])  # Display every 30th frame in a 3-column grid

        st.write("Actions:")  # Header for the actions/buttons section

        # Create four columns to align the buttons
        col1, col2, col3, col4 = st.columns(4)

        # Each button computes its result once and caches it in session state,
        # so Streamlit reruns don't repeat the expensive API calls
        with col1:
            if st.button("Description") and 'description' not in st.session_state:
                st.session_state['description'] = generate_description(base64_frames)
        with col2:
            if st.button("Frame Description") and 'frame_description' not in st.session_state:
                st.session_state['frame_description'] = generate_frame_description(base64_frames)
        with col3:
            if st.button("Generate Transcript") and 'transcript' not in st.session_state:
                # Transcribe only when requested, then cache the text
                transcriber = aai.Transcriber()
                st.session_state['transcript'] = transcriber.transcribe(video_file_path).text
        with col4:
            if st.button("Category of Video") and 'category' not in st.session_state:
                st.session_state['category'] = generate_category(base64_frames)

        # Display whichever results already exist in session state
        if st.session_state.get('description'):
            st.subheader("Video Description")
            st.write(st.session_state['description'])
        if st.session_state.get('frame_description'):
            st.subheader("Frame Description")
            st.write(st.session_state['frame_description'])
        if st.session_state.get('transcript'):
            st.subheader("Video Transcript")
            st.write(st.session_state['transcript'])
        if st.session_state.get('category'):
            st.subheader("Video Category")
            st.write(st.session_state['category'])


def video_to_base64_frames(video_file_path):
    # Extract every frame from the video and encode it as a base64 JPEG string
    video = cv2.VideoCapture(video_file_path)
    base64_frames = []

    while video.isOpened():
        success, frame = video.read()
        if not success:
            break
        _, buffer = cv2.imencode('.jpg', frame)
        base64_frames.append(base64.b64encode(buffer).decode('utf-8'))

    video.release()
    return base64_frames


#########################################
def ask_gpt_vision(prompt, base64_frames):
    # Shared helper: send a text prompt plus every 30th frame to GPT-4 Vision
    prompt_messages = [
        {
            "role": "user",
            "content": [
                prompt,
                *map(lambda x: {"image": x, "resize": 428}, base64_frames[::30]),
            ],
        },
    ]
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=prompt_messages,
        max_tokens=3000,
    )
    return response.choices[0].message.content


# Generate video description
def generate_description(base64_frames):
    return ask_gpt_vision(
        "Generate a description for this sequence of video frames in about 90 words. "
        "Return the following: 1. A list of objects in the video. "
        "2. Any restrictive or sensitive content, and if so, in which frame.",
        base64_frames,
    )


# Generate frame-by-frame description
def generate_frame_description(base64_frames):
    return ask_gpt_vision("Describe what is happening in each frame.", base64_frames)


# Generate category of video
def generate_category(base64_frames):
    return ask_gpt_vision("What category can this video be tagged to?", base64_frames)


########################
def display_frame_grid(base64_frames):
    # Lay the sampled frames out in rows of three columns
    cols_per_row = 3
    n_frames = len(base64_frames)
    for idx in range(0, n_frames, cols_per_row):
        cols = st.columns(cols_per_row)
        for col_index in range(cols_per_row):
            frame_idx = idx + col_index
            if frame_idx < n_frames:
                with cols[col_index]:
                    frame = base64_frames[frame_idx]
                    st.image(base64.b64decode(frame),
                             caption=f'Frame {frame_idx * 30 + 1}', width=200)


if __name__ == '__main__':
    main()
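
# ---------------------------------------------------------------
# Usage note (a minimal sketch; the filename app.py and the exact
# package list below are assumptions, not part of the original source):
#
#   1. Create a .env file next to this script with your keys:
#        ASSEMBLYAI_API_KEY=<your AssemblyAI key>
#        OPENAI_API_KEY=<your OpenAI key>
#   2. Install the dependencies, e.g.:
#        pip install streamlit opencv-python python-dotenv openai assemblyai
#   3. Launch the app with:
#        streamlit run app.py
# ---------------------------------------------------------------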