import cv2
import streamlit as st
import tempfile
import base64
import os
from dotenv import load_dotenv
from openai import OpenAI
import assemblyai as aai

# Load environment variables
load_dotenv()
aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
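# Both keys are read from the environment; for local development a .env file
# containing ASSEMBLYAI_API_KEY and OPENAI_API_KEY is picked up by load_dotenv().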
def main():
    st.title('Insightly Video Content Moderation')

    # Video upload section
    uploaded_video = st.file_uploader('Upload a video', type=["mp4", "avi", "mov"])

    if uploaded_video is not None:
        # Save the upload to a temp file so cv2 and AssemblyAI can read it from disk
        tfile = tempfile.NamedTemporaryFile(delete=False)
        tfile.write(uploaded_video.read())
        video_file_path = tfile.name
        tfile.close()

        # Transcribe the audio track (runs synchronously and may take a while)
        transcriber = aai.Transcriber()
        transcript = transcriber.transcribe(video_file_path)

        # Process the video and display frames in a grid layout
        base64_frames = video_to_base64_frames(video_file_path)
        display_frame_grid(base64_frames[::30])  # Display every 30th frame in a 3-column grid

        st.write("Actions:")  # Header for the actions/buttons section

        # Create four columns to align the buttons
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            if st.button("Description") and 'description' not in st.session_state:
                st.session_state['description'] = generate_description(base64_frames)
        with col2:
            if st.button("Frame Description") and 'frame_description' not in st.session_state:
                st.session_state['frame_description'] = generate_frame_description(base64_frames)
        with col3:
            if st.button("Generate Transcript") and 'transcript' not in st.session_state:
                st.session_state['transcript'] = transcript.text
        with col4:
            if st.button("Category of Video") and 'category' not in st.session_state:
                st.session_state['category'] = generate_category(base64_frames)

        # If any value exists in session state, display it
        if st.session_state.get('description'):
            st.subheader("Video Description")
            st.write(st.session_state['description'])
        if st.session_state.get('frame_description'):
            st.subheader("Frame Description")
            st.write(st.session_state['frame_description'])
        if st.session_state.get('transcript'):
            st.subheader("Video Transcript")
            st.write(st.session_state['transcript'])
        if st.session_state.get('category'):
            st.subheader("Video Category")
            st.write(st.session_state['category'])
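# Note on the session_state pattern in main(): Streamlit re-runs the whole
# script on every interaction, so generated results are cached in
# st.session_state to avoid re-calling the OpenAI and AssemblyAI APIs each
# time a button is pressed.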
def video_to_base64_frames(video_file_path):
    # Extract every frame from the video and convert it to a base64-encoded JPEG
    video = cv2.VideoCapture(video_file_path)
    base64_frames = []
    while video.isOpened():
        success, frame = video.read()
        if not success:
            break
        _, buffer = cv2.imencode('.jpg', frame)
        base64_frame = base64.b64encode(buffer).decode('utf-8')
        base64_frames.append(base64_frame)
    video.release()
    return base64_frames
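# A possible memory optimization (a sketch, not part of the original app): the
# UI and the prompts only ever use every 30th frame, so frames could be
# sampled during extraction instead of encoding all of them. The step value
# of 30 mirrors the slicing used elsewhere in this file.
def video_to_base64_frames_sampled(video_file_path, step=30):
    video = cv2.VideoCapture(video_file_path)
    base64_frames = []
    frame_idx = 0
    while video.isOpened():
        success, frame = video.read()
        if not success:
            break
        if frame_idx % step == 0:  # keep only every `step`-th frame
            _, buffer = cv2.imencode('.jpg', frame)
            base64_frames.append(base64.b64encode(buffer).decode('utf-8'))
        frame_idx += 1
    video.release()
    return base64_frames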
#########################################
# Generate video description
def generate_description(base64_frames):
    prompt_messages = [
        {
            "role": "user",
            "content": [
                "Generate a description for this sequence of video frames in about 90 words. "
                "Return the following: 1. a list of objects in the video; "
                "2. any restricted or sensitive content and, if present, in which frame.",
                *map(lambda x: {"image": x, "resize": 428}, base64_frames[0::30]),
            ],
        },
    ]
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=prompt_messages,
        max_tokens=3000,
    )
    return response.choices[0].message.content
# Generate per-frame description
def generate_frame_description(base64_frames):
    prompt_messages = [
        {
            "role": "user",
            "content": [
                "Describe what is happening in each frame.",
                *map(lambda x: {"image": x, "resize": 428}, base64_frames[0::30]),
            ],
        },
    ]
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=prompt_messages,
        max_tokens=3000,
    )
    return response.choices[0].message.content
# Generate category of video
def generate_category(base64_frames):
    prompt_messages = [
        {
            "role": "user",
            "content": [
                "What category can this video be tagged to?",
                *map(lambda x: {"image": x, "resize": 428}, base64_frames[0::30]),
            ],
        },
    ]
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=prompt_messages,
        max_tokens=3000,
    )
    return response.choices[0].message.content
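# The three prompt functions above differ only in their prompt text; a possible
# consolidation (a sketch, the helper name and signature are illustrative and
# not part of the original app) would parameterize a single call:
def ask_vision_model(prompt_text, base64_frames, model="gpt-4-vision-preview"):
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    prompt_text,
                    *map(lambda x: {"image": x, "resize": 428}, base64_frames[0::30]),
                ],
            },
        ],
        max_tokens=3000,
    )
    return response.choices[0].message.content
# e.g. generate_category(frames) would become
# ask_vision_model("What category can this video be tagged to?", frames)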
########################
def display_frame_grid(base64_frames):
    cols_per_row = 3
    n_frames = len(base64_frames)
    for idx in range(0, n_frames, cols_per_row):
        cols = st.columns(cols_per_row)
        for col_index in range(cols_per_row):
            frame_idx = idx + col_index
            if frame_idx < n_frames:
                with cols[col_index]:
                    frame = base64_frames[frame_idx]
                    # The list passed in is already subsampled to every 30th frame,
                    # so frame_idx * 30 recovers the original (1-based) frame number
                    st.image(base64.b64decode(frame), caption=f'Frame {frame_idx * 30 + 1}', width=200)
if __name__ == '__main__':
    main()
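# To run locally (assuming this file is saved as app.py):
#   streamlit run app.py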