|
import gradio as gr |
|
import cv2 |
|
import base64 |
|
import openai |
|
|
|
def _sample_frames_base64(video_path, step=10):
    """Return every ``step``-th frame of a video as base64-encoded JPEG text.

    Frames are read sequentially with OpenCV; only the sampled frames are
    JPEG-encoded, so the cost of encoding does not grow with frames that
    would be discarded anyway.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        step: Keep one frame out of every ``step`` frames, starting with the
            first one.

    Returns:
        A list of UTF-8 strings, each the base64 form of one JPEG frame. The
        list is empty when no frame could be read.
    """
    video = cv2.VideoCapture(video_path)
    encoded_frames = []
    try:
        frame_index = 0
        while video.isOpened():
            success, frame = video.read()
            if not success:
                break
            if frame_index % step == 0:
                encoded_ok, buffer = cv2.imencode(".jpg", frame)
                # Skip a frame that fails to encode rather than abort the run.
                if encoded_ok:
                    encoded_frames.append(
                        base64.b64encode(buffer).decode("utf-8")
                    )
            frame_index += 1
    finally:
        # Always free the capture handle, even if decoding/encoding raised.
        video.release()
    return encoded_frames


def process_video(video_file, api_key, instruction):
    """Generate a narration script for an uploaded video.

    Samples every 10th frame of the video, sends the frames together with
    ``instruction`` to the ``gpt-4-vision-preview`` model through
    ``openai.ChatCompletion.create``, and returns the model's reply.

    Args:
        video_file: The uploaded video. Either an object exposing the file
            path as ``.name`` or a plain path string.
        api_key: OpenAI API key used for this request.
        instruction: Text prompt placed ahead of the frames in the message.

    Returns:
        The generated text on success, otherwise a string starting with
        ``"Error: "`` describing what went wrong.
    """
    # Checked first so a missing upload yields a readable message instead of
    # an AttributeError on ``.name``.
    video_path = getattr(video_file, "name", video_file)
    if not video_path:
        return "Error: no video file was provided."

    # Kept for parity with the previous behaviour; the key is also passed
    # explicitly to the request below.
    openai.api_key = api_key

    try:
        base64_frames = _sample_frames_base64(video_path)
        if not base64_frames:
            return "Error: could not read any frames from the video."

        prompt_messages = [
            {
                "role": "user",
                "content": [
                    instruction,
                    *({"image": frame, "resize": 768} for frame in base64_frames),
                ],
            },
        ]

        result = openai.ChatCompletion.create(
            model="gpt-4-vision-preview",
            messages=prompt_messages,
            api_key=api_key,
            max_tokens=500,
        )
        return result.choices[0].message.content
    except Exception as e:
        # UI boundary: surface the failure in the output box, not a traceback.
        return f"Error: {e}"
|
|
|
|
|
def main():
    """Build the "Video Narration Generator" Gradio interface and launch it.

    The left column collects the OpenAI API key, the narration instruction
    and the video upload; the right column shows the generated script.
    Clicking the button calls ``process_video`` with those three inputs.
    """
    with gr.Blocks() as app:
        gr.Markdown("## Video Narration Generator")
        with gr.Row():
            with gr.Column(scale=1):
                # Password-type field so the secret key is masked on screen.
                api_key_input = gr.Textbox(
                    label="Enter your OpenAI API Key",
                    lines=1,
                    max_lines=1,
                    type="password",
                )
                instruction_input = gr.Textbox(
                    label="Enter Narration Instruction",
                    placeholder="Enter your custom instruction here...",
                    lines=5,
                )
                video_upload = gr.File(label="Upload your video", type="file")
                submit_button = gr.Button("Generate Script")
            with gr.Column(scale=1):
                output_box = gr.Textbox(
                    label="Generated Script",
                    lines=7,
                    interactive=False,
                )

        # Input order must match process_video(video_file, api_key, instruction).
        submit_button.click(
            fn=process_video,
            inputs=[video_upload, api_key_input, instruction_input],
            outputs=output_box,
        )

    app.launch()
|
|
|
# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()