Spaces:

Unggi
/

gpt-4-vision-for-observational-evaluation

Sleeping

version1 completed

de92875 9 months ago

1.94 kB

	import gradio as gr
	import cv2
	import base64
	import openai

	def _sample_frames_b64(video_path, stride):
	    """Return every ``stride``-th frame of a video as a base64 JPEG string.

	    Args:
	        video_path: Path of the video passed to ``cv2.VideoCapture``.
	        stride: Keep frames whose zero-based index is a multiple of this
	            value (index 0 is always kept when the video is readable).

	    Returns:
	        A list of base64-encoded (UTF-8 decoded) JPEG strings; empty when
	        the capture cannot be opened or yields no frames.
	    """
	    capture = cv2.VideoCapture(video_path)
	    sampled = []
	    try:
	        index = 0
	        while capture.isOpened():
	            success, frame = capture.read()
	            if not success:
	                break
	            # Encode only the frames that are actually sent; encoding every
	            # frame and slicing afterwards wastes CPU time and memory.
	            if index % stride == 0:
	                encoded_ok, buffer = cv2.imencode(".jpg", frame)
	                if encoded_ok:
	                    sampled.append(base64.b64encode(buffer).decode("utf-8"))
	            index += 1
	    finally:
	        # Release the capture even if reading or encoding raised.
	        capture.release()
	    return sampled


	def process_video(
	    video_file,
	    api_key,
	    instruction,
	    *,
	    frame_stride=10,
	    resize=768,
	    max_tokens=500,
	):
	    """Send sampled frames of a video plus an instruction to the vision model.

	    Args:
	        video_file: The uploaded video. Either an object exposing the file
	            path as ``.name`` or a plain path string; ``None`` (nothing
	            uploaded) yields an error message.
	        api_key: OpenAI API key used for this request.
	        instruction: Text placed first in the user message, before the
	            frames.
	        frame_stride: Send every ``frame_stride``-th frame (default 10).
	        resize: Value of the ``"resize"`` field attached to each frame
	            (default 768).
	        max_tokens: ``max_tokens`` passed to the completion call
	            (default 500).

	    Returns:
	        The content of the first completion choice, or a string starting
	        with ``"Error: "`` when the input is unusable or a call fails.

	    Raises:
	        ValueError: If ``frame_stride`` is smaller than 1.
	    """
	    if frame_stride < 1:
	        raise ValueError("frame_stride must be a positive integer")
	    if video_file is None:
	        return "Error: no video file was provided."

	    # Set the OpenAI API key
	    openai.api_key = api_key

	    # Accept both a file-like object (path in ``.name``) and a bare path.
	    video_path = getattr(video_file, "name", video_file)

	    # Report frame-extraction failures the same way as API failures, so the
	    # caller always receives a string it can display.
	    try:
	        frames = _sample_frames_b64(video_path, frame_stride)
	    except Exception as e:
	        return f"Error: {e}"
	    if not frames:
	        return "Error: no frames could be read from the video."

	    content = [instruction]
	    content.extend({"image": frame, "resize": resize} for frame in frames)
	    prompt_messages = [{"role": "user", "content": content}]

	    try:
	        result = openai.ChatCompletion.create(
	            model="gpt-4-vision-preview",
	            messages=prompt_messages,
	            # Use this call's key directly rather than re-reading the
	            # module-level value, which another request may have replaced.
	            api_key=api_key,
	            max_tokens=max_tokens,
	        )
	        return result.choices[0].message.content
	    except Exception as e:
	        return f"Error: {e}"

	# Define the Gradio app
	def main():
	    """Build the "Video Narration Generator" Gradio interface and launch it.

	    The left column collects the OpenAI API key, the narration instruction
	    and the video upload; the right column shows the generated script.
	    Clicking the button calls ``process_video`` with the three inputs.
	    """
	    with gr.Blocks() as app:
	        gr.Markdown("## Video Narration Generator")
	        with gr.Row():
	            with gr.Column(scale=1):
	                # Mask the key while typing: it is a secret and should not
	                # be readable on screen.
	                api_key_input = gr.Textbox(
	                    label="Enter your OpenAI API Key",
	                    lines=1,
	                    type="password",
	                )
	                instruction_input = gr.Textbox(
	                    label="Enter Narration Instruction",
	                    placeholder="Enter your custom instruction here...",
	                    lines=5,
	                )
	                video_upload = gr.File(label="Upload your video", type="file")
	                submit_button = gr.Button("Generate Script")
	            with gr.Column(scale=1):
	                output_box = gr.Textbox(
	                    label="Generated Script",
	                    lines=7,
	                    interactive=False,
	                )

	        # Event wiring is done while the Blocks context is still open.
	        submit_button.click(
	            fn=process_video,
	            inputs=[video_upload, api_key_input, instruction_input],
	            outputs=output_box,
	        )

	    app.launch()

	# Start the app only when this file is executed as a script, so that
	# importing the module does not launch the UI.
	if __name__ == "__main__":
	    main()