Spaces:

Pixeltable
/

video-to-social-media-post-generator

Running

App Files Files Community

video-to-social-media-post-generator / app.py

PierreBrunelle

Update app.py

ddc1de3 verified 5 months ago

raw

history blame

7.39 kB

	import pixeltable as pxt
	import os
	import openai
	import gradio as gr
	import getpass
	from pixeltable.iterators import FrameIterator
	from pixeltable.functions.video import extract_audio
	from pixeltable.functions.audio import get_metadata
	from pixeltable.functions import openai

	# Make sure an OpenAI API key is available in the environment; prompt for it
	# interactively (without echo) only when the variable is not already set.
	if os.environ.get('OPENAI_API_KEY') is None:
	    os.environ['OPENAI_API_KEY'] = getpass.getpass('Enter your OpenAI API key:')

	# Upload size cap in megabytes: enforced in process_and_generate_post and
	# displayed in the label of the video upload widget.
	MAX_VIDEO_SIZE_MB: int = 35
	# NOTE(review): not referenced anywhere in the visible code; presumably meant
	# as a Gradio concurrency setting -- confirm before relying on it.
	CONCURRENCY_LIMIT: int = 1

	def process_and_generate_post(video_file, social_media_type, progress=gr.Progress()):
	    """Generate a social media post from an uploaded video.

	    The upload is validated first. Then the Pixeltable directory ``directory``
	    is dropped and recreated, a video table with a one-frame-per-second frame
	    view is built, and computed columns are added for the extracted audio, its
	    metadata, the Whisper transcription, the chat prompt, the chat completion
	    and the final answer text. Inserting the video row triggers those columns.

	    Args:
	        video_file: Path of the uploaded video as passed by the Gradio video
	            input; a falsy value means nothing was uploaded.
	        social_media_type: Target platform name, passed to the prompt as-is.
	        progress: Gradio progress tracker used to report stage updates.

	    Returns:
	        A 4-tuple ``(post, thumbnails, transcription, audio)`` matching the
	        four output components wired to this handler. On any failure the
	        first element is a human-readable message and the other three are
	        ``None``.
	    """
	    progress(0, desc="Initializing...")

	    # Validate the upload before touching Pixeltable so that a bad request
	    # does not drop and rebuild the tables for nothing. Every early exit
	    # returns four values because the click handler binds four outputs.
	    if not video_file:
	        return "Please upload a video file.", None, None, None

	    try:
	        video_size = os.path.getsize(video_file) / (1024 * 1024)  # bytes -> MB
	    except OSError as e:
	        return f"An error occurred: {str(e)}", None, None, None

	    if video_size > MAX_VIDEO_SIZE_MB:
	        return f"The video file is larger than {MAX_VIDEO_SIZE_MB} MB. Please upload a smaller file.", None, None, None

	    # Create a Table, a View, and Computed Columns.
	    # The directory is rebuilt from scratch on every call.
	    pxt.drop_dir('directory', force=True)
	    pxt.create_dir('directory')

	    t = pxt.create_table(
	        'directory.video_table', {
	            "video": pxt.VideoType(nullable=True),
	            "sm_type": pxt.StringType(nullable=True),
	        }
	    )

	    frames_view = pxt.create_view(
	        "directory.frames",
	        t,
	        iterator=FrameIterator.create(video=t.video, fps=1)
	    )

	    # Create computed columns to store transformations and persist outputs
	    t['audio'] = extract_audio(t.video, format='mp3')
	    t['metadata'] = get_metadata(t.audio)
	    t['transcription'] = openai.transcriptions(audio=t.audio, model='whisper-1')
	    t['transcription_text'] = t.transcription.text

	    progress(0.1, desc="Creating UDFs...")

	    # Custom User-Defined Function (UDF) for Generating Social Media Prompts
	    @pxt.udf
	    def prompt(A: str, B: str) -> list[dict]:
	        system_msg = 'You are an expert in creating social media content and you generate effective post, based on user content. Respect the social media platform guidelines and constraints.'
	        user_msg = f'A: "{A}" \n B: "{B}"'
	        return [
	            {'role': 'system', 'content': system_msg},
	            {'role': 'user', 'content': user_msg}
	        ]

	    # Apply the UDF to create a new column
	    t['message'] = prompt(t.sm_type, t.transcription_text)

	    # Generating Responses with OpenAI's GPT Model
	    progress(0.2, desc="Calling LLMs")

	    # Generate responses using OpenAI's chat completion API
	    t['response'] = openai.chat_completions(messages=t.message, model='gpt-4o-mini-2024-07-18', max_tokens=500)

	    # Extract the content of the response
	    t['answer'] = t.response.choices[0].message.content

	    try:
	        progress(0.4, desc="Inserting video...")

	        # Insert the video into the table; the computed columns above are
	        # defined on this table, so their values are available afterwards.
	        t.insert([{
	            "video": video_file,
	            "sm_type": social_media_type
	        }])

	        progress(0.6, desc="Generating posts...")

	        # Retrieve the generated social media post
	        social_media_post = t.select(t.answer).tail(1)['answer'][0]

	        # Retrieve the extracted audio
	        audio = t.select(t.audio).tail(1)['audio'][0]

	        # Retrieve the last six frames as thumbnail candidates
	        thumbnails = frames_view.select(frames_view.frame).tail(6)['frame']

	        progress(0.8, desc="Preparing results...")

	        # Retrieve the transcription text
	        df_output = t.select(t.transcription_text).tail(1)['transcription_text'][0]

	        return social_media_post, thumbnails, df_output, audio

	    except Exception as e:
	        # UI boundary: surface the failure in the post textbox instead of
	        # raising, and clear the other three outputs.
	        return f"An error occurred: {str(e)}", None, None, None

	# Gradio Interface
	import gradio as gr

	def gradio_interface():
	    """Build the Gradio Blocks UI for the video-to-post generator.

	    The page has a header, a short Pixeltable description, two columns of
	    feature bullets, and an input/output row. The "Generate Post" button is
	    wired to ``process_and_generate_post``.

	    Returns:
	        The constructed ``gr.Blocks`` instance; it is not launched here.
	    """
	    # NOTE(review): the container nesting below was reconstructed from a
	    # source whose indentation was flattened -- confirm the layout.

	    # Static page content, kept apart from the layout code.
	    header_md = """
	<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" width="20%" /></img>
	<h1>Video to Social Media Post Generator</h1>
	"""
	    intro_html = """
	<p>
	<a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a> is a declarative interface for working with text, images, embeddings, and even video, enabling you to store, transform, index, and iterate on data.
	</p>
	"""
	    left_features_md = """
	<ul>
	<li><strong>Video Data Management:</strong> Creating tables and views to store and organize video data.</li>
	<li><strong>Automated Video Processing:</strong> Extracting frames and audio from videos.</li>
	<li><strong>Data Transformation:</strong> Computing and storing metadata, transcriptions, and AI-generated content.</li>
	</ul>
	"""
	    right_features_md = """
	<ul>
	<li><strong>AI Integration:</strong> Utilizing OpenAI's GPT and Whisper models for transcription and content generation.</li>
	<li><strong>Custom Functions:</strong> Defining user-defined functions (UDFs) for specialized tasks like prompt construction.</li>
	<li><strong>Data Persistence:</strong> Storing transformed data and AI outputs for easy retrieval and analysis.</li>
	</ul>
	"""

	    with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
	        gr.Markdown(header_md)
	        gr.HTML(intro_html)

	        # Feature overview, split across two columns.
	        with gr.Row():
	            with gr.Column():
	                gr.Markdown(left_features_md)
	            with gr.Column():
	                gr.Markdown(right_features_md)

	        with gr.Row():
	            # Left column: inputs, example videos and the extracted audio.
	            with gr.Column():
	                video_in = gr.Video(
	                    label=f"Upload Video File (max {MAX_VIDEO_SIZE_MB} MB):",
	                    include_audio=True,
	                    max_length=300,
	                    height='400px',
	                    autoplay=False
	                )
	                platform_dropdown = gr.Dropdown(
	                    choices=["X (Twitter)", "Facebook", "LinkedIn", "Instagram"],
	                    label="Select Social Media Platform:",
	                    value="X (Twitter)",
	                )
	                run_button = gr.Button("Generate Post")

	                gr.Examples(
	                    examples=[["example1.mp4"], ["example2.mp4"], ["example3.mp4"]],
	                    inputs=[video_in]
	                )
	                audio_out = gr.Audio(label="Extracted audio", show_download_button=True)

	            # Right column: generated post, thumbnail gallery, transcription.
	            with gr.Column():
	                post_box = gr.Textbox(label="Generated Social Media Post", show_copy_button=True)
	                gallery = gr.Gallery(
	                    label="Pick your favorite Post Thumbnail",
	                    show_download_button=True,
	                    show_fullscreen_button=True,
	                    height='400px'
	                )

	                transcript_box = gr.Textbox(label="Transcription", show_copy_button=True)

	        # The handler's four return values map onto these outputs in order.
	        run_button.click(
	            fn=process_and_generate_post,
	            trigger_mode='once',
	            show_progress='full',
	            inputs=[video_in, platform_dropdown],
	            outputs=[post_box, gallery, transcript_box, audio_out],
	        )

	    return demo

	# Build the interface and launch it (with show_api=False) only when this
	# file is executed as a script.
	if __name__ == "__main__":
	    app = gradio_interface()
	    app.launch(show_api=False)