|
import pixeltable as pxt |
|
import os |
|
import openai |
|
import gradio as gr |
|
import getpass |
|
from pixeltable.iterators import FrameIterator |
|
from pixeltable.functions.video import extract_audio |
|
from pixeltable.functions.audio import get_metadata |
|
from pixeltable.functions import openai |
|
|
|
# Ask for the OpenAI key interactively only when the environment does not
# already provide one; getpass keeps the typed key off the terminal echo.
if os.environ.get('OPENAI_API_KEY') is None:
    os.environ['OPENAI_API_KEY'] = getpass.getpass('Enter your OpenAI API key:')
|
|
|
|
|
# Reset the Pixeltable namespace on every start: drop 'directory' (forced)
# and create it again so the table and view below are built from scratch.
# NOTE(review): force=True presumably also removes the contained tables/views
# -- confirm against the Pixeltable docs before pointing this at real data.
pxt.drop_dir('directory', force=True)
pxt.create_dir('directory')
|
|
|
|
|
# Base table: one row per request, holding the uploaded video and the
# social media platform the post should be written for. Both columns are
# declared nullable.
t = pxt.create_table(
    'directory.video_table',
    {
        'video': pxt.VideoType(nullable=True),
        'sm_type': pxt.StringType(nullable=True),
    },
)
|
|
|
|
|
# View over the base table that yields frames extracted from each video;
# the iterator is configured with num_frames=2. These frames are what the
# UI later offers as thumbnail candidates.
frame_iterator = FrameIterator.create(video=t.video, num_frames=2)
frames_view = pxt.create_view('directory.frames', t, iterator=frame_iterator)
|
|
|
|
|
# Computed columns, declared in dependency order (each one reads a column
# defined before it):
#   video -> audio (mp3) -> metadata
#                        -> transcription -> transcription_text
# `openai` here is pixeltable.functions.openai: the later
# `from pixeltable.functions import openai` rebinds the name that the plain
# `import openai` introduced.
t['audio'] = extract_audio(t.video, format='mp3')
t['metadata'] = get_metadata(t.audio)
t['transcription'] = openai.transcriptions(
    audio=t.audio,
    model='whisper-1',
)
t['transcription_text'] = t.transcription.text
|
|
|
|
|
|
|
@pxt.udf
def prompt(A: str, B: str) -> list[dict]:
    """Build the chat messages sent to the completion model.

    Args:
        A: Text placed after the ``A:`` label of the user message. The
            column wiring in this file passes the social media type here.
        B: Text placed after the ``B:`` label of the user message. The
            column wiring in this file passes the transcription text here.

    Returns:
        A two-element list: a fixed system instruction followed by a user
        message that embeds ``A`` and ``B`` in double quotes.
    """
    system_message = {
        'role': 'system',
        'content': 'You are an expert in creating social media content and you generate effective post, based on the video transcript and the type of social media asked for. Please respect the limitations in terms of characters and size of each social media platform',
    }
    user_message = {
        'role': 'user',
        'content': f'A: "{A}" \n B: "{B}"',
    }
    return [system_message, user_message]
|
|
|
# Chat-completion pipeline, again in dependency order:
#   (sm_type, transcription_text) -> message -> response -> answer
t['message'] = prompt(t.sm_type, t.transcription_text)

t['response'] = openai.chat_completions(
    messages=t.message,
    model='gpt-4o-mini-2024-07-18',
    max_tokens=500,
)

# Keep only the generated text of the first choice as the final post.
t['answer'] = t.response.choices[0].message.content
|
|
|
# Largest accepted upload, in megabytes (compared against the file size on disk).
MAX_VIDEO_SIZE_MB = 35


def _error_result(message):
    """Return `message` padded with None so it fills all four UI outputs."""
    return message, None, None, None


def process_and_generate_post(video_file, social_media_type):
    """Insert an uploaded video into the table and return the generated results.

    The Gradio click handler binds this function to four output components
    (post text, thumbnail gallery, dataframe, audio), so every return path
    yields exactly four values. Previously the failure paths returned only
    two, which does not match the four declared outputs.

    Args:
        video_file: Path of the uploaded video; a falsy value means nothing
            was uploaded.
        social_media_type: Target platform label, stored in the ``sm_type``
            column.

    Returns:
        A 4-tuple ``(post_text, thumbnails, dataframe, audio)``. On any
        failure the first element is a human-readable message and the other
        three are ``None``.
    """
    if not video_file:
        return _error_result("Please upload a video file.")

    try:
        # Reject oversized uploads before any processing is triggered.
        video_size = os.path.getsize(video_file) / (1024 * 1024)
        if video_size > MAX_VIDEO_SIZE_MB:
            return _error_result(
                f"The video file is larger than {MAX_VIDEO_SIZE_MB} MB. Please upload a smaller file."
            )

        # Inserting the row is what populates the computed columns read below.
        t.insert([{
            "video": video_file,
            "sm_type": social_media_type,
        }])

        # The row just inserted is read back as the last row of the table.
        social_media_post = t.select(t.answer).tail(1)['answer'][0]
        audio = t.select(t.audio).tail(1)['audio'][0]

        # NOTE(review): the frames view is created with num_frames=2, so
        # tail(4) presumably also picks up frames of the previous upload --
        # confirm whether that is intended.
        thumbnails = frames_view.select(frames_view.frame).tail(4)['frame']

        # Full table contents for the "Pixeltable Table" dataframe component.
        df_output = t.collect().to_pandas()

        return social_media_post, thumbnails, df_output, audio

    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return _error_result(f"An error occurred: {str(e)}")
|
|
|
|
|
import gradio as gr |
|
|
|
def gradio_interface():
    """Build the Gradio Blocks UI for the video-to-post generator.

    Layout: an introductory header, a row with an input column (video
    upload, platform dropdown, generate button, example videos) and an
    output column (generated post, thumbnail gallery, audio player), a
    dataframe showing the table contents, and a footer.

    The generate button calls ``process_and_generate_post`` with the video
    and platform inputs and routes its four return values to the post
    textbox, gallery, dataframe and audio components, in that order.

    Returns:
        The assembled ``gr.Blocks`` app; the caller is responsible for
        launching it.
    """
    with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
        gr.Markdown(
            """<p>
            <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" width="20%" />
            <h1>Video to Social Media Post Generator</h1>
            <h3>Key functionalities demonstrated in this example:</h3>
            </p>
            <ul>
            <li><strong>Video Data Management:</strong> Creating tables and views to store and organize video data.</li>
            <li><strong>Automated Video Processing:</strong> Extracting frames and audio from videos.</li>
            <li><strong>Data Transformation:</strong> Computing and storing metadata, transcriptions, and AI-generated content.</li>
            <li><strong>AI Integration:</strong> Utilizing OpenAI's GPT and Whisper models for transcription and content generation.</li>
            <li><strong>Custom Functions:</strong> Defining user-defined functions (UDFs) for specialized tasks like prompt construction.</li>
            <li><strong>Data Persistence:</strong> Storing transformed data and AI outputs for easy retrieval and analysis.</li>
            <li><strong>Gradio Integration:</strong> Creating an interactive web interface for easy user interaction with Pixeltable's functionalities.</li>
            </ul>
            """
        )

        with gr.Row():
            # Left column: inputs.
            with gr.Column():
                video_input = gr.Video(
                    label=f"Upload Video File (max {MAX_VIDEO_SIZE_MB} MB):",
                    include_audio=True,
                    max_length=300,
                    height='400px',
                    autoplay=True
                )
                social_media_type = gr.Dropdown(
                    choices=["X (Twitter)", "Facebook", "LinkedIn", "Instagram"],
                    label="Select Social Media Platform:",
                    value="X (Twitter)",
                )
                generate_btn = gr.Button("Generate Post")

                # Example clips fill only the video input.
                gr.Examples(
                    examples=[["example1.mp4"], ["example2.mp4"], ["example3.mp4"]],
                    inputs=[video_input]
                )

            # Right column: generated results.
            with gr.Column():
                output = gr.Textbox(label="Generated Social Media Post", show_copy_button=True)
                thumbnail = gr.Gallery(
                    label="Pick your favorite Post Thumbnail",
                    show_download_button=True,
                    show_fullscreen_button=True,
                    height='400px'
                )
                audio = gr.Audio()

        df_output = gr.DataFrame(label="Pixeltable Table")

        # The order of `outputs` must match the order of the values returned
        # by process_and_generate_post.
        generate_btn.click(
            fn=process_and_generate_post,
            inputs=[video_input, social_media_type],
            outputs=[output, thumbnail, df_output, audio],
        )

        # Footer: the stray closing </a> after the model name was removed so
        # the anchor markup is balanced.
        gr.HTML(
            """
            <div class="footer">
            <p>Pixeltable is a declarative interface for working with text, images, embeddings, and even video, enabling you to store, transform, index, and iterate on data. Powered solely by <a href="https://github.com/pixeltable/pixeltable" style="text-decoration: underline;" target="_blank">Pixeltable</a> - running OpenAI (gpt-4o-mini-2024-07-18).</p>
            </div>
            """
        )

    return demo
|
|
|
|
|
# Script entry point: build the UI and serve it; show_api=False is passed
# through to Gradio's launch().
if __name__ == "__main__":
    app = gradio_interface()
    app.launch(show_api=False)