File size: 7,390 Bytes
827013d b2f0dbe 827013d d76e3c3 b2f0dbe d294ddd c0fbf58 b2f0dbe 304ad78 5746517 f39513f 27c954a 29dacc9 827c577 29dacc9 827c577 29dacc9 827c577 29dacc9 b2f0dbe 304ad78 b2f0dbe 83f9cc7 b2f0dbe d76e3c3 827c577 3f8f0be b2f0dbe d76e3c3 827c577 b2f0dbe 74b8f0b d76e3c3 8acae36 b2f0dbe 2ca0c6c c0fbf58 83f9cc7 09e5e64 d294ddd e966d07 b2f0dbe bbaf99c 827013d b2f0dbe 827013d b2f0dbe 827013d bbaf99c 8285890 e6e1ceb a3921b9 8285890 83f9cc7 19589b9 827013d b2f0dbe 96d75e6 b2f0dbe 8acae36 9bc3e31 8acae36 827c577 b2f0dbe c0fbf58 ddc1de3 d294ddd f39513f 5d3d4ac f39513f 8acae36 f39513f 827013d 4b4eb33 b2f0dbe a3921b9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
import pixeltable as pxt
import os
import openai
import gradio as gr
import getpass
from pixeltable.iterators import FrameIterator
from pixeltable.functions.video import extract_audio
from pixeltable.functions.audio import get_metadata
from pixeltable.functions import openai
# Store OpenAI API Key
if 'OPENAI_API_KEY' not in os.environ:
os.environ['OPENAI_API_KEY'] = getpass.getpass('Enter your OpenAI API key:')
MAX_VIDEO_SIZE_MB = 35
CONCURRENCY_LIMIT = 1
def process_and_generate_post(video_file, social_media_type, progress=gr.Progress()):
    """Process an uploaded video with Pixeltable and draft a social media post.

    Rebuilds a fresh Pixeltable workspace, extracts audio and frames from the
    video, transcribes the audio with Whisper, and asks a GPT model to write a
    post tailored to the selected platform.

    Args:
        video_file: Filesystem path of the uploaded video (from ``gr.Video``).
        social_media_type: Target platform label, e.g. ``"LinkedIn"``.
        progress: Gradio progress tracker. The mutable default is the
            documented Gradio idiom for progress reporting and is intentional.

    Returns:
        On success: ``(post_text, thumbnails, transcription_text, audio_path)``.
        On failure: ``(error_message, None, None, None)`` — always a 4-tuple so
        every one of the click handler's four outputs receives a value.
    """
    # Guard clauses first: validate input before any expensive Pixeltable
    # setup (the original checked only after creating tables and columns).
    if not video_file:
        return "Please upload a video file.", None, None, None

    video_size = os.path.getsize(video_file) / (1024 * 1024)  # bytes -> MB
    if video_size > MAX_VIDEO_SIZE_MB:
        return (
            f"The video file is larger than {MAX_VIDEO_SIZE_MB} MB. Please upload a smaller file.",
            None,
            None,
            None,
        )

    progress(0, desc="Initializing...")

    # Recreate the Pixeltable workspace from scratch on every invocation so
    # each run starts from a clean slate.
    pxt.drop_dir('directory', force=True)
    pxt.create_dir('directory')

    t = pxt.create_table(
        'directory.video_table',
        {
            "video": pxt.VideoType(nullable=True),
            "sm_type": pxt.StringType(nullable=True),
        },
    )

    # View that samples one frame per second; used for thumbnail candidates.
    frames_view = pxt.create_view(
        "directory.frames",
        t,
        iterator=FrameIterator.create(video=t.video, fps=1),
    )

    # Computed columns: every insert into `t` automatically triggers audio
    # extraction, metadata capture, Whisper transcription, and (below) the
    # LLM post generation. `openai` here is pixeltable.functions.openai.
    t['audio'] = extract_audio(t.video, format='mp3')
    t['metadata'] = get_metadata(t.audio)
    t['transcription'] = openai.transcriptions(audio=t.audio, model='whisper-1')
    t['transcription_text'] = t.transcription.text

    progress(0.1, desc="Creating UDFs...")

    # Custom UDF that builds the chat message list for the LLM.
    @pxt.udf
    def prompt(A: str, B: str) -> list[dict]:
        """Build a system+user message pair asking for a platform-aware post."""
        system_msg = 'You are an expert in creating social media content and you generate effective post, based on user content. Respect the social media platform guidelines and constraints.'
        user_msg = f'A: "{A}" \n B: "{B}"'
        return [
            {'role': 'system', 'content': system_msg},
            {'role': 'user', 'content': user_msg},
        ]

    # Apply the UDF as another computed column.
    t['message'] = prompt(t.sm_type, t.transcription_text)

    progress(0.2, desc="Calling LLMs")

    # Generate the post with OpenAI's chat completion API.
    t['response'] = openai.chat_completions(
        messages=t.message, model='gpt-4o-mini-2024-07-18', max_tokens=500
    )
    # Extract just the generated text from the completion payload.
    t['answer'] = t.response.choices[0].message.content

    try:
        progress(0.4, desc="Inserting video...")
        # Inserting the row fires all of the computed columns defined above.
        t.insert([{
            "video": video_file,
            "sm_type": social_media_type,
        }])

        progress(0.6, desc="Generating posts...")
        # Retrieve the generated post, extracted audio, and frame thumbnails.
        social_media_post = t.select(t.answer).tail(1)['answer'][0]
        audio = t.select(t.audio).tail(1)['audio'][0]
        thumbnails = frames_view.select(frames_view.frame).tail(6)['frame']

        progress(0.8, desc="Preparing results...")
        df_output = t.select(t.transcription_text).tail(1)['transcription_text'][0]

        return social_media_post, thumbnails, df_output, audio
    except Exception as e:
        # Surface the failure in the first output and pad the remaining three
        # so Gradio still receives a value for each declared output.
        return f"An error occurred: {str(e)}", None, None, None
# Gradio Interface
import gradio as gr
def gradio_interface():
    """Build and return the Gradio Blocks UI for the post generator.

    Component creation order defines the on-screen layout, so the
    statements below must stay in this order.

    Returns:
        The assembled ``gr.Blocks`` demo (launched by the caller).
    """
    with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
        # Header: Pixeltable logo and app title.
        gr.Markdown("""
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" width="20%" /></img>
<h1>Video to Social Media Post Generator</h1>
"""
        )
        # Short intro blurb linking to the Pixeltable project.
        gr.HTML(
            """
<p>
<a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a> is a declarative interface for working with text, images, embeddings, and even video, enabling you to store, transform, index, and iterate on data.
</p>
"""
        )
        # Two-column feature overview.
        with gr.Row():
            with gr.Column():
                gr.Markdown("""
<ul>
<li><strong>Video Data Management:</strong> Creating tables and views to store and organize video data.</li>
<li><strong>Automated Video Processing:</strong> Extracting frames and audio from videos.</li>
<li><strong>Data Transformation:</strong> Computing and storing metadata, transcriptions, and AI-generated content.</li>
</ul>
""")
            with gr.Column():
                gr.Markdown("""
<ul>
<li><strong>AI Integration:</strong> Utilizing OpenAI's GPT and Whisper models for transcription and content generation.</li>
<li><strong>Custom Functions:</strong> Defining user-defined functions (UDFs) for specialized tasks like prompt construction.</li>
<li><strong>Data Persistence:</strong> Storing transformed data and AI outputs for easy retrieval and analysis.</li>
</ul>
""")
        # Main interaction area: inputs on the left, results on the right.
        with gr.Row():
            with gr.Column():
                # Video upload, capped at MAX_VIDEO_SIZE_MB (enforced in the
                # processing function) and 300 seconds of length.
                video_input = gr.Video(
                    label=f"Upload Video File (max {MAX_VIDEO_SIZE_MB} MB):",
                    include_audio=True,
                    max_length=300,
                    height='400px',
                    autoplay=False
                )
                # Target platform for the generated post.
                social_media_type = gr.Dropdown(
                    choices=["X (Twitter)", "Facebook", "LinkedIn", "Instagram"],
                    label="Select Social Media Platform:",
                    value="X (Twitter)",
                )
                generate_btn = gr.Button("Generate Post")
                # Clickable sample videos (expected next to this script).
                gr.Examples(
                    examples=[["example1.mp4"], ["example2.mp4"], ["example3.mp4"]],
                    inputs=[video_input]
                )
                audio = gr.Audio(label="Extracted audio", show_download_button=True)
            with gr.Column():
                output = gr.Textbox(label="Generated Social Media Post", show_copy_button=True)
                thumbnail = gr.Gallery(
                    label="Pick your favorite Post Thumbnail",
                    show_download_button=True,
                    show_fullscreen_button=True,
                    height='400px'
                )
                df_output = gr.Textbox(label="Transcription", show_copy_button=True)
        # Wire the button to the processing function. The four outputs must
        # match the 4-tuple returned by process_and_generate_post.
        generate_btn.click(
            fn=process_and_generate_post,
            trigger_mode='once',
            show_progress='full',
            inputs=[video_input, social_media_type],
            outputs=[output, thumbnail, df_output, audio],
        )
    return demo
# Script entry point: build the UI once, then serve it (REST API disabled).
if __name__ == "__main__":
    demo = gradio_interface()
    demo.launch(show_api=False)