1littlecoder committed
Commit: f39513f
Parent: 304ad78

black formatted
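
The commit message says the file was reformatted with Black; the exact invocation is not recorded in the commit, so the following is only an illustrative sketch of applying Black to app.py programmatically (it assumes the black package is installed and relies on Black's default 88-character line length):

# Illustrative sketch, not part of this commit: reformat app.py with Black.
# Assumes `pip install black`; black.format_str() and black.FileMode() are
# Black's Python API, with FileMode() defaulting to a line length of 88.
import black

with open("app.py") as f:
    source = f.read()

formatted = black.format_str(source, mode=black.FileMode())

with open("app.py", "w") as f:
    f.write(formatted)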

Files changed (1): app.py (+139, -71)
app.py CHANGED
@@ -7,24 +7,24 @@ from pixeltable.functions import openai as pxop
 import openai
 
 # pixeltable setup
-db_directory = 'video_db'
-table_name = 'video_table'
+db_directory = "video_db"
+table_name = "video_table"
 
 # constants
 
 MAX_VIDEO_SIZE_MB = 35
 GPT_MODEL = "gpt-4o-mini-2024-07-18"
 MAX_TOKENS = 500
-WHISPER_MODEL = 'whisper-1'
+WHISPER_MODEL = "whisper-1"
 
 # Set your OpenAI API key
-if 'OPENAI_API_KEY' not in os.environ:
-    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')
+if "OPENAI_API_KEY" not in os.environ:
+    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
 
 
-pxt.drop_dir('video_db', force=True)
+pxt.drop_dir("video_db", force=True)
 if table_name in pxt.list_tables():
-    pxt.drop_table('video_db.video_table')
+    pxt.drop_table("video_db.video_table")
 
 # Check if the directory exists, if not, create it
 if db_directory not in pxt.list_dirs():
@@ -34,28 +34,36 @@ else:
 
 # Check if the table exists, if not, create it
 if table_name not in pxt.list_tables():
-    t = pxt.create_table(f'{db_directory}.{table_name}',
-                         {
-                             'video': pxt.VideoType(),
-                             'video_filename': pxt.StringType(),
-                             'sm_type': pxt.StringType(),
-                             'sm_post': pxt.StringType()
-                         })
+    t = pxt.create_table(
+        f"{db_directory}.{table_name}",
+        {
+            "video": pxt.VideoType(),
+            "video_filename": pxt.StringType(),
+            "sm_type": pxt.StringType(),
+            "sm_post": pxt.StringType(),
+        },
+    )
 
 else:
-    t = pxt.load_table(f'{db_directory}.{table_name}')
+    t = pxt.load_table(f"{db_directory}.{table_name}")
     print(f"Table {table_name} already exists. Using the existing table.")
 
 
 # Function to generate social media post using OpenAI GPT-4 API
 def generate_social_media_post(transcript_text, social_media_type):
     response = openai.chat.completions.create(
-        model= GPT_MODEL,
+        model=GPT_MODEL,
         messages=[
-            {"role": "system", "content": f"You are an expert in creating social media content for {social_media_type}."},
-            {"role": "user", "content": f"Generate an effective and casual social media post based on this video transcript below. Make it a viral and suitable post for {social_media_type}. Transcript:\n{transcript_text}."}
+            {
+                "role": "system",
+                "content": f"You are an expert in creating social media content for {social_media_type}.",
+            },
+            {
+                "role": "user",
+                "content": f"Generate an effective and casual social media post based on this video transcript below. Make it a viral and suitable post for {social_media_type}. Transcript:\n{transcript_text}.",
+            },
         ],
-        max_tokens=MAX_TOKENS
+        max_tokens=MAX_TOKENS,
     )
     return response.choices[0].message.content
 
@@ -72,83 +80,125 @@ def process_and_generate_post(video_file, social_media_type):
         video_filename = os.path.basename(video_file)
         tr_audio_gen_flag = True
         sm_gen_flag = True
-        print("##################\nthe video file and social media are..."+video_file+"....."+social_media_type)
+        print(
+            "##################\nthe video file and social media are..."
+            + video_file
+            + "....."
+            + social_media_type
+        )
         video_df = t.where(t.video_filename == video_filename).tail(1)
-
-        if t.select().where(t.video_filename == video_filename).count() >=1:
-            #print('Video Exists')
+
+        if t.select().where(t.video_filename == video_filename).count() >= 1:
+            # print('Video Exists')
             tr_audio_gen_flag = False
 
         # Check if video and sm type exists
-        video_type_df = t.where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).tail(1)
+        video_type_df = t.where(
+            (t.video_filename == video_filename) & (t.sm_type == social_media_type)
+        ).tail(1)
 
         if video_type_df:
-            #print('Video & Type Exists')
+            # print('Video & Type Exists')
             sm_gen_flag = False
 
-        #print(video_df)
+        # print(video_df)
+
+        # print('both the cases....')
 
-        #print('both the cases....')
+        # print(video_df and not video_type_df)
 
-        #print(video_df and not video_type_df)
-
-        #print(t.select().where(t.video_filename == video_filename).count() >=1 )
+        # print(t.select().where(t.video_filename == video_filename).count() >=1 )
 
-        #print(t.select().where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).count() >=1 )
+        # print(t.select().where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).count() >=1 )
 
-        if (t.count() < 1) or not (t.select().where(t.video_filename == video_filename).count() >=1) or (video_df and not video_type_df) :
+        if (
+            (t.count() < 1)
+            or not (
+                t.select().where(t.video_filename == video_filename).count() >= 1
+            )
+            or (video_df and not video_type_df)
+        ):
             # Insert video into PixelTable
-            t.insert([{'video': video_file, 'video_filename': video_filename, 'sm_type': social_media_type, 'sm_post': ''}])
-
+            t.insert(
+                [
+                    {
+                        "video": video_file,
+                        "video_filename": video_filename,
+                        "sm_type": social_media_type,
+                        "sm_post": "",
+                    }
+                ]
+            )
+
             if tr_audio_gen_flag:
                 # Extract audio from video
-
-                if not t.get_column(name='audio'):
-                    t['audio'] = extract_audio(t.video, format='mp3')
-                else:
-                    t.audio = extract_audio(t.video, format='mp3')
 
+                if not t.get_column(name="audio"):
+                    t["audio"] = extract_audio(t.video, format="mp3")
+                else:
+                    t.audio = extract_audio(t.video, format="mp3")
 
                 print("########### processing transcription #############")
 
                 # Transcribe audio using OpenAI Whisper API
-                if not t.get_column(name='transcription'):
-                    t['transcription'] = pxop.transcriptions(t.audio, model= WHISPER_MODEL)
+                if not t.get_column(name="transcription"):
+                    t["transcription"] = pxop.transcriptions(
+                        t.audio, model=WHISPER_MODEL
+                    )
                 else:
-                    t.transcription = pxop.transcriptions(t.audio, model= WHISPER_MODEL)
+                    t.transcription = pxop.transcriptions(t.audio, model=WHISPER_MODEL)
+
+        # cur_video_df = t.where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).tail(1)[0]
 
-        #cur_video_df = t.where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).tail(1)[0]
+        filtered_df = t.where(
+            (t.video_filename == video_filename) & (t.sm_type == social_media_type)
+        ).tail(1)
 
-        filtered_df = t.where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).tail(1)
-
         if len(filtered_df) == 0:
             return "No matching video found in the table. Please ensure the video is uploaded correctly and try again."
-
-        cur_video_df = filtered_df[0]
-        plain_text = cur_video_df['transcription']['text']
 
-
-        #plain_text = cur_video_df['transcription']['text']
+        cur_video_df = filtered_df[0]
+        plain_text = cur_video_df["transcription"]["text"]
+
+        # plain_text = cur_video_df['transcription']['text']
+
+        # print(t.show())
+        # print('status of social media type')
+        # print(t.select().where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).count() >=1)
+        if (
+            t.select()
+            .where(
+                (t.video_filename == video_filename)
+                & (t.sm_type == social_media_type)
+                & (t.sm_post != "")
+            )
+            .count()
+            >= 1
+        ):
 
-        #print(t.show())
-        #print('status of social media type')
-        #print(t.select().where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).count() >=1)
-        if t.select().where((t.video_filename == video_filename) & (t.sm_type == social_media_type) & (t.sm_post != '')).count() >=1:
-
             print("retrieving existing social media post")
-            social_media_post = t.select(t.sm_post).where((t.sm_type ==social_media_type) & (t.video_filename == video_filename)).collect()['sm_post']
-            return(social_media_post)
+            social_media_post = (
+                t.select(t.sm_post)
+                .where(
+                    (t.sm_type == social_media_type)
+                    & (t.video_filename == video_filename)
+                )
+                .collect()["sm_post"]
+            )
+            return social_media_post
 
         else:
 
             print("generating new social media post")
-            social_media_post = generate_social_media_post(plain_text, social_media_type)
+            social_media_post = generate_social_media_post(
+                plain_text, social_media_type
+            )
             if sm_gen_flag:
-                cur_video_df.update({'sm_post': social_media_post})
+                cur_video_df.update({"sm_post": social_media_post})
 
-                # print(t.show())
+                # print(t.show())
 
-            return cur_video_df['sm_post']
+            return cur_video_df["sm_post"]
 
     except Exception as e:
         return f"An error occurred: {e}"
@@ -159,24 +209,42 @@ def process_and_generate_post(video_file, social_media_type):
 # Gradio Interface
 def gradio_interface():
     with gr.Blocks(theme=gr.themes.Glass()) as demo:
-        gr.Markdown("""<center><font size=12>Video to Social Media Post Generator</center>""")
-        gr.Markdown("""<div align="center">
+        gr.Markdown(
+            """<center><font size=12>Video to Social Media Post Generator</center>"""
+        )
+        gr.Markdown(
+            """<div align="center">
 <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" width="20%" />
-        """)
-        gr.Markdown("""<center><font size=6>Data Ops powered by <a href="https://github.com/pixeltable/pixeltable">Pixeltable</a></center>""")
-        gr.Markdown("""<center>Pixeltable is a Python library providing a declarative interface for multimodal data (text, images, audio, video). It features built-in versioning, lineage tracking, and incremental updates, enabling users to store, transform, index, and iterate on data for their ML workflows. Data transformations, model inference, and custom logic are embedded as computed columns.
-        </center>""")
+        """
+        )
+        gr.Markdown(
+            """<center><font size=6>Data Ops powered by <a href="https://github.com/pixeltable/pixeltable">Pixeltable</a></center>"""
+        )
+        gr.Markdown(
+            """<center>Pixeltable is a Python library providing a declarative interface for multimodal data (text, images, audio, video). It features built-in versioning, lineage tracking, and incremental updates, enabling users to store, transform, index, and iterate on data for their ML workflows. Data transformations, model inference, and custom logic are embedded as computed columns.
+        </center>"""
+        )
         video_input = gr.Video(label="Upload Video File (max 25 MB):")
-        social_media_type = gr.Dropdown(choices=["X (Twitter)", "Facebook", "LinkedIn"], label="Select Social Media Platform:", value='X (Twitter)')
+        social_media_type = gr.Dropdown(
+            choices=["X (Twitter)", "Facebook", "LinkedIn"],
+            label="Select Social Media Platform:",
+            value="X (Twitter)",
+        )
         generate_btn = gr.Button("Generate Post")
 
         output = gr.Textbox(label="Generated Social Media Post", show_copy_button=True)
 
-        examples = gr.Examples([["example1.mp4"], ["example2.mp4"]], inputs=[video_input])
+        examples = gr.Examples(
+            [["example1.mp4"], ["example2.mp4"]], inputs=[video_input]
+        )
 
-        generate_btn.click(fn=process_and_generate_post, inputs=[video_input, social_media_type], outputs=[output])
+        generate_btn.click(
+            fn=process_and_generate_post,
+            inputs=[video_input, social_media_type],
+            outputs=[output],
+        )
 
     return demo
 
 
-gradio_interface().launch(show_api=False)
+gradio_interface().launch(show_api=False)
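
The Pixeltable description embedded in the Gradio UI above notes that data transformations and model inference are "embedded as computed columns"; in this app, that is exactly what the audio and transcription assignments in the diff do. Below is a condensed sketch of that pattern, limited to calls that appear in the diff and subject to the same pixeltable version assumptions as app.py; the table name is made up for illustration, and the extract_audio import path is assumed because that import sits outside the hunks shown.

# Condensed, illustrative sketch of the computed-column pattern used in app.py.
import pixeltable as pxt
from pixeltable.functions import openai as pxop
from pixeltable.functions.video import extract_audio  # import path assumed; not visible in the diff

# A throwaway table with just a video column (table name is hypothetical).
demo = pxt.create_table("demo_videos", {"video": pxt.VideoType()})

# Assigning an expression to a column makes Pixeltable compute and store it per row.
demo["audio"] = extract_audio(demo.video, format="mp3")
demo["transcription"] = pxop.transcriptions(demo.audio, model="whisper-1")

# Inserting a row triggers the computed columns (audio extraction, then Whisper),
# assuming example1.mp4 from the repo's examples is present locally.
demo.insert([{"video": "example1.mp4"}])
print(demo.select(demo.transcription).collect())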