1littlecoder committed
Commit: f39513f
Parent: 304ad78

black formatted
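
The commit message says the file was reformatted with Black; the exact invocation is not recorded in the commit, so the following is only an illustrative sketch of applying Black to app.py programmatically (it assumes the black package is installed and relies on Black's default 88-character line length):

# Illustrative sketch, not part of this commit: reformat app.py with Black.
# Assumes `pip install black`; black.format_str() and black.FileMode() are
# Black's Python API, with FileMode() defaulting to a line length of 88.
import black

with open("app.py") as f:
    source = f.read()

formatted = black.format_str(source, mode=black.FileMode())

with open("app.py", "w") as f:
    f.write(formatted)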

Files changed (1): app.py (+139, -71)
app.py CHANGED
@@ -7,24 +7,24 @@ from pixeltable.functions import openai as pxop
 import openai
 
 # pixeltable setup
-db_directory = 'video_db'
-table_name = 'video_table'
+db_directory = "video_db"
+table_name = "video_table"
 
 # constants
 
 MAX_VIDEO_SIZE_MB = 35
 GPT_MODEL = "gpt-4o-mini-2024-07-18"
 MAX_TOKENS = 500
-WHISPER_MODEL = 'whisper-1'
+WHISPER_MODEL = "whisper-1"
 
 # Set your OpenAI API key
-if 'OPENAI_API_KEY' not in os.environ:
-    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')
+if "OPENAI_API_KEY" not in os.environ:
+    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
 
 
-pxt.drop_dir('video_db', force=True)
+pxt.drop_dir("video_db", force=True)
 if table_name in pxt.list_tables():
-    pxt.drop_table('video_db.video_table')
+    pxt.drop_table("video_db.video_table")
 
 # Check if the directory exists, if not, create it
 if db_directory not in pxt.list_dirs():
@@ -34,28 +34,36 @@ else:
 
 # Check if the table exists, if not, create it
 if table_name not in pxt.list_tables():
-    t = pxt.create_table(f'{db_directory}.{table_name}',
-                         {
-                             'video': pxt.VideoType(),
-                             'video_filename': pxt.StringType(),
-                             'sm_type': pxt.StringType(),
-                             'sm_post': pxt.StringType()
-                         })
+    t = pxt.create_table(
+        f"{db_directory}.{table_name}",
+        {
+            "video": pxt.VideoType(),
+            "video_filename": pxt.StringType(),
+            "sm_type": pxt.StringType(),
+            "sm_post": pxt.StringType(),
+        },
+    )
 
 else:
-    t = pxt.load_table(f'{db_directory}.{table_name}')
+    t = pxt.load_table(f"{db_directory}.{table_name}")
     print(f"Table {table_name} already exists. Using the existing table.")
 
 
 # Function to generate social media post using OpenAI GPT-4 API
 def generate_social_media_post(transcript_text, social_media_type):
     response = openai.chat.completions.create(
-        model= GPT_MODEL,
+        model=GPT_MODEL,
         messages=[
-            {"role": "system", "content": f"You are an expert in creating social media content for {social_media_type}."},
-            {"role": "user", "content": f"Generate an effective and casual social media post based on this video transcript below. Make it a viral and suitable post for {social_media_type}. Transcript:\n{transcript_text}."}
+            {
+                "role": "system",
+                "content": f"You are an expert in creating social media content for {social_media_type}.",
+            },
+            {
+                "role": "user",
+                "content": f"Generate an effective and casual social media post based on this video transcript below. Make it a viral and suitable post for {social_media_type}. Transcript:\n{transcript_text}.",
+            },
         ],
-        max_tokens=MAX_TOKENS
+        max_tokens=MAX_TOKENS,
     )
     return response.choices[0].message.content
 
@@ -72,83 +80,125 @@ def process_and_generate_post(video_file, social_media_type):
         video_filename = os.path.basename(video_file)
         tr_audio_gen_flag = True
         sm_gen_flag = True
-        print("##################\nthe video file and social media are..."+video_file+"....."+social_media_type)
+        print(
+            "##################\nthe video file and social media are..."
+            + video_file
+            + "....."
+            + social_media_type
+        )
         video_df = t.where(t.video_filename == video_filename).tail(1)
-
-        if t.select().where(t.video_filename == video_filename).count() >=1:
-            #print('Video Exists')
+
+        if t.select().where(t.video_filename == video_filename).count() >= 1:
+            # print('Video Exists')
             tr_audio_gen_flag = False
 
         # Check if video and sm type exists
-        video_type_df = t.where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).tail(1)
+        video_type_df = t.where(
+            (t.video_filename == video_filename) & (t.sm_type == social_media_type)
+        ).tail(1)
 
         if video_type_df:
-            #print('Video & Type Exists')
+            # print('Video & Type Exists')
             sm_gen_flag = False
 
-        #print(video_df)
+        # print(video_df)
+
+        # print('both the cases....')
 
-        #print('both the cases....')
+        # print(video_df and not video_type_df)
 
-        #print(video_df and not video_type_df)
-
-        #print(t.select().where(t.video_filename == video_filename).count() >=1 )
+        # print(t.select().where(t.video_filename == video_filename).count() >=1 )
 
-        #print(t.select().where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).count() >=1 )
+        # print(t.select().where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).count() >=1 )
 
-        if (t.count() < 1) or not (t.select().where(t.video_filename == video_filename).count() >=1) or (video_df and not video_type_df) :
+        if (
+            (t.count() < 1)
+            or not (
+                t.select().where(t.video_filename == video_filename).count() >= 1
+            )
+            or (video_df and not video_type_df)
+        ):
             # Insert video into PixelTable
-            t.insert([{'video': video_file, 'video_filename': video_filename, 'sm_type': social_media_type, 'sm_post': ''}])
-
+            t.insert(
+                [
+                    {
+                        "video": video_file,
+                        "video_filename": video_filename,
+                        "sm_type": social_media_type,
+                        "sm_post": "",
+                    }
+                ]
+            )
+
             if tr_audio_gen_flag:
                 # Extract audio from video
-
-                if not t.get_column(name='audio'):
-                    t['audio'] = extract_audio(t.video, format='mp3')
-                else:
-                    t.audio = extract_audio(t.video, format='mp3')
 
+                if not t.get_column(name="audio"):
+                    t["audio"] = extract_audio(t.video, format="mp3")
+                else:
+                    t.audio = extract_audio(t.video, format="mp3")
 
                 print("########### processing transcription #############")
 
                 # Transcribe audio using OpenAI Whisper API
-                if not t.get_column(name='transcription'):
-                    t['transcription'] = pxop.transcriptions(t.audio, model= WHISPER_MODEL)
+                if not t.get_column(name="transcription"):
+                    t["transcription"] = pxop.transcriptions(
+                        t.audio, model=WHISPER_MODEL
+                    )
                 else:
-                    t.transcription = pxop.transcriptions(t.audio, model= WHISPER_MODEL)
+                    t.transcription = pxop.transcriptions(t.audio, model=WHISPER_MODEL)
+
+        # cur_video_df = t.where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).tail(1)[0]
 
-        #cur_video_df = t.where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).tail(1)[0]
+        filtered_df = t.where(
+            (t.video_filename == video_filename) & (t.sm_type == social_media_type)
+        ).tail(1)
 
-        filtered_df = t.where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).tail(1)
-
         if len(filtered_df) == 0:
             return "No matching video found in the table. Please ensure the video is uploaded correctly and try again."
-
-        cur_video_df = filtered_df[0]
-        plain_text = cur_video_df['transcription']['text']
 
-
-        #plain_text = cur_video_df['transcription']['text']
+        cur_video_df = filtered_df[0]
+        plain_text = cur_video_df["transcription"]["text"]
+
+        # plain_text = cur_video_df['transcription']['text']
+
+        # print(t.show())
+        # print('status of social media type')
+        # print(t.select().where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).count() >=1)
+        if (
+            t.select()
+            .where(
+                (t.video_filename == video_filename)
+                & (t.sm_type == social_media_type)
+                & (t.sm_post != "")
+            )
+            .count()
+            >= 1
+        ):
 
-        #print(t.show())
-        #print('status of social media type')
-        #print(t.select().where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).count() >=1)
-        if t.select().where((t.video_filename == video_filename) & (t.sm_type == social_media_type) & (t.sm_post != '')).count() >=1:
-
             print("retrieving existing social media post")
-            social_media_post = t.select(t.sm_post).where((t.sm_type ==social_media_type) & (t.video_filename == video_filename)).collect()['sm_post']
-            return(social_media_post)
+            social_media_post = (
+                t.select(t.sm_post)
+                .where(
+                    (t.sm_type == social_media_type)
+                    & (t.video_filename == video_filename)
+                )
+                .collect()["sm_post"]
+            )
+            return social_media_post
 
         else:
 
             print("generating new social media post")
-            social_media_post = generate_social_media_post(plain_text, social_media_type)
+            social_media_post = generate_social_media_post(
+                plain_text, social_media_type
+            )
             if sm_gen_flag:
-                cur_video_df.update({'sm_post': social_media_post})
+                cur_video_df.update({"sm_post": social_media_post})
 
-                # print(t.show())
+                # print(t.show())
 
-            return cur_video_df['sm_post']
+            return cur_video_df["sm_post"]
 
     except Exception as e:
         return f"An error occurred: {e}"
@@ -159,24 +209,42 @@ def process_and_generate_post(video_file, social_media_type):
 # Gradio Interface
 def gradio_interface():
     with gr.Blocks(theme=gr.themes.Glass()) as demo:
-        gr.Markdown("""<center><font size=12>Video to Social Media Post Generator</center>""")
-        gr.Markdown("""<div align="center">
+        gr.Markdown(
+            """<center><font size=12>Video to Social Media Post Generator</center>"""
+        )
+        gr.Markdown(
+            """<div align="center">
 <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" width="20%" />
-        """)
-        gr.Markdown("""<center><font size=6>Data Ops powered by <a href="https://github.com/pixeltable/pixeltable">Pixeltable</a></center>""")
-        gr.Markdown("""<center>Pixeltable is a Python library providing a declarative interface for multimodal data (text, images, audio, video). It features built-in versioning, lineage tracking, and incremental updates, enabling users to store, transform, index, and iterate on data for their ML workflows. Data transformations, model inference, and custom logic are embedded as computed columns.
-        </center>""")
+        """
+        )
+        gr.Markdown(
+            """<center><font size=6>Data Ops powered by <a href="https://github.com/pixeltable/pixeltable">Pixeltable</a></center>"""
+        )
+        gr.Markdown(
+            """<center>Pixeltable is a Python library providing a declarative interface for multimodal data (text, images, audio, video). It features built-in versioning, lineage tracking, and incremental updates, enabling users to store, transform, index, and iterate on data for their ML workflows. Data transformations, model inference, and custom logic are embedded as computed columns.
+        </center>"""
+        )
         video_input = gr.Video(label="Upload Video File (max 25 MB):")
-        social_media_type = gr.Dropdown(choices=["X (Twitter)", "Facebook", "LinkedIn"], label="Select Social Media Platform:", value='X (Twitter)')
+        social_media_type = gr.Dropdown(
+            choices=["X (Twitter)", "Facebook", "LinkedIn"],
+            label="Select Social Media Platform:",
+            value="X (Twitter)",
+        )
         generate_btn = gr.Button("Generate Post")
 
         output = gr.Textbox(label="Generated Social Media Post", show_copy_button=True)
 
-        examples = gr.Examples([["example1.mp4"], ["example2.mp4"]], inputs=[video_input])
+        examples = gr.Examples(
+            [["example1.mp4"], ["example2.mp4"]], inputs=[video_input]
+        )
 
-        generate_btn.click(fn=process_and_generate_post, inputs=[video_input, social_media_type], outputs=[output])
+        generate_btn.click(
+            fn=process_and_generate_post,
+            inputs=[video_input, social_media_type],
+            outputs=[output],
+        )
 
     return demo
 
 
-gradio_interface().launch(show_api=False)
+gradio_interface().launch(show_api=False)
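
The Pixeltable description embedded in the Gradio UI above notes that data transformations and model inference are "embedded as computed columns"; in this app, that is exactly what the audio and transcription assignments in the diff do. Below is a condensed sketch of that pattern, limited to calls that appear in the diff and subject to the same pixeltable version assumptions as app.py; the table name is made up for illustration, and the extract_audio import path is assumed because that import sits outside the hunks shown.

# Condensed, illustrative sketch of the computed-column pattern used in app.py.
import pixeltable as pxt
from pixeltable.functions import openai as pxop
from pixeltable.functions.video import extract_audio  # import path assumed; not visible in the diff

# A throwaway table with just a video column (table name is hypothetical).
demo = pxt.create_table("demo_videos", {"video": pxt.VideoType()})

# Assigning an expression to a column makes Pixeltable compute and store it per row.
demo["audio"] = extract_audio(demo.video, format="mp3")
demo["transcription"] = pxop.transcriptions(demo.audio, model="whisper-1")

# Inserting a row triggers the computed columns (audio extraction, then Whisper),
# assuming example1.mp4 from the repo's examples is present locally.
demo.insert([{"video": "example1.mp4"}])
print(demo.select(demo.transcription).collect())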