mhm committed on
Commit 48dc89b
1 Parent(s): 676f74a

Create app.py

Files changed (1)
  1. app.py +333 -40
app.py CHANGED
@@ -1,54 +1,347 @@
- import gradio as gr
- from gradio_client import Client, file
  import os
  import shutil

- client_gen_image = Client("AP123/SDXL-Lightning")
- client_face_swap = Client("craftgamesnetwork/face-swap")

- def generate_and_swap(text_input, source_image_path):
-
-     gen_result = client_gen_image.predict(
-         text_input,  # Text input
-         "4-Step",  # Inference steps
-         api_name="/generate_image"
-     )
-     print("Image generated successfully.")
-
-     generated_image_path = "generated_image.png"
-     shutil.move(gen_result, generated_image_path)
-
-     swap_result_path = client_face_swap.predict(
-         file(generated_image_path),
-         file(source_image_path),
-         api_name="/predict"
-     )
-     print("Faces swapped successfully.")
-
-     with open(swap_result_path, "rb") as f:
-         swap_result_content = f.read()
-
-     swapped_image_path = "final_image.png"
-     with open(swapped_image_path, "wb") as f:
-         f.write(swap_result_content)
-
-     print("Swapped image saved as:", swapped_image_path)
-     return swapped_image_path

  iface = gr.Interface(
-     generate_and_swap,
-     [
-         gr.Textbox(label="Enter your prompt (English):"),
-         gr.Image(type="filepath", label="Upload your source image:")
-     ],
-     "image",
-     description="Generate free AI image with your or any face. Support me in making better AI codes as I am a solo developer [Click here to Donate](https://nowpayments.io/donation/aheed) Contact me for bulk processing and better AI software +92-332-4399819 Please do not duplicate this space without permission",
-     css="footer {visibility: hidden}",
-     title="AI Image with Any Face"
  )
  iface.launch()
 
+ import json
  import os
  import shutil
+ import subprocess
+ import sys
+ import time
+ import math
+ import cv2
+ import requests
+ from pydub import AudioSegment
+ import numpy as np
+ from dotenv import load_dotenv
+ import gradio as gr
+
+ # Load environment variables from .env file
+ load_dotenv(override=True)
+
+ # Read API keys from environment variables
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+ LEMONFOX_API_KEY = os.getenv("LEMONFOX_API_KEY")
+
+ narration_api = "openai"
+
+ def parse(narration):
+     data = []
+     narrations = []
+     lines = narration.split("\n")
+     for line in lines:
+         if line.startswith('Narrator: '):
+             text = line.replace('Narrator: ', '')
+             data.append({
+                 "type": "text",
+                 "content": text.strip('"'),
+             })
+             narrations.append(text.strip('"'))
+         elif line.startswith('['):
+             background = line.strip('[]')
+             data.append({
+                 "type": "image",
+                 "description": background,
+             })
+     return data, narrations
+
+ def create(data, output_folder, voice="shimmer"):  # Add voice parameter with default value
+     if not os.path.exists(output_folder):
+         os.makedirs(output_folder)
+
+     n = 0
+     for element in data:
+         if element["type"] != "text":
+             continue
+
+         n += 1
+         output_file = os.path.join(output_folder, f"narration_{n}.mp3")
+
+         if narration_api == "openai":
+             tts_url = 'https://api.openai.com/v1/audio/speech'
+             headers = {
+                 'Authorization': f'Bearer {OPENAI_API_KEY}',
+                 'Content-Type': 'application/json'
+             }
+             payload = {
+                 "model": "tts-1",
+                 "input": element["content"],
+                 "voice": voice  # Use the selected voice here
+             }
+             response = requests.post(tts_url, json=payload, headers=headers)
+
+             if response.status_code == 200:
+                 with open(output_file, "wb") as f:
+                     f.write(response.content)
+             else:
+                 print(f"Failed to generate audio for prompt: {element['content']}. Status Code: {response.status_code}")
+
+ def generate(prompt, output_file, size="576x1024"):
+     url = 'https://api.lemonfox.ai/v1/images/generations'
+     headers = {
+         'Authorization': LEMONFOX_API_KEY,
+         'Content-Type': 'application/json'
+     }
+     data = {
+         'prompt': prompt,
+         'size': size,
+         'n': 1
+     }
+
+     try:
+         response = requests.post(url, json=data, headers=headers)
+         if response.ok:
+             response_data = response.json()
+             if 'data' in response_data and len(response_data['data']) > 0:
+                 image_info = response_data['data'][0]
+                 image_url = image_info['url']
+
+                 image_response = requests.get(image_url)
+                 with open(output_file, 'wb') as f:
+                     f.write(image_response.content)
+
+             else:
+                 print(f"No image data found for prompt: {prompt}")
+         else:
+             print(f"Failed to generate image for prompt: {prompt}. Status Code: {response.status_code}")
+     except Exception as e:
+         print(f"Error occurred while processing prompt: {prompt}")
+         print(str(e))
+
+ def create_from_data(data, output_dir):
+     if not os.path.exists(output_dir):
+         os.makedirs(output_dir)
+
+     image_number = 0
+     for element in data:
+         if element["type"] != "image":
+             continue
+         image_number += 1
+         image_name = f"image_{image_number}.webp"
+         generate(element["description"], os.path.join(output_dir, image_name))
+
+ def get_audio_duration(audio_file):
+     return len(AudioSegment.from_file(audio_file))
+
+ def resize_image(image, width, height):
+     aspect_ratio = image.shape[1] / image.shape[0]
+
+     if aspect_ratio > (width / height):
+         new_width = width
+         new_height = int(width / aspect_ratio)
+     else:
+         new_height = height
+         new_width = int(height * aspect_ratio)
+
+     return cv2.resize(image, (new_width, new_height))
+
+ def write_text(text, frame, video_writer):
+     font = cv2.FONT_HERSHEY_SIMPLEX
+     white_color = (255, 255, 255)
+     black_color = (0, 0, 0)
+     thickness = 10
+     font_scale = 3
+     border = 5
+
+     text_size = cv2.getTextSize(text, font, font_scale, thickness)[0]
+     text_x = (frame.shape[1] - text_size[0]) // 2
+     text_y = (frame.shape[0] + text_size[1]) // 2
+     org = (text_x, text_y)
+
+     frame = cv2.putText(frame, text, org, font, font_scale, black_color, thickness + border * 2, cv2.LINE_AA)
+     frame = cv2.putText(frame, text, org, font, font_scale, white_color, thickness, cv2.LINE_AA)
+
+     video_writer.write(frame)
+
+ def add_narration_to_video(narrations, input_video, output_dir, output_file, text_color, text_position):
+     offset = 50
+     cap = cv2.VideoCapture(input_video)
+     temp_video = os.path.join(output_dir, "with_transcript.mp4")  # Change file extension to MP4
+     out = cv2.VideoWriter(temp_video, cv2.VideoWriter_fourcc(*'mp4v'), 30, (int(cap.get(3)), int(cap.get(4))))
+
+     full_narration = AudioSegment.empty()
+
+     for i, narration in enumerate(narrations):
+         audio = os.path.join(output_dir, "narrations", f"narration_{i+1}.mp3")
+         duration = get_audio_duration(audio)
+         narration_frames = math.floor(duration / 1000 * 30)
+
+         full_narration += AudioSegment.from_file(audio)
+
+         char_count = len(narration.replace(" ", ""))
+         ms_per_char = duration / char_count
+
+         frames_written = 0
+         words = narration.split(" ")
+         for w, word in enumerate(words):
+             word_ms = len(word) * ms_per_char
+
+             if i == 0 and w == 0:
+                 word_ms -= offset
+                 if word_ms < 0:
+                     word_ms = 0
+
+             for _ in range(math.floor(word_ms/1000*30)):
+                 ret, frame = cap.read()
+                 if not ret:
+                     break
+                 write_text(word, frame, out)
+                 frames_written += 1
+
+         for _ in range(narration_frames - frames_written):
+             ret, frame = cap.read()
+             out.write(frame)
+
+     while out.isOpened():
+         ret, frame = cap.read()
+         if not ret:
+             break
+         out.write(frame)
+
+     temp_narration = os.path.join(output_dir, "narration.mp3")
+     full_narration.export(temp_narration, format="mp3")
+
+     cap.release()
+     out.release()
+     cv2.destroyAllWindows()
+
+     ffmpeg_command = [
+         'ffmpeg',
+         '-y',
+         '-i', temp_video,
+         '-i', temp_narration,
+         '-map', '0:v',
+         '-map', '1:a',
+         '-c:v', 'libx264',  # Use H.264 codec
+         '-c:a', 'aac',
+         '-strict', 'experimental',
+         os.path.join(output_dir, output_file)
+     ]
+
+     subprocess.run(ffmpeg_command, capture_output=True)
+
+     os.remove(temp_video)
+     os.remove(temp_narration)
+
+ def create_video(narrations, output_dir, output_file, text_color, text_position):
+     width, height = 1080, 1920
+     frame_rate = 30
+     fade_time = 1000
+
+     fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Change codec to MP4V
+     temp_video = os.path.join(output_dir, "temp_video.mp4")  # Change file extension to MP4
+     out = cv2.VideoWriter(temp_video, fourcc, frame_rate, (width, height))
+
+     image_paths = os.listdir(os.path.join(output_dir, "images"))
+     image_count = len(image_paths)
+
+     for i in range(image_count):
+         image1 = cv2.imread(os.path.join(output_dir, "images", f"image_{i+1}.webp"))
+
+         if i+1 < image_count:
+             image2 = cv2.imread(os.path.join(output_dir, "images", f"image_{i+2}.webp"))
+         else:
+             image2 = cv2.imread(os.path.join(output_dir, "images", f"image_1.webp"))
+
+         image1 = resize_image(image1, width, height)
+         image2 = resize_image(image2, width, height)
+
+         narration = os.path.join(output_dir, "narrations", f"narration_{i+1}.mp3")
+         duration = get_audio_duration(narration)
+
+         if i > 0:
+             duration -= fade_time
+
+         if i == image_count-1:
+             duration -= fade_time
+
+         for _ in range(math.floor(duration/1000*30)):
+             vertical_video_frame = np.zeros((height, width, 3), dtype=np.uint8)
+             vertical_video_frame[:image1.shape[0], :] = image1
+
+             out.write(vertical_video_frame)
+
+         for alpha in np.linspace(0, 1, math.floor(fade_time/1000*30)):
+             blended_image = cv2.addWeighted(image1, 1 - alpha, image2, alpha, 0)
+             vertical_video_frame = np.zeros((height, width, 3), dtype=np.uint8)
+             vertical_video_frame[:image1.shape[0], :] = blended_image
+
+             out.write(vertical_video_frame)
+
+     out.release()
+     cv2.destroyAllWindows()
+
+     add_narration_to_video(narrations, temp_video, output_dir, output_file, text_color, text_position)
+     os.remove(temp_video)
+
+ def generate_video(topic, voice="shimmer"):
+     short_id = str(int(time.time()))
+     basedir = os.path.join("shorts", short_id)
+     if not os.path.exists(basedir):
+         os.makedirs(basedir)
+
+     filename = topic.replace("_", " ").replace("/", "_").replace(".", "_")
+     output_file = f"{filename}.mp4"  # Change file extension to MP4
+
+     chat_url = 'https://api.openai.com/v1/chat/completions'
+     headers = {
+         'Authorization': f'Bearer {OPENAI_API_KEY}',
+         'Content-Type': 'application/json'
+     }
+     payload = {
+         "model": "gpt-3.5-turbo",
+         "messages": [
+             {
+                 "role": "system",
+                 "content": "You are a viral youTube short video creator."
+             },
+             {
+                 "role": "user",
+                 "content": f"""Make a 60 second video on: \n\n{topic} and you will need to generate a very short description of images for each of the scenes. They will be used for background AI images. Note that the script will be fed into a text-to-speech engine, so dont use special characters. Respond with a pair of an image prompt in square brackets and a script below it. Both of them should be on their own lines, as follows:
+ ###
+ [Description of a background image]
+ Narrator: "Sentence of narration"
+ ###"""
+             }
+         ]
+     }
+     response = requests.post(chat_url, json=payload, headers=headers)
+
+     if response.status_code == 200:
+         response_text = response.json()['choices'][0]['message']['content']
+         response_text = response_text.replace("’", "'").replace("`", "'").replace("…", "...").replace("“", '"').replace("”", '"')
+
+         with open(os.path.join(basedir, f"response.txt"), "a") as f:
+             f.write(response_text + "\n")
+
+         data, narrations = parse(response_text)
+         with open(os.path.join(basedir, f"data.json"), "a") as f:
+             json.dump(data, f, ensure_ascii=False)
+             f.write("\n")
+
+         print(f"Generating narration for: {topic}...")
+         create(data, os.path.join(basedir, f"narrations"), voice=voice)
+
+         print("Generating images...")
+         create_from_data(data, os.path.join(basedir, f"images"))
+
+         print("Generating video...")
+         create_video(narrations, basedir, output_file, text_color="white", text_position="center")
+
+         print("Deleting files and folders...")
+         os.remove(os.path.join(basedir, "response.txt"))
+         os.remove(os.path.join(basedir, "data.json"))
+         shutil.rmtree(os.path.join(basedir, "narrations"))
+         shutil.rmtree(os.path.join(basedir, "images"))
+
+         print(f"DONE! Here's your video: {os.path.join(basedir, output_file)}")
+         return os.path.join(basedir, output_file)
+     else:
+         print(f"Failed to generate script for source material: {topic}. Status Code: {response.status_code}")
+         return None

  iface = gr.Interface(
+     concurrency_limit=20,
+     fn=generate_video,
+     inputs=["text", gr.Dropdown(['alloy', 'shimmer', 'fable', 'onyx', 'nova', 'echo'], label="Select Voice")],
+     outputs="video",
+     css=".gradio-container {display: none}"
  )
+
  iface.launch()
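
For reference, a minimal sketch (not part of this commit) of the ###-delimited script format that generate_video() requests from the chat model and the structure parse() would build from it; the sample text and variable names below are hypothetical, illustration only:

# Hypothetical illustration only, not part of app.py.
sample_response = (
    '[A foggy mountain trail at sunrise]\n'
    'Narrator: "Every summit starts with a single step."'
)
# parse(sample_response) would return a (data, narrations) pair shaped like:
expected_data = [
    {"type": "image", "description": "A foggy mountain trail at sunrise"},
    {"type": "text", "content": "Every summit starts with a single step."},
]
expected_narrations = ["Every summit starts with a single step."]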