mhm committed
Commit 48dc89b • 1 Parent(s): 676f74a
Create app.py

app.py CHANGED
@@ -1,54 +1,347 @@
Removed lines (old app.py, with old line numbers; context lines kept in the new file carry no "-"):

-  1  import
-  2  from gradio_client import Client, file
   3  import os
   4  import shutil
- 16  )
- 17  print("Image generated successfully.")
- 28  )
- 29  print("Faces swapped successfully.")
- 33  swap_result_content = f.read()
- 38  f.write(swap_result_content)
  43  iface = gr.Interface(
- 49      "image",
- 50      description="Generate free AI image with your or any face. Support me in making better AI codes as I am a solo developer [Click here to Donate](https://nowpayments.io/donation/aheed) Contact me for bulk processing and better AI software +92-332-4399819 Please do not duplicate this space without permission",
- 51      css="footer {visibility: hidden}",
- 52      title="AI Image with Any Face"
  53  )
  54  iface.launch()
New app.py:

import json
import os
import shutil
import subprocess
import sys
import time
import math
import cv2
import requests
from pydub import AudioSegment
import numpy as np
from dotenv import load_dotenv
import gradio as gr

# Load environment variables from .env file
load_dotenv(override=True)

# Read API keys from environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
LEMONFOX_API_KEY = os.getenv("LEMONFOX_API_KEY")

narration_api = "openai"

def parse(narration):
    data = []
    narrations = []
    lines = narration.split("\n")
    for line in lines:
        if line.startswith('Narrator: '):
            text = line.replace('Narrator: ', '')
            data.append({
                "type": "text",
                "content": text.strip('"'),
            })
            narrations.append(text.strip('"'))
        elif line.startswith('['):
            background = line.strip('[]')
            data.append({
                "type": "image",
                "description": background,
            })
    return data, narrations

def create(data, output_folder, voice="shimmer"):  # Add voice parameter with default value
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    n = 0
    for element in data:
        if element["type"] != "text":
            continue

        n += 1
        output_file = os.path.join(output_folder, f"narration_{n}.mp3")

        if narration_api == "openai":
            tts_url = 'https://api.openai.com/v1/audio/speech'
            headers = {
                'Authorization': f'Bearer {OPENAI_API_KEY}',
                'Content-Type': 'application/json'
            }
            payload = {
                "model": "tts-1",
                "input": element["content"],
                "voice": voice  # Use the selected voice here
            }
            response = requests.post(tts_url, json=payload, headers=headers)

            if response.status_code == 200:
                with open(output_file, "wb") as f:
                    f.write(response.content)
            else:
                print(f"Failed to generate audio for prompt: {element['content']}. Status Code: {response.status_code}")

def generate(prompt, output_file, size="576x1024"):
    url = 'https://api.lemonfox.ai/v1/images/generations'
    headers = {
        'Authorization': LEMONFOX_API_KEY,
        'Content-Type': 'application/json'
    }
    data = {
        'prompt': prompt,
        'size': size,
        'n': 1
    }

    try:
        response = requests.post(url, json=data, headers=headers)
        if response.ok:
            response_data = response.json()
            if 'data' in response_data and len(response_data['data']) > 0:
                image_info = response_data['data'][0]
                image_url = image_info['url']

                image_response = requests.get(image_url)
                with open(output_file, 'wb') as f:
                    f.write(image_response.content)
            else:
                print(f"No image data found for prompt: {prompt}")
        else:
            print(f"Failed to generate image for prompt: {prompt}. Status Code: {response.status_code}")
    except Exception as e:
        print(f"Error occurred while processing prompt: {prompt}")
        print(str(e))

def create_from_data(data, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    image_number = 0
    for element in data:
        if element["type"] != "image":
            continue
        image_number += 1
        image_name = f"image_{image_number}.webp"
        generate(element["description"], os.path.join(output_dir, image_name))

def get_audio_duration(audio_file):
    return len(AudioSegment.from_file(audio_file))

def resize_image(image, width, height):
    aspect_ratio = image.shape[1] / image.shape[0]

    if aspect_ratio > (width / height):
        new_width = width
        new_height = int(width / aspect_ratio)
    else:
        new_height = height
        new_width = int(height * aspect_ratio)

    return cv2.resize(image, (new_width, new_height))

def write_text(text, frame, video_writer):
    font = cv2.FONT_HERSHEY_SIMPLEX
    white_color = (255, 255, 255)
    black_color = (0, 0, 0)
    thickness = 10
    font_scale = 3
    border = 5

    text_size = cv2.getTextSize(text, font, font_scale, thickness)[0]
    text_x = (frame.shape[1] - text_size[0]) // 2
    text_y = (frame.shape[0] + text_size[1]) // 2
    org = (text_x, text_y)

    frame = cv2.putText(frame, text, org, font, font_scale, black_color, thickness + border * 2, cv2.LINE_AA)
    frame = cv2.putText(frame, text, org, font, font_scale, white_color, thickness, cv2.LINE_AA)

    video_writer.write(frame)

def add_narration_to_video(narrations, input_video, output_dir, output_file, text_color, text_position):
    offset = 50
    cap = cv2.VideoCapture(input_video)
    temp_video = os.path.join(output_dir, "with_transcript.mp4")  # Change file extension to MP4
    out = cv2.VideoWriter(temp_video, cv2.VideoWriter_fourcc(*'mp4v'), 30, (int(cap.get(3)), int(cap.get(4))))

    full_narration = AudioSegment.empty()

    for i, narration in enumerate(narrations):
        audio = os.path.join(output_dir, "narrations", f"narration_{i+1}.mp3")
        duration = get_audio_duration(audio)
        narration_frames = math.floor(duration / 1000 * 30)

        full_narration += AudioSegment.from_file(audio)

        char_count = len(narration.replace(" ", ""))
        ms_per_char = duration / char_count

        frames_written = 0
        words = narration.split(" ")
        for w, word in enumerate(words):
            word_ms = len(word) * ms_per_char

            if i == 0 and w == 0:
                word_ms -= offset
                if word_ms < 0:
                    word_ms = 0

            for _ in range(math.floor(word_ms/1000*30)):
                ret, frame = cap.read()
                if not ret:
                    break
                write_text(word, frame, out)
                frames_written += 1

        for _ in range(narration_frames - frames_written):
            ret, frame = cap.read()
            out.write(frame)

    while out.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        out.write(frame)

    temp_narration = os.path.join(output_dir, "narration.mp3")
    full_narration.export(temp_narration, format="mp3")

    cap.release()
    out.release()
    cv2.destroyAllWindows()

    ffmpeg_command = [
        'ffmpeg',
        '-y',
        '-i', temp_video,
        '-i', temp_narration,
        '-map', '0:v',
        '-map', '1:a',
        '-c:v', 'libx264',  # Use H.264 codec
        '-c:a', 'aac',
        '-strict', 'experimental',
        os.path.join(output_dir, output_file)
    ]

    subprocess.run(ffmpeg_command, capture_output=True)

    os.remove(temp_video)
    os.remove(temp_narration)

def create_video(narrations, output_dir, output_file, text_color, text_position):
    width, height = 1080, 1920
    frame_rate = 30
    fade_time = 1000

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Change codec to MP4V
    temp_video = os.path.join(output_dir, "temp_video.mp4")  # Change file extension to MP4
    out = cv2.VideoWriter(temp_video, fourcc, frame_rate, (width, height))

    image_paths = os.listdir(os.path.join(output_dir, "images"))
    image_count = len(image_paths)

    for i in range(image_count):
        image1 = cv2.imread(os.path.join(output_dir, "images", f"image_{i+1}.webp"))

        if i+1 < image_count:
            image2 = cv2.imread(os.path.join(output_dir, "images", f"image_{i+2}.webp"))
        else:
            image2 = cv2.imread(os.path.join(output_dir, "images", f"image_1.webp"))

        image1 = resize_image(image1, width, height)
        image2 = resize_image(image2, width, height)

        narration = os.path.join(output_dir, "narrations", f"narration_{i+1}.mp3")
        duration = get_audio_duration(narration)

        if i > 0:
            duration -= fade_time

        if i == image_count-1:
            duration -= fade_time

        for _ in range(math.floor(duration/1000*30)):
            vertical_video_frame = np.zeros((height, width, 3), dtype=np.uint8)
            vertical_video_frame[:image1.shape[0], :] = image1

            out.write(vertical_video_frame)

        for alpha in np.linspace(0, 1, math.floor(fade_time/1000*30)):
            blended_image = cv2.addWeighted(image1, 1 - alpha, image2, alpha, 0)
            vertical_video_frame = np.zeros((height, width, 3), dtype=np.uint8)
            vertical_video_frame[:image1.shape[0], :] = blended_image

            out.write(vertical_video_frame)

    out.release()
    cv2.destroyAllWindows()

    add_narration_to_video(narrations, temp_video, output_dir, output_file, text_color, text_position)
    os.remove(temp_video)

def generate_video(topic, voice="shimmer"):
    short_id = str(int(time.time()))
    basedir = os.path.join("shorts", short_id)
    if not os.path.exists(basedir):
        os.makedirs(basedir)

    filename = topic.replace("_", " ").replace("/", "_").replace(".", "_")
    output_file = f"{filename}.mp4"  # Change file extension to MP4

    chat_url = 'https://api.openai.com/v1/chat/completions'
    headers = {
        'Authorization': f'Bearer {OPENAI_API_KEY}',
        'Content-Type': 'application/json'
    }
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {
                "role": "system",
                "content": "You are a viral youTube short video creator."
            },
            {
                "role": "user",
                "content": f"""Make a 60 second video on: \n\n{topic} and you will need to generate a very short description of images for each of the scenes. They will be used for background AI images. Note that the script will be fed into a text-to-speech engine, so dont use special characters. Respond with a pair of an image prompt in square brackets and a script below it. Both of them should be on their own lines, as follows:
###
[Description of a background image]
Narrator: "Sentence of narration"
###"""
            }
        ]
    }
    response = requests.post(chat_url, json=payload, headers=headers)

    if response.status_code == 200:
        response_text = response.json()['choices'][0]['message']['content']
        response_text = response_text.replace("’", "'").replace("`", "'").replace("…", "...").replace("“", '"').replace("”", '"')

        with open(os.path.join(basedir, f"response.txt"), "a") as f:
            f.write(response_text + "\n")

        data, narrations = parse(response_text)
        with open(os.path.join(basedir, f"data.json"), "a") as f:
            json.dump(data, f, ensure_ascii=False)
            f.write("\n")

        print(f"Generating narration for: {topic}...")
        create(data, os.path.join(basedir, f"narrations"), voice=voice)

        print("Generating images...")
        create_from_data(data, os.path.join(basedir, f"images"))

        print("Generating video...")
        create_video(narrations, basedir, output_file, text_color="white", text_position="center")

        print("Deleting files and folders...")
        os.remove(os.path.join(basedir, "response.txt"))
        os.remove(os.path.join(basedir, "data.json"))
        shutil.rmtree(os.path.join(basedir, "narrations"))
        shutil.rmtree(os.path.join(basedir, "images"))

        print(f"DONE! Here's your video: {os.path.join(basedir, output_file)}")
        return os.path.join(basedir, output_file)
    else:
        print(f"Failed to generate script for source material: {topic}. Status Code: {response.status_code}")
        return None

iface = gr.Interface(
    concurrency_limit=20,
    fn=generate_video,
    inputs=["text", gr.Dropdown(['alloy', 'shimmer', 'fable', 'onyx', 'nova', 'echo'], label="Select Voice")],
    outputs="video",
    css=".gradio-container {display: none}"
)

iface.launch()
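
For reference, a minimal, self-contained sketch (not part of the commit) of the intermediate structure this script builds: parse() turns the model's "[image description]" / 'Narrator: "..."' pairs into a flat list of dicts, which create() (narration audio) and create_from_data() (background images) then consume before create_video() stitches everything together. The sample scene text below is hypothetical and only illustrates the format requested by the prompt in generate_video().

import json

# Hypothetical model output in the format generate_video() asks for.
sample_response = '''[A wide aerial shot of a city skyline at dawn]
Narrator: "Every morning, millions of people start their commute."
[A crowded subway platform in motion blur]
Narrator: "Most of them will spend over an hour getting to work."'''

# What parse(sample_response) would return for the text above:
# image descriptions and narration lines, in the order they appear.
data = [
    {"type": "image", "description": "A wide aerial shot of a city skyline at dawn"},
    {"type": "text", "content": "Every morning, millions of people start their commute."},
    {"type": "image", "description": "A crowded subway platform in motion blur"},
    {"type": "text", "content": "Most of them will spend over an hour getting to work."},
]
# ...plus the narration strings on their own, in order.
narrations = [d["content"] for d in data if d["type"] == "text"]

# create() writes narration_1.mp3, narration_2.mp3, ... into <basedir>/narrations,
# create_from_data() writes image_1.webp, image_2.webp, ... into <basedir>/images,
# and create_video() assembles them into the final vertical MP4.
print(json.dumps(data, indent=2))

Running the real pipeline additionally requires a .env file providing OPENAI_API_KEY and LEMONFOX_API_KEY (both read at startup via load_dotenv) and an ffmpeg binary on PATH for the final mux step.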