Manasa1 committed on
Commit
3638d85
·
verified ·
1 Parent(s): 4c305b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -13
app.py CHANGED
@@ -4,40 +4,40 @@ from diffusers import StableDiffusionPipeline
4
  import torch
5
  from PIL import Image, ImageDraw, ImageFont
6
  import scipy.io.wavfile
7
- from TTS.api import TTS
8
  from moviepy.editor import CompositeVideoClip, ImageClip, AudioFileClip, concatenate_videoclips
9
  import os
 
 
10
 
 
 
 
11
 
12
  # Use DistilGPT-2 for text generation
13
  script_generator = pipeline("text-generation", model="distilgpt2", truncation=True, max_length=100)
14
 
15
- # Use Stable Diffusion for image generation
16
- image_generator = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1-base", torch_dtype=torch.float32).to("cpu")
17
-
18
  # Use Coqui TTS for text-to-speech
19
  tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
20
 
21
- # Placeholder for music generation
22
- def generate_fun_music(prompt, output_music_file="fun_music.wav"):
23
- # Generate silence as placeholder
24
- duration = 3 # seconds
25
- sample_rate = 44100
26
- silence = np.zeros(int(sample_rate * duration), dtype=np.int16)
27
- scipy.io.wavfile.write(output_music_file, sample_rate, silence)
28
- return output_music_file
29
 
 
 
30
 
 
31
  def generate_comedy_script(prompt):
32
  script = script_generator(prompt)[0]['generated_text']
33
  return script
34
 
35
-
36
  def text_to_speech(script):
37
  output_audio = 'output.wav'
38
  tts.tts_to_file(text=script, file_path=output_audio)
39
  return output_audio
40
 
 
41
  def create_images_from_script(script):
42
  lines = script.split('. ')
43
  image_paths = []
@@ -48,6 +48,15 @@ def create_images_from_script(script):
48
  image_paths.append(img_path)
49
  return image_paths
50
 
 
 
 
 
 
 
 
 
 
51
  def generate_text_video(script):
52
  image_paths = create_images_from_script(script)
53
  clips = []
@@ -58,6 +67,7 @@ def generate_text_video(script):
58
  final_video.write_videofile("/tmp/final_video.mp4", fps=24)
59
  return "/tmp/final_video.mp4"
60
 
 
61
  def combine_audio_video(video_path, audio_path):
62
  video = VideoFileClip(video_path)
63
  audio = AudioFileClip(audio_path)
@@ -65,6 +75,7 @@ def combine_audio_video(video_path, audio_path):
65
  final_video.write_videofile("/tmp/final_comedy_video.mp4", fps=24)
66
  return "/tmp/final_comedy_video.mp4"
67
 
 
68
  def generate_comedy_and_animation(prompt):
69
  script = generate_comedy_script(prompt)
70
  audio_file = text_to_speech(script)
@@ -73,6 +84,57 @@ def generate_comedy_and_animation(prompt):
73
  final_video = combine_audio_video(video_file, fun_music)
74
  return script, audio_file, final_video
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
 
78
 
 
4
  import torch
5
  from PIL import Image, ImageDraw, ImageFont
6
  import scipy.io.wavfile
7
+ from TTS.api import TTS # Coqui TTS (open source)
8
  from moviepy.editor import CompositeVideoClip, ImageClip, AudioFileClip, concatenate_videoclips
9
  import os
10
+ from groq import Groq
11
+ from deepgram import Deepgram
12
 
13
# Initialize API clients. Both keys are read from the environment; a missing
# variable yields None rather than raising here.
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# The Deepgram variable name was originally misspelled "DEEGRAM_API_KEY".
# Prefer the correctly spelled name and fall back to the old spelling so a
# deployment that already defined the misspelled secret keeps working.
# NOTE(review): confirm the installed deepgram SDK's `Deepgram` constructor
# accepts an `api_key` keyword argument.
deepgram_client = Deepgram(
    api_key=os.environ.get("DEEPGRAM_API_KEY") or os.environ.get("DEEGRAM_API_KEY")
)

# Use DistilGPT-2 for text generation
script_generator = pipeline("text-generation", model="distilgpt2", truncation=True, max_length=100)

# Use Coqui TTS for text-to-speech (GPU disabled)
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)

# Use MusicGen for music generation (CPU)
music_generator = pipeline("text-to-audio", model="facebook/musicgen-small", device="cpu")

# Use Stable Diffusion for image generation (float32 weights, moved to CPU)
image_generator = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1-base", torch_dtype=torch.float32).to("cpu")
28
 
29
# Generate a comedy script with the DistilGPT-2 text-generation pipeline
def generate_comedy_script(prompt):
    """Return the text generated by ``script_generator`` for ``prompt``.

    The pipeline is invoked with ``prompt`` and the ``'generated_text'``
    field of its first result is returned unchanged.
    """
    generations = script_generator(prompt)
    first_result = generations[0]
    return first_result['generated_text']
33
 
34
# Convert the script to speech with the Coqui TTS model
def text_to_speech(script):
    """Synthesize ``script`` to ``output.wav`` and return that file path.

    The path is relative, so the file is written to the current working
    directory and overwritten on every call.
    """
    wav_path = 'output.wav'
    tts.tts_to_file(file_path=wav_path, text=script)
    return wav_path
39
 
40
+ # Create Images Using Stable Diffusion
41
  def create_images_from_script(script):
42
  lines = script.split('. ')
43
  image_paths = []
 
48
  image_paths.append(img_path)
49
  return image_paths
50
 
51
# Generate a music track with the MusicGen text-to-audio pipeline
def generate_fun_music(prompt, output_music_file="fun_music.wav"):
    """Generate music for ``prompt`` and save it as a WAV file.

    Args:
        prompt: Text passed to ``music_generator``.
        output_music_file: Destination path of the WAV file.

    Returns:
        ``output_music_file``, after the audio has been written to it.
    """
    generated = music_generator(prompt)
    # The pipeline result carries both the samples and their sampling rate.
    scipy.io.wavfile.write(
        output_music_file,
        data=generated["audio"],
        rate=generated["sampling_rate"],
    )
    return output_music_file
58
+
59
+ # Create Video from Generated Images
60
  def generate_text_video(script):
61
  image_paths = create_images_from_script(script)
62
  clips = []
 
67
  final_video.write_videofile("/tmp/final_video.mp4", fps=24)
68
  return "/tmp/final_video.mp4"
69
 
70
+ # Combine Audio and Video
71
  def combine_audio_video(video_path, audio_path):
72
  video = VideoFileClip(video_path)
73
  audio = AudioFileClip(audio_path)
 
75
  final_video.write_videofile("/tmp/final_comedy_video.mp4", fps=24)
76
  return "/tmp/final_comedy_video.mp4"
77
 
78
+ # Main Function to Generate Comedy Animation
79
  def generate_comedy_and_animation(prompt):
80
  script = generate_comedy_script(prompt)
81
  audio_file = text_to_speech(script)
 
84
  final_video = combine_audio_video(video_file, fun_music)
85
  return script, audio_file, final_video
86
 
87
# Generate Kids Content
def generate_kids_content(theme):
    """Generate a music track and a simple title-card video for ``theme``.

    A music file is produced with ``generate_fun_music``; five 800x400 blue
    frames showing ``"Kids Music: <theme>"`` in yellow are rendered to
    ``/tmp/kids_temp_<i>.png``, shown for one second each in sequence, and
    written with the music as ``/tmp/kids_animation.mp4``.

    Args:
        theme: Text used both as the music prompt and in the frame caption.

    Returns:
        A ``(music_file, video_path)`` tuple of file paths.
    """
    music_file = generate_fun_music(theme, output_music_file="kids_music.wav")

    # The default font does not change between frames, so load it once.
    fnt = ImageFont.load_default()
    clips = []
    for i in range(5):
        img = Image.new('RGB', (800, 400), color=(0, 0, 255))
        ImageDraw.Draw(img).text((10, 180), f"Kids Music: {theme}", font=fnt, fill=(255, 255, 0))
        frame_path = f'/tmp/kids_temp_{i}.png'
        img.save(frame_path)
        clips.append(ImageClip(frame_path).set_duration(1).set_position(('center', 'center')))

    # Play the frames one after another. Passing them straight to
    # CompositeVideoClip starts every clip at t=0, which stacks them on top
    # of each other and produces a 1-second video instead of a 5-second one.
    final_video = concatenate_videoclips(clips, method="compose")

    # NOTE(review): the music length is not reconciled with the 5-second
    # video; confirm whether the audio should be trimmed or the video extended.
    audio_clip = AudioFileClip(music_file)
    try:
        final_video = final_video.set_audio(audio_clip)
        final_video.write_videofile("/tmp/kids_animation.mp4", fps=24)
    finally:
        # Release the audio reader once the file has been written.
        audio_clip.close()
    return music_file, "/tmp/kids_animation.mp4"
103
+
104
# Gradio user interface: one tab per generator, each wiring a button to its
# generation function.
with gr.Blocks() as app:
    gr.Markdown("## AI Comedy and Kids Content Generator")

    # Tab 1: prompt -> generated script, narration audio and animation video
    with gr.Tab("Generate Comedy Animation"):
        prompt_input = gr.Textbox(label="Comedy Prompt")
        generate_btn = gr.Button("Generate Comedy Script and Animation")
        comedy_script = gr.Textbox(label="Generated Script")
        comedy_audio = gr.Audio(label="Generated Audio")
        comedy_video = gr.Video(label="Generated Animation")

        # Outputs are listed in the order the function returns them.
        generate_btn.click(
            generate_comedy_and_animation,
            inputs=prompt_input,
            outputs=[comedy_script, comedy_audio, comedy_video],
        )

    # Tab 2: theme -> generated music and its accompanying animation
    with gr.Tab("Generate Kids Music Animation"):
        theme_input = gr.Textbox(label="Kids Music Theme")
        generate_music_btn = gr.Button("Generate Kids Music and Animation")
        kids_music_audio = gr.Audio(label="Generated Music")
        kids_music_video = gr.Video(label="Generated Kids Animation")

        # Outputs are listed in the order the function returns them.
        generate_music_btn.click(
            generate_kids_content,
            inputs=theme_input,
            outputs=[kids_music_audio, kids_music_video],
        )

# Start the Gradio server when the module is executed.
app.launch()
136
+
137
+
138
 
139
 
140