Manasa1 committed
Commit e395658
Parent: 3b60319

Update app.py

Files changed (1)
  1. app.py +14 -80
app.py CHANGED
@@ -7,37 +7,37 @@ import scipy.io.wavfile
 from TTS.api import TTS
 from moviepy.editor import CompositeVideoClip, ImageClip, AudioFileClip, concatenate_videoclips
 import os
-from groq import Groq
-from deepgram import Deepgram
 
-# Initialize Clients
-groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
-deepgram_client = Deepgram(api_key=os.environ.get("DEEGRAM_API_KEY"))
 
 # Use DistilGPT-2 for text generation
 script_generator = pipeline("text-generation", model="distilgpt2", truncation=True, max_length=100)
 
-# Use Whisper for text-to-speech
-tts = TTS(model_name="whisper", progress_bar=False, gpu=False)
-
-# Use MusicLM for music generation
-music_generator = pipeline("text-to-audio", model="musiclm", device="cpu")
-
 # Use Stable Diffusion for image generation
 image_generator = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1-base", torch_dtype=torch.float32).to("cpu")
 
-# Generate Comedy Script using DistilGPT-2
+# Use Coqui TTS for text-to-speech
+tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
+
+# Placeholder for music generation
+def generate_fun_music(prompt, output_music_file="fun_music.wav"):
+    # Generate silence as placeholder
+    duration = 3  # seconds
+    sample_rate = 44100
+    silence = np.zeros(int(sample_rate * duration), dtype=np.int16)
+    scipy.io.wavfile.write(output_music_file, sample_rate, silence)
+    return output_music_file
+
+
 def generate_comedy_script(prompt):
     script = script_generator(prompt)[0]['generated_text']
     return script
 
-# Convert Text to Speech using Whisper
+
 def text_to_speech(script):
     output_audio = 'output.wav'
     tts.tts_to_file(text=script, file_path=output_audio)
     return output_audio
 
-# Create Images Using Stable Diffusion
 def create_images_from_script(script):
     lines = script.split('. ')
     image_paths = []
@@ -48,21 +48,6 @@ def create_images_from_script(script):
         image_paths.append(img_path)
     return image_paths
 
-# Generate Fun Music Track using MusicLM
-def generate_fun_music(prompt, output_music_file="fun_music.wav"):
-    # Generate music based on the prompt using MusicLM
-    response = music_generator(prompt)
-
-    # Extract audio and sampling rate from the response
-    audio_data = response["audio"]
-    sampling_rate = response["sampling_rate"]
-
-    # Save the generated music to a file
-    scipy.io.wavfile.write(output_music_file, rate=sampling_rate, data=audio_data)
-
-    return output_music_file
-
-# Create Video from Generated Images
 def generate_text_video(script):
     image_paths = create_images_from_script(script)
     clips = []
@@ -73,7 +58,6 @@ def generate_text_video(script):
     final_video.write_videofile("/tmp/final_video.mp4", fps=24)
     return "/tmp/final_video.mp4"
 
-# Combine Audio and Video
 def combine_audio_video(video_path, audio_path):
     video = VideoFileClip(video_path)
     audio = AudioFileClip(audio_path)
@@ -81,7 +65,6 @@ def combine_audio_video(video_path, audio_path):
     final_video.write_videofile("/tmp/final_comedy_video.mp4", fps=24)
     return "/tmp/final_comedy_video.mp4"
 
-# Main Function to Generate Comedy Animation
def generate_comedy_and_animation(prompt):
     script = generate_comedy_script(prompt)
     audio_file = text_to_speech(script)
@@ -90,55 +73,6 @@ def generate_comedy_and_animation(prompt):
     final_video = combine_audio_video(video_file, fun_music)
     return script, audio_file, final_video
 
-# Generate Kids Content
-def generate_kids_content(theme):
-    music_file = generate_fun_music(theme, output_music_file="kids_music.wav")
-    clips = []
-    for i in range(5):
-        img = Image.new('RGB', (800, 400), color=(0, 0, 255))
-        d = ImageDraw.Draw(img)
-        fnt = ImageFont.load_default()
-        d.text((10, 180), f"Kids Music: {theme}", font=fnt, fill=(255, 255, 0))
-        frame_path = f'/tmp/kids_temp_{i}.png'
-        img.save(frame_path)
-        clips.append(ImageClip(frame_path).set_duration(1).set_position(('center', 'center')))
-    final_video = CompositeVideoClip(clips, size=(800, 400))
-    final_video = final_video.set_audio(AudioFileClip(music_file))
-    final_video.write_videofile("/tmp/kids_animation.mp4", fps=24)
-    return music_file, "/tmp/kids_animation.mp4"
-
-# Gradio Interface
-with gr.Blocks() as app:
-    gr.Markdown("## AI Comedy and Kids Content Generator")
-
-    # Comedy Animation Tab
-    with gr.Tab("Generate Comedy Animation"):
-        prompt_input = gr.Textbox(label="Comedy Prompt")
-        generate_btn = gr.Button("Generate Comedy Script and Animation")
-        comedy_script = gr.Textbox(label="Generated Script")
-        comedy_audio = gr.Audio(label="Generated Audio")
-        comedy_video = gr.Video(label="Generated Animation")
-
-        generate_btn.click(
-            generate_comedy_and_animation,
-            inputs=prompt_input,
-            outputs=[comedy_script, comedy_audio, comedy_video]
-        )
-
-    # Kids Music Animation Tab
-    with gr.Tab("Generate Kids Music Animation"):
-        theme_input = gr.Textbox(label="Kids Music Theme")
-        generate_music_btn = gr.Button("Generate Kids Music and Animation")
-        kids_music_audio = gr.Audio(label="Generated Music")
-        kids_music_video = gr.Video(label="Generated Kids Animation")
-
-        generate_music_btn.click(
-            generate_kids_content,
-            inputs=theme_input,
-            outputs=[kids_music_audio, kids_music_video]
-        )
-
-app.launch()
 
 
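
Two hedged notes on the new code, neither part of the commit itself. First, the added generate_fun_music calls np.zeros, so app.py now depends on numpy being imported as np; whether that import already exists in lines 1-6 is not visible in this hunk. A minimal standalone check of the placeholder's output format:

import numpy as np
import scipy.io.wavfile

# Same recipe as the new placeholder: 3 s of int16 silence at 44.1 kHz.
silence = np.zeros(int(44100 * 3), dtype=np.int16)
scipy.io.wavfile.write("fun_music.wav", 44100, silence)

# Read it back to confirm the format downstream code expects.
rate, data = scipy.io.wavfile.read("fun_music.wav")
print(rate, data.shape, data.dtype)  # 44100 (132300,) int16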
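
Second, if real music generation is wanted later, the removed pipeline("text-to-audio", model="musiclm") call (no public "musiclm" checkpoint exists on the Hub, which is presumably why it was replaced) could be swapped for an available text-to-audio model. A sketch assuming facebook/musicgen-small, keeping the function's signature so callers are unaffected:

from transformers import pipeline
import scipy.io.wavfile

# Hypothetical swap-in: MusicGen instead of the silence placeholder.
music_generator = pipeline("text-to-audio", model="facebook/musicgen-small", device="cpu")

def generate_fun_music(prompt, output_music_file="fun_music.wav"):
    # The text-to-audio pipeline returns a dict with "audio" and "sampling_rate",
    # the same shape of response the removed MusicLM code expected.
    response = music_generator(prompt)
    scipy.io.wavfile.write(output_music_file, rate=response["sampling_rate"], data=response["audio"])
    return output_music_file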