ysharma (HF staff) committed on
Commit 8d3fb0e
1 Parent(s): 98b7ed3
Files changed (1): app.py (+85 -5)
app.py CHANGED
@@ -1,4 +1,63 @@
import gradio as gr
+ import moviepy.video.io.ImageSequenceClip
+ #image_folder= '/content/gdrive/My Drive/AI/sample_imgs/'
+ from PIL import Image
+ import numpy as np #import os, sys
+ from pydub import AudioSegment
+ # Import everything needed to edit video clips
+ from moviepy.editor import *
+
+ #path = "/content/gdrive/My Drive/AI/sample_imgs/"
+ #dirs = os.listdir( path )
+
+ def resize(img_list):
+     resize_img_list = []
+     for item in img_list:
+         im = Image.open(item)
+         imResize = im.resize((256,256), Image.ANTIALIAS)
+         resize_img_list.append(np.array(imResize)) #ImageSequenceClip needs numpy arrays (or filenames), not PIL images
+     return resize_img_list
+
+ #def resize():
+ #    for item in dirs:
+ #        if os.path.isfile(path+item):
+ #            im = Image.open(path+item)
+ #            f, e = os.path.splitext(path+item)
+ #            imResize = im.resize((256,256), Image.ANTIALIAS)
+ #            imResize.save(f + ' resized.jpg', 'JPEG', quality=90)
+
+ #resize_img_list = resize(img_list)
+
+
+ #image_files = [os.path.join(image_folder,img)
+ #               for img in resize_img_list
+ #               if img.endswith(".jpg")]
+ #print(image_files)
+ def images_to_video(fps, resize_img_list):
+     clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(resize_img_list, fps=fps)
+     return clip
+     #clip.write_videofile('/content/gdrive/My Drive/AI/my_vid_20apr.mp4')
+
+
+ def merge_audio_video(speech, clip):
+     #convert the flac speech audio to mp3 format
+     wav_audio = AudioSegment.from_file(speech, "flac") #("/content/gdrive/My Drive/AI/audio1.flac", "flac")
+     wav_audio.export("audio.mp3", format="mp3") #("/content/gdrive/My Drive/AI/audio1.mp3", format="mp3")
+
+     #write the image-sequence clip out as a video file
+     clip.write_videofile("my_video.mp4") #("/content/gdrive/My Drive/AI/my_video1.mp4")
+
+     #load the exported audio file
+     audioclip = AudioFileClip('audio.mp3') #.subclip(0, 15)
+
+     #add the audio track to the video clip
+     videoclip = VideoFileClip("my_video.mp4").set_audio(audioclip)
+
+     #videoclip.ipython_display() would preview the clip in a notebook
+     videoclip.write_videofile("mergedvideo.mp4") #write the merged file so the Gradio Video output can serve it
+
+     return "mergedvideo.mp4"
+

fastspeech = gr.Interface.load("huggingface/facebook/fastspeech2-en-ljspeech")

 
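For reference, a minimal sketch of the frames-to-video step defined above, using three placeholder 256x256 frames in place of the generated entity images (ImageSequenceClip accepts a list of numpy arrays or image filenames plus an fps):

    import numpy as np
    import moviepy.video.io.ImageSequenceClip

    #three black placeholder frames standing in for the resized entity images
    frames = [np.zeros((256, 256, 3), dtype=np.uint8) for _ in range(3)]
    clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(frames, fps=1)
    clip.write_videofile("preview.mp4") #writes a three-second silent video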
@@ -6,14 +65,34 @@ def text2speech(text):
    return fastspeech(text)

def engine(text_input):
+     #Extract entities from the input text
    ner = gr.Interface.load("huggingface/flair/ner-english-ontonotes-large")
    entities = ner(text_input)
    entities = [tupl for tupl in entities if None not in tupl]
    entities_num = len(entities)

    #img = run(text_input,'50','256','256','1',10) #entities[0][0]
-
-     img = gr.Interface.load("spaces/multimodalart/latentdiffusion")(text_input[0][0],'50','256','256','1',10)[0] #inputs={'prompt':text_input,'steps':'50','width':'256','height':'256','images':'1','scale':10}).launch()
+     #Generate an image for each entity using multimodalart's latent diffusion Space
+     img_list = []
+     for ent in entities:
+         img = gr.Interface.load("spaces/multimodalart/latentdiffusion")(ent[0],'50','256','256','1',10)[0]
+         img_list.append(img)
+
+     #Resize all generated images to the same size
+     resize_img_list = resize(img_list)
+
+     #Convert text to speech using Facebook's FastSpeech2 model from the HF hub
+     speech = text2speech(text_input)
+
+     #Calculate the desired frames per second from the number of entities and the audio length
+     fps = entities_num / 19 #assumes a ~19-second audio file #13 / 19
+
+     #Convert the list of images into a video clip
+     clip = images_to_video(fps, resize_img_list)
+
+     #Merge the video and audio created above
+     merged_file = merge_audio_video(speech, clip)
+     #{'prompt':text_input,'steps':'50','width':'256','height':'256','images':'1','scale':10}).launch()
    #img_intfc = gr.Interface.load("spaces/multimodalart/latentdiffusion", inputs=[gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text")],
    #outputs=[gr.outputs.Image(type="pil", label="output image"),gr.outputs.Carousel(label="Individual images",components=["image"]),gr.outputs.Textbox(label="Error")], )
    #title="Convert text to image")
 
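The fps calculation above divides the entity count by a hard-coded 19-second narration length; with, say, 5 entities that gives fps = 5/19 ≈ 0.26, i.e. each image is held on screen for roughly 3.8 seconds. A sketch of a hypothetical helper that measures the actual audio duration with pydub instead (assuming speech is the path to the flac file returned by the TTS interface):

    from pydub import AudioSegment

    def calculate_fps(speech_path, num_frames):
        #duration_seconds reports the clip length in seconds
        audio_len = AudioSegment.from_file(speech_path, "flac").duration_seconds
        #spread the frames evenly across the narration
        return num_frames / audio_len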
@@ -28,13 +107,14 @@ def engine(text_input):
    #inputs=['George',50,256,256,1,10]
    #run(prompt, steps, width, height, images, scale)

-     speech = text2speech(text_input)
-     return img, entities, speech
+
+     return merged_file #img, entities, speech

#image = gr.outputs.Image(type="pil", label="output image")
app = gr.Interface(engine,
    gr.inputs.Textbox(lines=5, label="Input Text"),
-     [gr.outputs.Image(type="auto", label="Output"), gr.outputs.Textbox(type="auto", label="Text"), gr.outputs.Audio(type="file", label="Speech Answer") ],
+     gr.outputs.Video(type=None, label=None),
+     #[gr.outputs.Image(type="auto", label="Output"), gr.outputs.Textbox(type="auto", label="Text"), gr.outputs.Audio(type="file", label="Speech Answer") ],
    #live=True,
    #outputs=[#gr.outputs.Textbox(type="auto", label="Text"),gr.outputs.Audio(type="file", label="Speech Answer"),
    #outputs= img, #gr.outputs.Carousel(label="Individual images",components=["image"]), #, gr.outputs.Textbox(label="Error")],
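The gr.Interface call is cut off by the hunk boundary here; its closing arguments and the launch call fall outside this diff. For orientation, a minimal sketch of the same wiring with a hypothetical stand-in for engine (the real function returns the path produced by merge_audio_video):

    import gradio as gr

    def demo_engine(text_input):
        #stand-in for the full pipeline: return a path to an existing video file
        return "mergedvideo.mp4"

    app = gr.Interface(demo_engine,
                       gr.inputs.Textbox(lines=5, label="Input Text"),
                       gr.outputs.Video(type=None, label="Generated Video"))
    app.launch()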