Spaces:
Runtime error
Runtime error
Update
Browse files
app.py
CHANGED
@@ -1,4 +1,63 @@
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
fastspeech = gr.Interface.load("huggingface/facebook/fastspeech2-en-ljspeech")
|
4 |
|
@@ -6,14 +65,34 @@ def text2speech(text):
|
|
6 |
return fastspeech(text)
|
7 |
|
8 |
def engine(text_input):
|
|
|
9 |
ner = gr.Interface.load("huggingface/flair/ner-english-ontonotes-large")
|
10 |
entities = ner(text_input)
|
11 |
entities = [tupl for tupl in entities if None not in tupl]
|
12 |
entities_num = len(entities)
|
13 |
|
14 |
#img = run(text_input,'50','256','256','1',10) #entities[0][0]
|
15 |
-
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
#img_intfc = gr.Interface.load("spaces/multimodalart/latentdiffusion", inputs=[gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text")],
|
18 |
#outputs=[gr.outputs.Image(type="pil", label="output image"),gr.outputs.Carousel(label="Individual images",components=["image"]),gr.outputs.Textbox(label="Error")], )
|
19 |
#title="Convert text to image")
|
@@ -28,13 +107,14 @@ def engine(text_input):
|
|
28 |
#inputs=['George',50,256,256,1,10]
|
29 |
#run(prompt, steps, width, height, images, scale)
|
30 |
|
31 |
-
|
32 |
-
return img, entities, speech
|
33 |
|
34 |
#image = gr.outputs.Image(type="pil", label="output image")
|
35 |
app = gr.Interface(engine,
|
36 |
gr.inputs.Textbox(lines=5, label="Input Text"),
|
37 |
-
|
|
|
38 |
#live=True,
|
39 |
#outputs=[#gr.outputs.Textbox(type="auto", label="Text"),gr.outputs.Audio(type="file", label="Speech Answer"),
|
40 |
#outputs= img, #gr.outputs.Carousel(label="Individual images",components=["image"]), #, gr.outputs.Textbox(label="Error")],
|
|
|
1 |
import gradio as gr
|
2 |
+
import moviepy.video.io.ImageSequenceClip
|
3 |
+
#image_folder= '/content/gdrive/My Drive/AI/sample_imgs/'
|
4 |
+
from PIL import Image
|
5 |
+
#import os, sys
|
6 |
+
from pydub import AudioSegment
|
7 |
+
# Import everything needed to edit video clips
|
8 |
+
from moviepy.editor import *
|
9 |
+
|
10 |
+
#path = "/content/gdrive/My Drive/AI/sample_imgs/"
|
11 |
+
#dirs = os.listdir( path )
|
12 |
+
|
13 |
+
def resize(img_list):
    """Resize every image in *img_list* to 256x256.

    Parameters
    ----------
    img_list : list
        Items accepted by ``PIL.Image.open`` (file paths or file-like objects).

    Returns
    -------
    list of ``PIL.Image.Image``
        The opened images, each resized to 256x256.
    """
    resize_img_list = []
    for item in img_list:
        im = Image.open(item)
        # Image.ANTIALIAS was an alias of LANCZOS and was removed in Pillow 10;
        # using LANCZOS directly works on all Pillow versions since 2.7.
        imResize = im.resize((256, 256), Image.LANCZOS)
        resize_img_list.append(imResize)
    return resize_img_list
|
20 |
+
|
21 |
+
#def resize():
|
22 |
+
# for item in dirs:
|
23 |
+
# if os.path.isfile(path+item):
|
24 |
+
# im = Image.open(path+item)
|
25 |
+
# f, e = os.path.splitext(path+item)
|
26 |
+
# imResize = im.resize((256,256), Image.ANTIALIAS)
|
27 |
+
# imResize.save(f + ' resized.jpg', 'JPEG', quality=90)
|
28 |
+
|
29 |
+
#resize_img_list = resize(img_list)
|
30 |
+
|
31 |
+
|
32 |
+
#image_files = [os.path.join(image_folder,img)
|
33 |
+
# for img in resize_img_list
|
34 |
+
# if img.endswith(".jpg")]
|
35 |
+
#print(image_files)
|
36 |
+
def images_to_video(fps, resize_img_list):
    """Build a silent moviepy clip from a list of frames at the given fps."""
    # Delegate straight to moviepy's ImageSequenceClip; the caller supplies
    # already-resized frames and the desired frame rate.
    return moviepy.video.io.ImageSequenceClip.ImageSequenceClip(
        resize_img_list, fps=fps
    )
|
39 |
+
#clip.write_videofile('/content/gdrive/My Drive/AI/my_vid_20apr.mp4')
|
40 |
+
|
41 |
+
|
42 |
+
def merge_audio_video(speech, clip):
    """Attach the synthesized speech track to the generated video.

    *speech* is a FLAC audio file produced by the TTS step; *clip* is handed
    to ``VideoFileClip``.  Returns the video clip with audio attached.
    """
    # Transcode the FLAC speech output to mp3 so moviepy can read it.
    flac_audio = AudioSegment.from_file(speech, "flac")
    flac_audio.export("audio.mp3", format="mp3")

    # NOTE(review): VideoFileClip expects a filename/URL, but `engine`
    # currently passes a moviepy clip object here — confirm the handoff.
    video = VideoFileClip(clip)

    # Load the transcoded speech as an audio clip.
    speech_track = AudioFileClip('audio.mp3')

    # Combine audio and video and hand the merged clip back to the caller.
    merged = video.set_audio(speech_track)
    return merged
|
60 |
+
|
61 |
|
62 |
fastspeech = gr.Interface.load("huggingface/facebook/fastspeech2-en-ljspeech")
|
63 |
|
|
|
65 |
return fastspeech(text)
|
66 |
|
67 |
def engine(text_input):
    """Full pipeline: text -> named entities -> images -> video with speech.

    Parameters
    ----------
    text_input : str
        Free-form text typed into the Gradio textbox.

    Returns
    -------
    The merged audio+video clip produced by ``merge_audio_video``.
    """
    # Extract named entities from the input text.
    ner = gr.Interface.load("huggingface/flair/ner-english-ontonotes-large")
    entities = ner(text_input)
    entities = [tupl for tupl in entities if None not in tupl]
    entities_num = len(entities)

    # Load the image-generation Space ONCE instead of once per entity —
    # the original re-loaded the remote interface on every loop iteration.
    latent = gr.Interface.load("spaces/multimodalart/latentdiffusion")

    # Generate one image per entity; index [0] keeps only the output image.
    img_list = []
    for ent in entities:
        img = latent(ent[0], '50', '256', '256', '1', 10)[0]
        img_list.append(img)

    # Resize all generated images to a uniform 256x256.
    resize_img_list = resize(img_list)

    # Convert the input text to speech (FLAC file path).
    speech = text2speech(text_input)

    # Frames per second chosen so the frames roughly span the ~19 s audio —
    # TODO confirm the audio length instead of hard-coding 19.
    fps = entities_num / 19

    # Assemble the frames into a video, then merge with the audio track.
    clip = images_to_video(fps, resize_img_list)
    merged_file = merge_audio_video(speech, clip)

    return merged_file
|
112 |
|
113 |
#image = gr.outputs.Image(type="pil", label="output image")
|
114 |
app = gr.Interface(engine,
|
115 |
gr.inputs.Textbox(lines=5, label="Input Text"),
|
116 |
+
gradio.outputs.Video(self, type=None, label=None),
|
117 |
+
#[gr.outputs.Image(type="auto", label="Output"), gr.outputs.Textbox(type="auto", label="Text"), gr.outputs.Audio(type="file", label="Speech Answer") ],
|
118 |
#live=True,
|
119 |
#outputs=[#gr.outputs.Textbox(type="auto", label="Text"),gr.outputs.Audio(type="file", label="Speech Answer"),
|
120 |
#outputs= img, #gr.outputs.Carousel(label="Individual images",components=["image"]), #, gr.outputs.Textbox(label="Error")],
|