ysharma (HF staff) committed
Commit f09eeaa
1 Parent(s): 0bce71c
Files changed (1)
  1. app.py +93 -39
app.py CHANGED
@@ -1,23 +1,63 @@
 import gradio as gr
-
-#final
-import gradio as gr
-#import json
-#from difflib import Differ
 import ffmpeg
-#import os
 from pathlib import Path
-#import time
 
 API_URL = "https://api-inference.huggingface.co/models/facebook/wav2vec2-base-960h"
 #headers = {"Authorization": "Bearer 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"}
 HF_TOKEN = os.environ["HF_TOKEN"]
 headers = {"Authorization": f"Bearer {HF_TOKEN}"}
 
-#convert video to audio
-video_path = Path("./ShiaLaBeouf.mp4")
-audio_memory, _ = ffmpeg.input(video_path).output('-', format="wav", ac=1, ar='16k').overwrite_output().global_args('-loglevel', 'quiet').run(capture_stdout=True)
 
+def generate_transcripts(in_video): #generate_gifs(in_video, gif_transcript):
+    print("********* Inside generate_transcripts() **********")
+    #convert video to audio
+    print(f" input video is : {in_video}")
+
+    video_path = Path("./ShiaLaBeouf.mp4")
+    audio_memory, _ = ffmpeg.input(video_path).output('-', format="wav", ac=1, ar='16k').overwrite_output().global_args('-loglevel', 'quiet').run(capture_stdout=True)
+
+    #Getting transcripts using wav2Vec2 huggingface hosted accelerated inference
+    #sending audio file in request along with stride and chunk length information
+    model_response = query_api(audio_memory)
+
+    #model response has both - transcripts as well as character timestamps or chunks
+    transcription = model_response["text"].lower()
+    chnk = model_response["chunks"]
+
+    #creating lists from chunks to consume downstream easily
+    timestamps = [[chunk["text"].lower(), chunk["timestamp"][0], chunk["timestamp"][1]]
+                  for chunk in chnk]
+
+    #getting words and word timestamps
+    words, words_timestamp = get_word_timestamps(timestamps)
+    print(f"Total words in the audio transcript is:{len(words)}, transcript word list is :{words}")
+    print(f"Total word timestamps derived from character timestamps are :{len(words_timestamp)}, Word timestamps are :{words_timestamp}")
+
+    return transcription, words, words_timestamp
+
+
+def generate_gifs(gif_transcript, words, words_timestamp):
+    print("********* Inside generate_gifs() **********")
+
+    #creating list from input gif transcript
+    gif = "don't let your dreams be dreams"
+    #gif = gif_transcript
+    giflist = gif.split()
+
+    #getting gif indexes from the generator
+    giflist_indxs = list(list(get_gif_word_indexes(words, giflist))[0])
+
+    #getting start and end timestamps for a gif video
+    start_seconds, end_seconds = get_gif_timestamps(giflist_indxs, words_timestamp)
+
+    #generated .gif image
+    generate_gif(start_seconds, end_seconds)
+    #("./gifimage.gif")
+    html_out = "<img src='./gifimage.gif' />"
+
+    return html_out
+
+
 #calling the hosted model
 def query_api(audio_bytes: bytes):
     """
@@ -38,20 +78,8 @@ def query_api(audio_bytes: bytes):
     json_reponse = json.loads(response.content.decode("utf-8"))
     return json_reponse
 
-#Getting transcripts using wav2Vec2 huggingface hosted accelerated inference
-#sending audio file in request along with stride and chunk length information
-model_response = query_api(audio_memory)
-
-#model response has both - transcripts as well as character timestamps or chunks
-transcription = model_response["text"].lower()
-chnk = model_response["chunks"]
-
-#creating lists from chunks to consume downstream easily
-timestamps = [[chunk["text"].lower(), chunk["timestamp"][0], chunk["timestamp"][1]]
-              for chunk in chnk]
 
-
-#getting word timestams from character timestamps
+#getting word timestamps from character timestamps
 def get_word_timestamps(timestamps):
     words, word = [], []
     letter_timestamp, word_timestamp, words_timestamp = [], [], []
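
Most of query_api's body falls outside this hunk. A hedged sketch of what the visible lines imply, assuming the requests module; note that the new file as shown would also need import os (for the HF_TOKEN lookup), import json, and import requests, none of which appear in its import block:

    import json
    import os
    import requests

    API_URL = "https://api-inference.huggingface.co/models/facebook/wav2vec2-base-960h"
    headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}

    def query_api(audio_bytes: bytes):
        # POST the raw WAV bytes to the hosted wav2vec2 model; the response
        # carries the transcript text plus character-level timestamp chunks.
        # (The commit's comments mention stride/chunk-length parameters,
        # which are not visible in the hunk and are omitted here.)
        response = requests.post(API_URL, headers=headers, data=audio_bytes)
        return json.loads(response.content.decode("utf-8"))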
@@ -68,15 +96,6 @@ def get_word_timestamps(timestamps):
     words = [word.strip() for word in words]
     return words, words_timestamp
 
-words, words_timestamp = get_word_timestamps(timestamps)
-#words = [word.strip() for word in words]
-
-print(f"Total words in the audio transcript is:{len(words)}, transcript word list is :{words}")
-print(f"Total Word timestamps derived fromcharacter timestamp are :{len(words_timestamp)}, Word timestamps are :{words_timestamp}")
-
-#creating list from input gif transcript
-gif = "don't let your dreams be dreams"
-giflist = gif.split()
 
 #getting index of gif words in main transcript
 def get_gif_word_indexes(total_words_list, gif_words_list):
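
get_word_timestamps's body is mostly hidden between hunks. A hedged sketch of the character-to-word aggregation it performs, assuming each timestamps entry is [character, start_seconds, end_seconds] as built in generate_transcripts: a word's start time is its first character's start, its end time its last character's end.

    def words_from_char_timestamps(timestamps):
        # hypothetical reimplementation; splits the character stream on spaces
        words, words_timestamp = [], []
        word, start, end = "", None, None
        for char, t0, t1 in timestamps:
            if char == " ":
                if word:                      # a space closes the current word
                    words.append(word)
                    words_timestamp.append([start, end])
                word, start = "", None
            else:
                if start is None:
                    start = t0                # first character opens the word
                word += char
                end = t1
        if word:                              # flush the trailing word
            words.append(word)
            words_timestamp.append([start, end])
        return words, words_timestamp

    # words_from_char_timestamps([["h",0.0,0.1],["i",0.1,0.2],[" ",0.2,0.3],
    #                             ["y",0.3,0.4],["o",0.4,0.5]])
    # -> (["hi", "yo"], [[0.0, 0.2], [0.3, 0.5]])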
@@ -90,11 +109,9 @@ def get_gif_word_indexes(total_words_list, gif_words_list):
         if total_words_list[idx:idx+lengthgif_words_list] == gif_words_list:
             yield tuple(range(idx, idx+lengthgif_words_list))
 
-#getting gif indexes from the generator
-giflist_indxs = list(list(get_gif_word_indexes(words, giflist))[0])
 
 #getting start and end timestamps for gif transcript
-def get_gif_timestamps(giflist_indxs):
+def get_gif_timestamps(giflist_indxs, words_timestamp):
     #giflist_indxs = list(list(get_gif_word_indexes(words, giflist))[0])
     min_idx = min(giflist_indxs)
     max_idx = max(giflist_indxs)
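
The visible lines of get_gif_word_indexes show the core of a sliding-window match; the loop setup is hidden between hunks. A self-contained sketch of the same idea:

    def find_phrase_indexes(words, phrase_words):
        # Slide a window of len(phrase_words) across the transcript and
        # yield the matching index range wherever the window is equal.
        n = len(phrase_words)
        for idx in range(len(words) - n + 1):
            if words[idx:idx + n] == phrase_words:
                yield tuple(range(idx, idx + n))

    # next(find_phrase_indexes("don't let your dreams be dreams".split(),
    #                          ["dreams", "be"]))  -> (3, 4)

generate_gifs takes the first match and hands its index range to get_gif_timestamps, which (per the next hunk) reads the start of the first word and the end of the last word from words_timestamp.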
@@ -103,15 +120,52 @@
     start_seconds, end_seconds = gif_words_timestamp[0][0], gif_words_timestamp[-1][-1]
     return start_seconds, end_seconds
 
-#getting start and end timestamps for a gif video
-start_seconds, end_seconds = get_gif_timestamps(giflist_indxs)
 
 #extracting the video and building and serving a .gif image
 def generate_gif(start_seconds, end_seconds):
     final_clip = video.subclip(start_seconds, end_seconds)
     #final_clip.write_videofile("/content/gdrive/My Drive/AI/videoedit/gif1.mp4")
-    final_clip.write_gif("/content/gdrive/My Drive/AI/videoedit/gif1.gif",)
+    final_clip.write_gif("./gifimage.gif",)
     final_clip.close()
     return
 
-generate_gif(start_seconds, end_seconds)
+
+sample_video = ['./ShiaLaBeouf.mp4']
+sample_vid = gr.Video(label='Video file') #for displaying the example
+examples = gr.components.Dataset(components=[sample_vid], samples=[sample_video], type='values')
+
+
+demo = gr.Blocks()
+
+with demo:
+    with gr.Row():
+        input_video = gr.Video(label="Upload a Video", visible=True) #for incoming video
+        text_transcript = gr.Textbox(label="Transcripts", lines=10, interactive=True) #to generate and display transcriptions for input video
+        text_words = gr.Textbox(visible=False)
+        text_wordstimestamps = gr.Textbox(visible=False)
+        text_gif_transcript = gr.Textbox(label="Transcripts", placeholder="Copy paste transcripts here to create GIF image", lines=3, interactive=True) #to copy paste required gif transcript
+        out_gif = gr.HTML(label="Generated GIF from transcript selected", show_label=True)
+
+    examples.render()
+    def load_examples(video): #to load sample video into input_video upon clicking on it
+        print("****** inside load_example() ******")
+        print("in_video is : ", video)
+        return video
+
+    examples.click(load_examples, examples, input_video)
+
+    with gr.Row():
+        button_transcript = gr.Button("Generate transcripts")
+        button_gifs = gr.Button("Create Gif")
+
+    #def load_gif():
+    #    print("****** inside load_gif() ******")
+    #    #created embedding width='560' height='315'
+    #    html_out = "<img src='./gifimage.gif' />"
+    #    print(f"html output is : {html_out}")
+    #    return
+
+    button_transcript.click(generate_transcripts, input_video, [text_transcript, text_words, text_wordstimestamps])
+    button_gifs.click(generate_gifs, [text_gif_transcript, text_words, text_wordstimestamps], out_gif)
+
+
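
generate_gif slices a module-level video clip that is never defined anywhere in the diff (a leftover from the original Colab version, to judge by the commented Drive paths). A hedged sketch of the missing setup, assuming moviepy 1.x and the repo's sample video:

    from moviepy.editor import VideoFileClip

    video = VideoFileClip("./ShiaLaBeouf.mp4")  # assumed global used by generate_gif

    def generate_gif(start_seconds, end_seconds):
        # Cut the matched span out of the source clip and write it as a GIF
        # next to the app so the <img> tag in generate_gifs can serve it.
        final_clip = video.subclip(start_seconds, end_seconds)
        final_clip.write_gif("./gifimage.gif")
        final_clip.close()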
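
The new Blocks UI threads state between the two buttons through hidden textboxes: button_transcript writes the word list and word timestamps into invisible components, and button_gifs reads them back alongside the user's phrase. A minimal sketch of the same wiring pattern with stub callbacks, assuming gradio 3.x (demo.launch() is not visible in the diff, so its presence here is an assumption):

    import gradio as gr

    def make_transcript(video):
        return "transcript ...", "word list ...", "timestamps ..."  # stub ASR step

    def make_gif(phrase, words, timestamps):
        return "<img src='./gifimage.gif' />"  # stub GIF step

    with gr.Blocks() as demo:
        video_in = gr.Video(label="Upload a Video")
        transcript = gr.Textbox(label="Transcripts", lines=10)
        words = gr.Textbox(visible=False)   # hidden hand-off between steps
        stamps = gr.Textbox(visible=False)
        phrase = gr.Textbox(label="GIF phrase", lines=3)
        gif_out = gr.HTML()
        gr.Button("Generate transcripts").click(make_transcript, video_in,
                                                [transcript, words, stamps])
        gr.Button("Create Gif").click(make_gif, [phrase, words, stamps], gif_out)

    demo.launch()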