ysharma (HF staff) committed on
Commit 237602b • 1 Parent(s): 5c66834

created app.py

Files changed (1)
  1. app.py +303 -0
app.py ADDED
@@ -0,0 +1,303 @@
import gradio as gr
import ffmpeg
from pathlib import Path
import os
import ast
import json
import base64
import requests
import moviepy.editor as mp
from PIL import Image, ImageSequence
import cv2

API_URL = "https://api-inference.huggingface.co/models/facebook/wav2vec2-base-960h"
HF_TOKEN = os.environ["HF_TOKEN"]
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
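# NOTE: HF_TOKEN must be set in the environment (e.g. as a Space secret);
# os.environ["HF_TOKEN"] raises a KeyError when it is missing.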
video_list = []
def generate_transcripts(in_video):
    print("********* Inside generate_transcripts() **********")
    #convert video to audio
    print(f"input video is : {in_video}")

    #sample
    #video_path = Path("./ShiaLaBeouf.mp4")
    audio_memory, _ = ffmpeg.input(in_video).output('-', format="wav", ac=1, ar='16k').overwrite_output().global_args('-loglevel', 'quiet').run(capture_stdout=True)
    #audio_memory, _ = ffmpeg.input(video_path).output('-', format="wav", ac=1, ar='16k').overwrite_output().global_args('-loglevel', 'quiet').run(capture_stdout=True)

    #getting transcripts from Wav2Vec2 via the Hugging Face hosted (accelerated) Inference API
    #sending the audio bytes in the request along with stride and chunk-length information
    model_response = query_api(audio_memory)

    #the model response has both the transcript and the character-level timestamps (chunks)
    print(f"model_response is : {model_response}")
    transcription = model_response["text"].lower()
    chnk = model_response["chunks"]

    #creating lists from the chunks so they are easy to consume downstream
    timestamps = [[chunk["text"].lower(), chunk["timestamp"][0], chunk["timestamp"][1]]
                  for chunk in chnk]

    #getting words and word timestamps
    words, words_timestamp = get_word_timestamps(timestamps)
    print(f"Total words in the audio transcript: {len(words)}, transcript word list: {words}, type of words: {type(words)}")
    print(f"Total word timestamps derived from character timestamps: {len(words_timestamp)}, word timestamps: {words_timestamp}")

    return transcription, words, words_timestamp

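# Illustrative shape of the character-timestamp triples built in
# generate_transcripts() above (values are hypothetical):
#   [['h', 0.10, 0.18], ['i', 0.18, 0.25], [' ', 0.25, 0.25], ...]
# i.e. one [character, start_second, end_second] entry per chunk.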

def generate_gifs(in_video, gif_transcript, words, words_timestamp, vid_speed):
    print("********* Inside generate_gifs() **********")

    #creating a word list from the input gif transcript
    #example: gif = "don't let your dreams be dreams"
    gif = gif_transcript
    giflist = gif.split()

    #getting gif indexes from the generator
    #the word lists arrive as strings (round-tripped through hidden Textboxes), so convert them back to lists
    words = ast.literal_eval(words)
    words_timestamp = ast.literal_eval(words_timestamp)
    print(f"words is :{words}")
    print(f"type of words is :{type(words)}")
    print(f"length of words is :{len(words)}")
    print(f"giflist is :{giflist}")

    giflist_indxs = list(list(get_gif_word_indexes(words, giflist))[0])
    print(f"giflist_indxs is : {giflist_indxs}")
    #getting start and end timestamps for the gif video
    start_seconds, end_seconds = get_gif_timestamps(giflist_indxs, words_timestamp)
    print(f"start_seconds, end_seconds are : ({start_seconds}, {end_seconds})")
    #generating the edited video
    #gif_out, vid_out = gen_moviepy_gif(in_video, start_seconds, end_seconds)
    print(f"vid_speed from Slider is : {vid_speed}")

    speededit_vids_list, concat_vid = gen_moviepy_gif(in_video, start_seconds, end_seconds, float(vid_speed), video_list)

    return concat_vid


#calling the hosted model
def query_api(audio_bytes: bytes):
    """
    Query the Hugging Face Inference API for the Automatic Speech Recognition task
    """
    print("********* Inside query_api() **********")
    payload = json.dumps({
        "inputs": base64.b64encode(audio_bytes).decode("utf-8"),
        "parameters": {
            "return_timestamps": "char",
            "chunk_length_s": 10,
            "stride_length_s": [4, 2]
        },
        "options": {"use_gpu": False}
    }).encode("utf-8")

    response = requests.post(API_URL, headers=headers, data=payload)
    json_response = json.loads(response.content.decode("utf-8"))
    print(f"json_response is :{json_response}")
    return json_response

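# Expected shape of the API response consumed above (illustrative values only):
#   {"text": "DON'T LET YOUR DREAMS BE DREAMS",
#    "chunks": [{"text": "d", "timestamp": [0.10, 0.18]}, ...]}
# "text" is the full transcript; "chunks" carries per-character timestamps
# because return_timestamps is set to "char" in the payload.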
#getting word timestamps from character timestamps
def get_word_timestamps(timestamps):
    words, word = [], []
    letter_timestamp, word_timestamp, words_timestamp = [], [], []
    for idx, entry in enumerate(timestamps):
        word.append(entry[0])
        letter_timestamp.append(entry[1])
        if entry[0] == ' ':
            words.append(''.join(word))
            word_timestamp.append(letter_timestamp[0])
            word_timestamp.append(timestamps[idx-1][2])
            words_timestamp.append(word_timestamp)
            word, word_timestamp, letter_timestamp = [], [], []

    words = [word.strip() for word in words]
    return words, words_timestamp

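# Worked example (hypothetical character triples for the phrase "hi there "):
#   input : [['h', 0.10, 0.18], ['i', 0.18, 0.25], [' ', 0.25, 0.25],
#            ['t', 0.30, 0.34], ['h', 0.34, 0.38], ['e', 0.38, 0.41],
#            ['r', 0.41, 0.45], ['e', 0.45, 0.50], [' ', 0.50, 0.50]]
#   output: words = ['hi', 'there'],
#           words_timestamp = [[0.10, 0.25], [0.30, 0.50]]
# A word boundary is detected at each space; a word's start is its first
# letter's start time and its end is the previous character's end time.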

#getting the indexes of the gif words in the main transcript
def get_gif_word_indexes(total_words_list, gif_words_list):
    if not gif_words_list:
        return
    #just an optimization: precompute the phrase length and its first word
    lengthgif_words_list = len(gif_words_list)
    firstgif_words_list = gif_words_list[0]

    print(f"total_words_list is :{total_words_list}")
    print(f"length of total_words_list is :{len(total_words_list)}")
    print(f"gif_words_list is :{gif_words_list}")
    print(f"length of gif_words_list is :{len(gif_words_list)}")

    for idx, item in enumerate(total_words_list):
        if item == firstgif_words_list:
            if total_words_list[idx:idx+lengthgif_words_list] == gif_words_list:
                print(f"value of tuple is : {tuple(range(idx, idx+lengthgif_words_list))}")
                yield tuple(range(idx, idx+lengthgif_words_list))

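# Illustrative call (hypothetical word lists):
#   list(get_gif_word_indexes(['so', 'just', 'do', 'it'], ['just', 'do']))
#   -> [(1, 2)]
# Each yielded tuple holds the consecutive indexes where the requested
# phrase occurs in the full transcript; generate_gifs() uses the first match.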

#getting start and end timestamps for the gif transcript
def get_gif_timestamps(giflist_indxs, words_timestamp):
    print("******** Inside get_gif_timestamps() **********")
    min_idx = min(giflist_indxs)
    max_idx = max(giflist_indxs)
    print(f"min_idx is :{min_idx}")
    print(f"max_idx is :{max_idx}")

    gif_words_timestamp = words_timestamp[min_idx : max_idx+1]
    print(f"words_timestamp is :{words_timestamp}")
    print(f"gif_words_timestamp is :{gif_words_timestamp}")

    start_seconds, end_seconds = gif_words_timestamp[0][0], gif_words_timestamp[-1][-1]
    print(f"start_seconds, end_seconds are :{start_seconds},{end_seconds}")

    return start_seconds, end_seconds

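# Example continued from above: for indexes (1, 2) and
#   words_timestamp = [[0.0, 0.2], [0.3, 0.5], [0.6, 0.9], [1.0, 1.2]]
# the slice covers entries 1..2, so start_seconds = 0.3 and end_seconds = 0.9.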

#extracting the clip, re-timing it, and serving the edited video
def gen_moviepy_gif(in_video, start_seconds, end_seconds, vid_speed, vid_list):
    print("******** Inside gen_moviepy_gif() **********")
    #sample
    #video_path = "./ShiaLaBeouf.mp4"
    video = mp.VideoFileClip(in_video)
    #video = mp.VideoFileClip(video_path)

    #rounding the cut points up to the next whole second for the untouched lead-in / lead-out clips
    leftover_clip_start = video.subclip(0, int(start_seconds) + float("{:.2f}".format(1 - start_seconds % 1))).without_audio()
    final_clip = video.subclip(start_seconds, end_seconds)
    leftover_clip_end = video.subclip(int(end_seconds) + float("{:.2f}".format(1 - end_seconds % 1))).without_audio() #runs to the end of the video

    #re-timing: a speed factor below 1 slows the clip down, above 1 speeds it up
    print(f"vid_speed from the calling function is : {vid_speed}")
    speededit_clip = final_clip.fx(mp.vfx.speedx, vid_speed)
    speededit_clip = speededit_clip.without_audio()

    #concatenating the lead-in, the re-timed clip, and the lead-out
    concatenated_clip = mp.concatenate_videoclips([leftover_clip_start, speededit_clip, leftover_clip_end])
    concatenated_clip.write_videofile("concat.mp4")

    filename = f"speededit{len(vid_list)}"
    print("filename is :", filename)
    speededit_clip.write_videofile("speededit.mp4") #(filename)
    vid_list.append("speededit.mp4") #(filename)

    if len(vid_list) == 1:
        speededit_clip.write_videofile("slomo.mp4")
    elif len(vid_list) == 2:
        speededit_clip.write_videofile("timelapse.mp4")

    #writing to disk - gif and smaller clip
    #final_clip.write_gif("gifimage.gif") #, program='ffmpeg', tempfiles=True, fps=15, fuzz=3)
    #final_clip.write_videofile("gifimage.mp4")
    final_clip.close()
    #reading into a variable
    #gif_img = mp.VideoFileClip("gifimage.gif")
    #gif_vid = mp.VideoFileClip("gifimage.mp4")
    #im = Image.open("gifimage.gif")
    #vid_cap = cv2.VideoCapture('gifimage.mp4')
    return vid_list, "concat.mp4"

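# Note: the same function serves both modes. The SloMo slider passes a factor
# in 0.1-0.9 (slower than real time) and the TimeLapse slider a factor in
# 1-2 (faster), so mp.vfx.speedx covers both cases with one code path.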

sample_video = ["olympic100m.mp4"] #[['./ShiaLaBeouf.mp4']]
sample_vid = gr.Video(label='Video file') #for displaying the example
examples = gr.components.Dataset(components=[sample_vid], samples=[sample_video], type='values')


demo = gr.Blocks()

with demo:
    gr.Markdown("""# **Watch your video in SloMo or in Timelapse!** """)
    gr.Markdown("""
    ### **This is still a work in progress...** Editing your video using an ASR pipeline.

    A Space by [Yuvraj Sharma](https://huggingface.co/ysharma).

    **Background:** In this Gradio Blocks Party Space, I am trying to:
    - Provide the capability to slow down your video
    - Timelapse your video

    **How To Use:**
    1. Upload a video, or simply click on the sample provided here.
    2. Then click on the 'Generate transcripts' button; the first textbox will display the transcript extracted from your sample's audio.
    3. Copy text from the transcript, or type a transcript manually, into the second textbox provided.
    4. A slowed-down or timelapsed version of your video will be generated on the right-hand side!

    Hope you have fun using this 😀
    """)

    with gr.Row():
        #for the incoming video
        input_video = gr.Video(label="Upload a Video", visible=True)
        #to generate and display the transcript for the input video
        text_transcript = gr.Textbox(label="Transcripts", lines=10, interactive=True)

        #just to move data between functions, hence keeping these invisible
        text_words = gr.Textbox(visible=False)
        text_wordstimestamps = gr.Textbox(visible=False)

    with gr.Row():
        button_transcript = gr.Button("Generate transcripts")

    #For SloMo
    with gr.Row():
        #to copy-paste the required transcript snippet / or to be populated automatically on pressing the button
        text_slomo_transcript = gr.Textbox(label="Transcripts", placeholder="Copy-paste transcripts here to create a SlowMo video", lines=5, interactive=True)

        def load_slomo_text(text):
            print("****** Inside load_slomo_text() ******")
            print("text for slomo video is : ", text)
            return text

        text_transcript.change(load_slomo_text, text_transcript, text_slomo_transcript)

        #out_gif = gr.Image(label="Generated GIF image")
        out_slomo_vid = gr.Video(label="Generated SlowMo Video")

    with gr.Row():
        #button_transcript = gr.Button("Generate transcripts")
        vid_speed_slomo = gr.Slider(0.1, 0.9, step=0.1)
        button_slomo = gr.Button("Create SloMo")

    #For TimeLapse
    with gr.Row():
        #to copy-paste the required transcript snippet / or to be populated automatically on pressing the button
        text_timelapse_transcript = gr.Textbox(label="Transcripts", placeholder="Copy-paste transcripts here to create a TimeLapse video", lines=5) #, interactive=True)

        def load_timelapse_text(text):
            print("****** Inside load_timelapse_text() ******")
            print("text for timelapse video is : ", text)
            return text

        text_transcript.change(load_timelapse_text, text_transcript, text_timelapse_transcript)

        #out_gif = gr.Image(label="Generated GIF image")
        out_timelapse_vid = gr.Video(label="Generated TimeLapse Video")

    with gr.Row():
        #button_transcript = gr.Button("Generate transcripts")
        vid_speed_timelapse = gr.Slider(1, 2, step=0.25)
        button_timelapse = gr.Button("Create TimeLapse")

    with gr.Row():
        #to render the video example on mouse hover/click
        examples.render()
        #to load the sample video into input_video upon clicking on it
        def load_examples(video):
            print("****** Inside load_examples() ******")
            print("in_video is : ", video[0])
            return video[0]

        examples.click(load_examples, examples, input_video)

    #vid_speed = gr.Slider(0.1,0.9, step=0.1)

    button_transcript.click(generate_transcripts, input_video, [text_transcript, text_words, text_wordstimestamps])
    button_slomo.click(generate_gifs, [input_video, text_slomo_transcript, text_words, text_wordstimestamps, vid_speed_slomo], out_slomo_vid)
    #note: the timelapse path takes the SloMo output video as its input clip
    button_timelapse.click(generate_gifs, [out_slomo_vid, text_timelapse_transcript, text_words, text_wordstimestamps, vid_speed_timelapse], out_timelapse_vid)

demo.launch(debug=True)