Spaces:
Build error
Build error
File size: 9,381 Bytes
2a78406 9c97bc4 90d19eb 2a78406 cc201b9 9c97bc4 e97bbe0 d93a889 9c97bc4 90a4d51 9c97bc4 04e3933 9c97bc4 04e3933 e97bbe0 672e886 04e3933 c4472df 04e3933 9c97bc4 4dfc7d2 9c97bc4 4dfc7d2 9c97bc4 c105afe cc201b9 c105afe 9c97bc4 c105afe cc201b9 9c97bc4 ae8c816 9c97bc4 cc201b9 001aea6 c3ed386 a6084be e97bbe0 dc291ae f25f59c a6084be 14406c9 3af57d4 9c97bc4 94a93f1 1aa32a9 cc00947 9c97bc4 04e3933 9c97bc4 cc201b9 9c97bc4 5ce127f e6d9153 9c97bc4 e6d9153 7bcb343 8bfdff7 cc201b9 9c5bf65 7bcb343 d5fb975 98603de 9c5bf65 2a78406 742f7ca 2a78406 bdb7bc2 10f6cda ad4e68c 14406c9 412db82 14406c9 2a78406 e6d9153 5b2a72b cc201b9 e6d9153 cc201b9 c3ed386 c1f572c 4ac9312 0c2ffa7 cc201b9 412db82 a6084be 14406c9 c5bbce0 2a78406 c3ed386 cc201b9 a6084be 2fc726f 5862540 e049118 5862540 2fc726f 2a78406 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
from transformers import AutoTokenizer
from transformers import pipeline
from transformers import AutoModelForQuestionAnswering
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import torch
# Checkpoint name shared by the tokenizer and the question-answering model loaded below.
model_ckpt = "deepset/minilm-uncased-squad2"
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
model = AutoModelForQuestionAnswering.from_pretrained(model_ckpt)
# Sentence-embedding model; get_answers_timestamp uses it to match an answer
# string back to the most similar transcript entry.
modelST = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
#input - video link, output - full transcript
def _extract_video_id(link):
    """Return the YouTube video id contained in *link*.

    Handles ``watch?v=<id>`` links regardless of where ``v`` sits among the
    query parameters, as well as ``youtu.be/<id>``, ``/embed/<id>``,
    ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>`` style paths.

    Raises:
        ValueError: if no video id can be found in *link*.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import parse_qs, urlparse

    link = link.strip()
    # urlparse only recognises the host when a scheme (or a leading "//") is present.
    parsed = urlparse(link if "://" in link else "//" + link)

    ids_in_query = parse_qs(parsed.query).get("v")
    if ids_in_query:
        return ids_in_query[0]

    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]
    if (host == "youtu.be" or host.endswith(".youtu.be")) and segments:
        return segments[0]
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    # Last resort: the historical "text between the first '=' and the next '&'" rule,
    # so any input that used to work keeps working.
    if "=" in link:
        legacy_id = link.split("=")[1].split("&")[0]
        if legacy_id:
            return legacy_id
    raise ValueError(f"Could not find a YouTube video id in link: {link!r}")


def get_transcript(link):
    """Fetch the transcript of the YouTube video that *link* points to.

    Args:
        link: URL of the video, e.g. ``https://www.youtube.com/watch?v=<id>``.

    Returns:
        A 3-tuple ``(full_text, transcript, video_id)`` where ``full_text`` is
        every entry's ``'text'`` joined with single spaces, ``transcript`` is
        the object returned by ``YouTubeTranscriptApi.get_transcript`` and
        ``video_id`` is the id extracted from *link*.

    Raises:
        ValueError: if no video id can be extracted from *link*.
    """
    print("******** Inside get_transcript ********")
    print(f"link to be extracted is : {link}")
    video_id = _extract_video_id(link)
    print(f"video id extracted is : {video_id}")
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    full_text = ' '.join(entry['text'] for entry in transcript)
    return full_text, transcript, video_id
#input - question and transcript, output - answer timestamp
def _best_two_indices(scores):
    """Return ``(best, runner_up)`` positions of the two highest values in *scores*.

    Both indices refer to the original list. Ties keep their original order,
    and when only one score exists the runner-up falls back to the best index.

    Raises:
        ValueError: if *scores* is empty.
    """
    if not scores:
        raise ValueError("no candidate answers to rank")
    # sorted() is stable even with reverse=True, so the first maximum stays first.
    ranked = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
    best = ranked[0]
    runner_up = ranked[1] if len(ranked) > 1 else best
    return best, runner_up


def _answer_start(answer, segment_embeddings, dftranscript, lead_in=3):
    """Return the rounded ``start`` of the transcript row that best matches *answer*.

    The row is chosen by cosine similarity between *answer* and every transcript
    entry, then moved back by *lead_in* rows so playback begins slightly before
    the matched entry. The row index is clamped at 0 so that a match near the
    beginning does not wrap around to the end of the transcript.
    """
    answer_embedding = modelST.encode(answer, convert_to_tensor=True)
    similarity = util.pytorch_cos_sim(segment_embeddings, answer_embedding)
    matched_row = int(torch.argmax(similarity))
    row = max(matched_row - lead_in, 0)
    return round(dftranscript.iloc[[row]].start.values[0])


def get_answers_timestamp(question, final_transcript, transcript):
    """Locate where in the transcript the two best answers to *question* occur.

    The full transcript is split into overlapping windows by the tokenizer, the
    question-answering pipeline is run on each window, and the answers with the
    highest and second-highest scores are matched back to individual transcript
    entries through sentence-embedding similarity.

    Args:
        question: the question to answer.
        final_transcript: the whole transcript as one string.
        transcript: records convertible to a DataFrame with ``text`` and
            ``start`` columns.

    Returns:
        ``(start_timestamp, start_timestamp_secondbest)``: the rounded ``start``
        values for the best and second-best answers (presumably seconds; confirm
        against the transcript source).
    """
    print("******** Inside get_answers_timestamp ********")
    context = final_transcript
    print(f"Input Question is : {question}")
    print(f"Type of trancript is : {type(context)}, Length of transcript is : {len(context)}")
    inputs = tokenizer(question, context, return_overflowing_tokens=True, max_length=512, stride=25)

    # Each window decodes to "<question> [SEP] <context piece> [SEP]"; keep the context piece.
    contx = [
        tokenizer.decode(window).split('[SEP]')[1].strip()
        for window in inputs["input_ids"]
    ]

    pipe = pipeline("question-answering", model=model, tokenizer=tokenizer)
    lst = [pipe(question=question, context=piece) for piece in contx]
    print(f"contx list is : {contx}")
    lst_scores = [result['score'] for result in lst]
    print(f"lst_scores is : {lst_scores}")

    # Rank without mutating lst_scores so both indices stay valid for `lst`.
    idxmax, idxmax2 = _best_two_indices(lst_scores)
    sentence_for_timestamp = lst[idxmax]['answer']
    sentence_for_timestamp_secondbest = lst[idxmax2]['answer']

    dftranscript = pd.DataFrame(transcript)
    segment_embeddings = modelST.encode(dftranscript.text, convert_to_tensor=True)

    start_timestamp = _answer_start(sentence_for_timestamp, segment_embeddings, dftranscript)
    start_timestamp_secondbest = _answer_start(
        sentence_for_timestamp_secondbest, segment_embeddings, dftranscript
    )
    return start_timestamp, start_timestamp_secondbest
def _youtube_iframe(video_id, start):
    """Return the HTML for an embedded YouTube player that starts at *start*."""
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    # The id originates from user input; percent-encode it so it cannot break out
    # of the src attribute. Ordinary ids (letters, digits, '-', '_') are unchanged.
    safe_id = quote(str(video_id), safe="")
    return (
        "<iframe width='730' height='400' src='https://www.youtube.com/embed/"
        + safe_id + "?start=" + str(start)
        + "' title='YouTube video player' frameborder='0' allow='accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture' allowfullscreen></iframe>"
    )


def display_vid(url, question, sample_question=None, example_video=None):
    """Answer *question* for a video and build two players cued to the answers.

    Args:
        url: YouTube link typed by the user; ignored when *example_video* is non-empty.
        question: free-text question; when it is ``''`` the *sample_question* is used.
        sample_question: fallback question chosen from the sample list.
        example_video: optional sequence of sample links; its first item
            replaces *url* when present.

    Returns:
        ``(html_out, html_out_secondbest, sample_ques, url)``: iframe HTML for the
        best and second-best timestamps, the question that was used, and the
        video link that was used.

    Raises:
        ValueError: if neither *url* nor *example_video* supplies a link.
    """
    print("******** display_vid ********")
    if question == '':
        question = sample_question

    # Truthiness covers both the None default and an empty selection.
    if example_video:
        print(f"example_video is : {example_video}")
        url = example_video[0]
    if not url:
        raise ValueError("Please provide a YouTube video link or select the sample video.")

    # get transcript
    final_transcript, transcript, video_id = get_transcript(url)

    # input - question and transcript, output - answer timestamps
    ans_timestamp, ans_timestamp_secondbest = get_answers_timestamp(question, final_transcript, transcript)

    html_out = _youtube_iframe(video_id, ans_timestamp)
    print(f"html output is : {html_out}")
    html_out_secondbest = _youtube_iframe(video_id, ans_timestamp_secondbest)

    if question == '':
        print(f"Inside display_vid(), Sample_Question coming from Radio box is BEFORE : {sample_question}")
        sample_ques = set_example_question(sample_question)
        print(f"Inside display_vid(), Sample Question coming from Radio box is AFTER : {sample_ques}")
    else:
        sample_ques = question
    return html_out, html_out_secondbest, sample_ques, url
def set_example_question(sample_question):
    """Build the Gradio update that sets a component's value to *sample_question*.

    Args:
        sample_question: the sample question selected in the UI.

    Returns:
        The object produced by ``gr.Radio.update(value=sample_question)``.
    """
    print("******* Inside Sample Questions ********")
    print(f"Sample Question coming from Radio box is : {sample_question}")
    # Build the update once so the logged value is exactly what gets returned.
    update = gr.Radio.update(value=sample_question)
    print(f"What is the Return value : {update}")
    return update  # input_ques.update(example)
# Build the Gradio Blocks UI: title, usage notes, the two input boxes, two video
# panes, sample-question / sample-video pickers and the button that runs display_vid.
# NOTE(review): the nesting under `with demo:` / `with gr.Row():` is not visible in
# this copy of the file (leading indentation is missing) - confirm the intended layout.
demo = gr.Blocks()
with demo:
# Page title.
gr.Markdown("<h1><center>Ask a Question to a YouTube Video and get the Video played from the answer timestamp</center></h1>")
# Long-form description and caveats, rendered as Markdown (user-facing text).
gr.Markdown(
"""### A Space by [Yuvraj Sharma](https://huggingface.co/ysharma). How many times have you seen a long video/podcast on Youtube and wondered only if there would have been 'explanatory' timestamps it would have been so much better..
**How to use this space:** You can either provide a new YouTube video link or can use the sample video link provided. Then provide a Questions that you would like about exploring the content in the given video.
The App will generate timestamps and Play the video at those timestamps for you in the space provided. You will see two video displays, corresponding to two of the best guesses by the underlying models. Chances are that both videos might start with same timestamp, which will depend on the question and the content in the video, please bear!
Also, couple small caveats -
- The App will perform as good as the available English Transcripts are for the given YouTube Video. If there are no transcripts, the App will not work.
- Please make sure the YouTube video links that you paste here don't have the trailing values like *&t=8077s*
- Lastly, once you have queried a video, you might have to refresh the page for next query (will try and fix this)
**Motivation behind building this App:** When we see a long video without timestamps, we often wonder 'if' the content we are looking for is in there, or 'where' in the video is the content we are looking for? The Idea is that we might have questions like 'Is the xxxx thing covered in this video?', or maybe 'does the host talks about the architecture of the xxxxx model', or maybe 'Does host talk about alien doorway on Mars?' and so on.
**So this App could help you in reaching to that timestamp in 'Record time'!**
**Best part:** You don't even have to move away from the Space tab in your browser as the YouTube video gets played within the given View.
"""
)
# Inputs: the video link and the free-text question.
with gr.Row():
input_url = gr.Textbox(label="Input a Youtube video link")
input_ques = gr.Textbox(label="Ask a Question")
# Outputs: one HTML pane per candidate timestamp.
with gr.Row():
output_vid = gr.HTML(label="Video from timestamp 1", show_label=True)
output_vid_secondbest = gr.HTML(label="Video from timestamp 2", show_label=True)
# Sample questions; display_vid receives the selection as `sample_question`.
with gr.Row():
example_question = gr.Dropdown(
["Choose a sample question", "Does video talk about different modalities",
"does the model uses perceiver architecture?",
"when does the video talk about locked image tuning or lit?",
"comparison between gpt3 and jurassic?",
"Has flamingo passed turing test yet?",
"Any funny examples in video?",
"is it possible to download the stylegan model?",
"what was very cool?",
"what is the cool library?"], label= "Choose a sample Question", value=None)
# Sample video; display_vid receives the checked links as `example_video`.
with gr.Row():
example_video = gr.CheckboxGroup( ["https://www.youtube.com/watch?v=smUHQndcmOY"], label= "Choose a sample YouTube video")
b1 = gr.Button("Publish Video")
# display_vid's four return values go to the two video panes and back into the
# question and link text boxes.
b1.click(display_vid, inputs=[input_url, input_ques, example_question, example_video], outputs=[output_vid, output_vid_secondbest, input_ques, input_url])
# Links to the two models used by the app.
with gr.Row():
gr.Markdown('''
#### Model Credits
1. [Question Answering](https://huggingface.co/deepset/minilm-uncased-squad2)
1. [Sentence Transformer](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)
''')
# Visitor-counter badge image.
with gr.Row():
gr.Markdown("![visitor badge](https://visitor-badge.glitch.me/badge?page_id=gradio-blocks_ask_questions_to_youtube_videos)")
# Start the app. The statement must end after the closing parenthesis: a trailing
# "|" is a binary operator with no right-hand operand and stops the file from parsing.
# NOTE(review): `enable_queue` is passed as written; confirm it is still accepted by
# the installed Gradio version.
demo.launch(enable_queue=True, debug=True)