Spaces: Build error

app.py CHANGED
@@ -7,6 +7,11 @@ import pandas as pd
 from sentence_transformers import SentenceTransformer, util
 import torch
 
+model_ckpt = "deepset/minilm-uncased-squad2"
+tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
+model = AutoModelForQuestionAnswering.from_pretrained(model_ckpt)
+modelST = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+
 #input - video link, output - full transcript
 def get_transcript(link):
     print("******** Inside get_transcript ********")
@@ -21,8 +26,8 @@ def get_transcript(link):
 #input - question and transcript, output - answer timestamp
 def get_answers_timestamp(question, final_transcript, transcript):
     print("******** Inside get_answers_timestamp ********")
-    model_ckpt = "deepset/minilm-uncased-squad2"
-    tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
+    #model_ckpt = "deepset/minilm-uncased-squad2" >>>>>>>>>
+    #tokenizer = AutoTokenizer.from_pretrained(model_ckpt) >>>>>>>>>>>>
     #question = "any funny examples in video??"
     context = final_transcript
     print(f"Input Question is : {question}")
@@ -37,7 +42,7 @@ def get_answers_timestamp(question, final_transcript, transcript):
     #print(ques)
     #print(contx)
 
-    model = AutoModelForQuestionAnswering.from_pretrained(model_ckpt)
+    #model = AutoModelForQuestionAnswering.from_pretrained(model_ckpt) >>>>>>>>>>>>>>
     lst=[]
     pipe = pipeline("question-answering", model=model, tokenizer=tokenizer)
     for contexts in contx:
@@ -51,19 +56,26 @@ def get_answers_timestamp(question, final_transcript, transcript):
     idxmax2 = lst_scores.index(max(lst_scores))
 
     sentence_for_timestamp = lst[idxmax]['answer']
+    sentence_for_timestamp_secondbest = lst[idxmax2]['answer']
 
     dftranscript = pd.DataFrame(transcript)
 
-    modelST = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+    #modelST = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2') >>>>>>>>>>>>>>>>
     embedding_1= modelST.encode(dftranscript.text, convert_to_tensor=True)
     embedding_2 = modelST.encode(sentence_for_timestamp, convert_to_tensor=True)
-
+    embedding_3 = modelST.encode(sentence_for_timestamp_secondbest, convert_to_tensor=True)
+
     similarity_tensor = util.pytorch_cos_sim(embedding_1, embedding_2)
     idx = torch.argmax(similarity_tensor)
     start_timestamp = dftranscript.iloc[[int(idx)-3]].start.values[0]
     start_timestamp = round(start_timestamp)
 
-    return start_timestamp
+    similarity_tensor_secondbest = util.pytorch_cos_sim(embedding_1, embedding_3)
+    idx_secondbest = torch.argmax(similarity_tensor_secondbest)
+    start_timestamp_secondbest = dftranscript.iloc[[int(idx_secondbest)-3]].start.values[0]
+    start_timestamp_secondbest = round(start_timestamp_secondbest)
+
+    return start_timestamp, start_timestamp_secondbest
 
 
 def display_vid(url, question, sample_question=None, example_video=None):
@@ -83,11 +95,12 @@ def display_vid(url, question, sample_question=None, example_video=None):
 
     #get answer timestamp
     #input - question and transcript, output - answer timestamp
-    ans_timestamp = get_answers_timestamp(question, final_transcript, transcript)
+    ans_timestamp, ans_timestamp_secondbest = get_answers_timestamp(question, final_transcript, transcript)
 
     #created embedding
     html_out = "<iframe width='560' height='315' src='https://www.youtube.com/embed/" + video_id + "?start=" + str(ans_timestamp) + "' title='YouTube video player' frameborder='0' allow='accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture' allowfullscreen></iframe>"
     print(f"html output is : {html_out}")
+    html_out_secondbest = "<iframe width='560' height='315' src='https://www.youtube.com/embed/" + video_id + "?start=" + str(ans_timestamp_secondbest) + "' title='YouTube video player' frameborder='0' allow='accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture' allowfullscreen></iframe>"
 
     if question == '':
         print(f"Inside display_vid(), Sample_Question coming from Radio box is BEFORE : {sample_question}")
@@ -95,7 +108,7 @@
         print(f"Inside display_vid(), Sample Question coming from Radio box is AFTER : {sample_ques}")
     else:
         sample_ques = question
-    return html_out, sample_ques, url
+    return html_out, html_out_secondbest, sample_ques, url
 
 def set_example_question(sample_question):
     print(f"******* Inside Sample Questions ********")
@@ -114,17 +127,20 @@ with demo:
     with gr.Row():
         input_url = gr.Textbox(label="Input a Youtube video link") #gr.HTML(placeholder="Enter a video link here..")
         input_ques = gr.Textbox(label="Ask a Question")
+
+    with gr.Row():
         output_vid = gr.HTML(label="Video will play at the answer timestamp")
-
+        output_vid_secondbest = gr.HTML(label="Video will play at the answer timestamp")
+
     with gr.Row():
         example_question = gr.Dropdown(
             ["Choose a sample question", "Does video talk about different modalities",
             "does the model uses perceiver architecture?",
-            "how was the data collected for flamingo?",
+            #"how was the data collected for flamingo?",
            "when does the video talk about locked image tuning or lit?",
-            "comparison of clip and lit?",
-            "when does jurassic model starts?",
-            "when does miracle model starts?",
+            #"comparison of clip and lit?",
+            #"when does jurassic model starts?",
+            #"when does miracle model starts?",
            "comparison between gpt3 and jurassic?",
            #"Can the model do classification",
            #"Does the model pushes state of the art in image classification",
@@ -133,13 +149,21 @@
            "Has flamingo passed turing test yet?",
            #"Are there cool examples from flamingo in the video?",
            #"Does the video talk about cat?",
-            "Any funny examples in video?"], label= "Choose a sample Question", value=None)
+            "Any funny examples in video?",
+            "is there a demo of jurassic?",
+            "is it possible to download the stylegan model?",
+            "does the video cover graph neural networks?",
+            "what was very cool?",
+            "does yannic like jax?",
+            "were there any book suggestions?",
+            "does the video discuss multilingual language models?",
+            "what is the cool library?"], label= "Choose a sample Question", value=None)
     with gr.Row():
         example_video = gr.CheckboxGroup( ["https://www.youtube.com/watch?v=smUHQndcmOY"], label= "Choose a sample YouTube video") #, value="Any funny examples in video?")
     #example_question.update(set_example_question) #,inputs=example_question, outputs= input_url) #example_styles.components)
 
     b1 = gr.Button("Publish Video")
 
-    b1.click(display_vid, inputs=[input_url, input_ques, example_question, example_video], outputs=[output_vid, input_ques, input_url])
+    b1.click(display_vid, inputs=[input_url, input_ques, example_question, example_video], outputs=[output_vid, output_vid_secondbest, input_ques, input_url])
 
 demo.launch(enable_queue=True, debug=True)
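The substance of the commit is hoisting the model and tokenizer construction out of get_answers_timestamp() to module scope, so the heavyweight from_pretrained loads run once when the Space boots instead of on every question. A minimal sketch of that pattern, reusing the checkpoints named in the diff (the answer_question wrapper is illustrative, not part of the Space):

    from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

    # Loaded once at import time; every request then reuses the same
    # objects instead of re-initialising them per call.
    model_ckpt = "deepset/minilm-uncased-squad2"
    tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
    model = AutoModelForQuestionAnswering.from_pretrained(model_ckpt)
    qa_pipe = pipeline("question-answering", model=model, tokenizer=tokenizer)

    def answer_question(question: str, context: str) -> dict:
        # Returns {'score', 'start', 'end', 'answer'} for the best span.
        return qa_pipe(question=question, context=context)

On a Gradio Space this matters because every button click re-enters the handler, and per-call model construction dominates the latency.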
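The other addition is a second-best answer whose timestamp is located the same way as the best one: encode the answer span and every transcript row with the SentenceTransformer, take the cosine-similarity argmax, and back up three rows. One caveat: idxmax2 = lst_scores.index(max(lst_scores)) returns the index of the highest score, so if idxmax is computed the same way the two timestamps will coincide; a genuine runner-up needs the top index excluded first. The sketch below applies that correction; answers, scores, and transcript are hypothetical stand-ins for the Space's lst, lst_scores, and transcript list:

    import pandas as pd
    import torch
    from sentence_transformers import SentenceTransformer, util

    modelST = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    def best_two_timestamps(answers, scores, transcript):
        # Rank QA scores descending: order[0] is the best chunk and
        # order[1] the true runner-up (assumes at least two chunks).
        order = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)

        df = pd.DataFrame(transcript)  # rows with 'text' and 'start' keys
        emb_rows = modelST.encode(list(df.text), convert_to_tensor=True)

        timestamps = []
        for i in order[:2]:
            emb_answer = modelST.encode(answers[i], convert_to_tensor=True)
            # Cosine similarity of the answer against every transcript row.
            sim = util.pytorch_cos_sim(emb_rows, emb_answer)
            idx = int(torch.argmax(sim))
            # Back up three rows, as the diff does, so playback starts just
            # before the matched sentence; clamp so the index stays valid.
            timestamps.append(round(float(df.iloc[max(idx - 3, 0)].start)))
        return timestamps  # [best_start, second_best_start]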
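Finally, the two HTML outputs are the same iframe markup with different start offsets, so the hand-concatenated strings are a natural candidate for a helper. A sketch (the embed_at name is hypothetical); YouTube's embed player accepts the offset, in whole seconds, as a start query parameter:

    def embed_at(video_id: str, start_seconds: int) -> str:
        # Build the same embed markup the Space concatenates by hand.
        return (
            f"<iframe width='560' height='315' "
            f"src='https://www.youtube.com/embed/{video_id}?start={start_seconds}' "
            f"title='YouTube video player' frameborder='0' "
            f"allow='accelerometer; autoplay; clipboard-write; encrypted-media; "
            f"gyroscope; picture-in-picture' allowfullscreen></iframe>"
        )

    # display_vid() could then return:
    #   embed_at(video_id, ans_timestamp), embed_at(video_id, ans_timestamp_secondbest)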