ysharma (HF staff) committed
Commit cc201b9 · 1 Parent(s): 0c2ffa7
Files changed (1):
  1. app.py (+39, -15)
app.py CHANGED
@@ -7,6 +7,11 @@ import pandas as pd
 from sentence_transformers import SentenceTransformer, util
 import torch
 
+model_ckpt = "deepset/minilm-uncased-squad2"
+tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
+model = AutoModelForQuestionAnswering.from_pretrained(model_ckpt)
+modelST = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+
 #input - video link, output - full transcript
 def get_transcript(link):
     print("******** Inside get_transcript ********")
@@ -21,8 +26,8 @@ def get_transcript(link):
 #input - question and transcript, output - answer timestamp
 def get_answers_timestamp(question, final_transcript, transcript):
     print("******** Inside get_answers_timestamp ********")
-    model_ckpt = "deepset/minilm-uncased-squad2"
-    tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
+    #model_ckpt = "deepset/minilm-uncased-squad2"
+    #tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
     #question = "any funny examples in video??"
     context = final_transcript
     print(f"Input Question is : {question}")
@@ -37,7 +42,7 @@ def get_answers_timestamp(question, final_transcript, transcript):
     #print(ques)
     #print(contx)
 
-    model = AutoModelForQuestionAnswering.from_pretrained(model_ckpt)
+    #model = AutoModelForQuestionAnswering.from_pretrained(model_ckpt)
     lst=[]
     pipe = pipeline("question-answering", model=model, tokenizer=tokenizer)
     for contexts in contx:
@@ -51,19 +56,26 @@ def get_answers_timestamp(question, final_transcript, transcript):
     idxmax2 = lst_scores.index(max(lst_scores))
 
     sentence_for_timestamp = lst[idxmax]['answer']
+    sentence_for_timestamp_secondbest = lst[idxmax2]['answer']
 
     dftranscript = pd.DataFrame(transcript)
 
-    modelST = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+    #modelST = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
     embedding_1 = modelST.encode(dftranscript.text, convert_to_tensor=True)
     embedding_2 = modelST.encode(sentence_for_timestamp, convert_to_tensor=True)
-
+    embedding_3 = modelST.encode(sentence_for_timestamp_secondbest, convert_to_tensor=True)
+
     similarity_tensor = util.pytorch_cos_sim(embedding_1, embedding_2)
     idx = torch.argmax(similarity_tensor)
     start_timestamp = dftranscript.iloc[[int(idx)-3]].start.values[0]
     start_timestamp = round(start_timestamp)
 
-    return start_timestamp
+    similarity_tensor_secondbest = util.pytorch_cos_sim(embedding_1, embedding_3)
+    idx_secondbest = torch.argmax(similarity_tensor_secondbest)
+    start_timestamp_secondbest = dftranscript.iloc[[int(idx_secondbest)-3]].start.values[0]
+    start_timestamp_secondbest = round(start_timestamp_secondbest)
+
+    return start_timestamp, start_timestamp_secondbest
 
 
 def display_vid(url, question, sample_question=None, example_video=None):
@@ -83,11 +95,12 @@ def display_vid(url, question, sample_question=None, example_video=None):
 
     #get answer timestamp
     #input - question and transcript, output - answer timestamp
-    ans_timestamp = get_answers_timestamp(question, final_transcript, transcript)
+    ans_timestamp, ans_timestamp_secondbest = get_answers_timestamp(question, final_transcript, transcript)
 
     #created embedding
     html_out = "<iframe width='560' height='315' src='https://www.youtube.com/embed/" + video_id + "?start=" + str(ans_timestamp) + "' title='YouTube video player' frameborder='0' allow='accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture' allowfullscreen></iframe>"
     print(f"html output is : {html_out}")
+    html_out_secondbest = "<iframe width='560' height='315' src='https://www.youtube.com/embed/" + video_id + "?start=" + str(ans_timestamp_secondbest) + "' title='YouTube video player' frameborder='0' allow='accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture' allowfullscreen></iframe>"
 
     if question == '':
         print(f"Inside display_vid(), Sample_Question coming from Radio box is BEFORE : {sample_question}")
@@ -95,7 +108,7 @@ def display_vid(url, question, sample_question=None, example_video=None):
         print(f"Inside display_vid(), Sample Question coming from Radio box is AFTER : {sample_ques}")
     else:
         sample_ques = question
-    return html_out, sample_ques, url
+    return html_out, html_out_secondbest, sample_ques, url
 
 def set_example_question(sample_question):
     print(f"******* Inside Sample Questions ********")
@@ -114,17 +127,20 @@ with demo:
     with gr.Row():
         input_url = gr.Textbox(label="Input a Youtube video link") #gr.HTML(placeholder="Enter a video link here..")
         input_ques = gr.Textbox(label="Ask a Question")
+
+    with gr.Row():
         output_vid = gr.HTML(label="Video will play at the answer timestamp")
-
+        output_vid_secondbest = gr.HTML(label="Video will play at the second-best answer timestamp")
+
     with gr.Row():
         example_question = gr.Dropdown(
            ["Choose a sample question", "Does video talk about different modalities",
            "does the model uses perceiver architecture?",
-           "how was the data collected for flamingo?",
+           #"how was the data collected for flamingo?",
            "when does the video talk about locked image tuning or lit?",
-           "comparison of clip and lit?",
-           "when does jurassic model starts?",
-           "when does miracle model starts?",
+           #"comparison of clip and lit?",
+           #"when does jurassic model starts?",
+           #"when does miracle model starts?",
            "comparison between gpt3 and jurassic?",
            #"Can the model do classification",
            #"Does the model pushes state of the art in image classification",
@@ -133,13 +149,21 @@ with demo:
            "Has flamingo passed turing test yet?",
           #"Are there cool examples from flamingo in the video?",
           #"Does the video talk about cat?",
-           "Any funny examples in video?"], label= "Choose a sample Question", value=None)
+           "Any funny examples in video?",
+           "is there a demo of jurassic?",
+           "is it possible to download the stylegan model?",
+           "does the video cover graph neural networks?",
+           "what was very cool?",
+           "does yannic like jax?",
+           "were there any book suggestions?",
+           "does the video discuss multilingual language models?",
+           "what is the cool library?"], label= "Choose a sample Question", value=None)
     with gr.Row():
         example_video = gr.CheckboxGroup( ["https://www.youtube.com/watch?v=smUHQndcmOY"], label= "Choose a sample YouTube video") #, value="Any funny examples in video?")
         #example_question.update(set_example_question) #,inputs=example_question, outputs= input_url) #example_styles.components)
 
     b1 = gr.Button("Publish Video")
 
-    b1.click(display_vid, inputs=[input_url, input_ques, example_question, example_video], outputs=[output_vid, input_ques, input_url])
+    b1.click(display_vid, inputs=[input_url, input_ques, example_question, example_video], outputs=[output_vid, output_vid_secondbest, input_ques, input_url])
 
 demo.launch(enable_queue=True, debug=True)
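
In summary, the commit hoists the QA checkpoint (deepset/minilm-uncased-squad2), its tokenizer, and the all-MiniLM-L6-v2 sentence transformer out of get_answers_timestamp() to module scope, so the models load once at startup instead of on every button click, and it threads a runner-up answer through the app: a second-best answer span, its timestamp, and a second iframe output. The span-to-timestamp lookup at the heart of the change can be sketched in isolation. This is a minimal sketch, not the committed code: the helper name answer_to_timestamp, the toy transcript, and the clamp at index 0 are illustrative additions (the committed int(idx)-3 can go negative for matches near the top of the transcript and, with .iloc, silently wraps to the end).

# Minimal sketch: map a QA answer span back to a transcript timestamp with
# sentence embeddings, as app.py now does for both the best and second-best
# answers. Helper name, toy transcript, and the clamp at 0 are illustrative.
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer, util

modelST = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

def answer_to_timestamp(answer, transcript, backoff=3):
    # transcript: list of {"text": ..., "start": ...} dicts, the format
    # pd.DataFrame(transcript) expects in app.py.
    df = pd.DataFrame(transcript)
    line_emb = modelST.encode(df.text.tolist(), convert_to_tensor=True)  # one vector per transcript line
    ans_emb = modelST.encode(answer, convert_to_tensor=True)             # one vector for the answer span
    sims = util.pytorch_cos_sim(line_emb, ans_emb)                       # (n_lines, 1) cosine similarities
    idx = int(torch.argmax(sims))                                        # best-matching transcript line
    # Rewind a few lines so playback starts with some lead-in, clamping at 0.
    return round(df.iloc[max(idx - backoff, 0)].start)

transcript = [{"text": "welcome to the channel", "start": 0.0},
              {"text": "today we look at flamingo", "start": 4.2},
              {"text": "here is a funny failure case", "start": 9.7}]
print(answer_to_timestamp("a funny failure case", transcript))  # -> 0 after clamping

As an aside, the runner-up answer is currently picked by score across transcript chunks; within a single context, the transformers question-answering pipeline can return several ranked candidates in one call (via its top_k argument, topk in older releases), which would avoid tracking the two highest-scoring chunks by hand.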