Spaces:
Build error
Build error
app.py
CHANGED
@@ -18,18 +18,18 @@ def get_transcript(link):
|
|
18 |
print(f"video id extracted is : {video_id}")
|
19 |
transcript = YouTubeTranscriptApi.get_transcript(video_id)
|
20 |
FinalTranscript = ' '.join([i['text'] for i in transcript])
|
21 |
-
return FinalTranscript, video_id
|
22 |
|
23 |
|
24 |
#input - question and transcript, output - answer timestamp
|
25 |
-
def get_answers_timestamp(question, transcript):
|
26 |
print("******** Inside get_answers_timestamp ********")
|
27 |
model_ckpt = "deepset/minilm-uncased-squad2"
|
28 |
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
|
29 |
#question = "any funny examples in video??"
|
30 |
-
context = transcript
|
31 |
print(f"Input Question is : {question}")
|
32 |
-
print(f"Type of trancript is : {type(transcript)}, Length of transcript is : {len(transcript)}")
|
33 |
inputs = tokenizer(question, context, return_overflowing_tokens=True, max_length=512, stride = 25)
|
34 |
|
35 |
#overlaps
|
@@ -57,7 +57,7 @@ def get_answers_timestamp(question, transcript):
|
|
57 |
#idxmax, idxmax2
|
58 |
|
59 |
idxcont = lst[idxmax2]['start']
|
60 |
-
answer = transcript[len(contx[0])-135 + idxcont:]
|
61 |
sentence_keyword = answer[:50]
|
62 |
|
63 |
dftranscript = pd.DataFrame(transcript)
|
@@ -85,11 +85,11 @@ def display_vid(url, question):
|
|
85 |
#print(html)
|
86 |
|
87 |
#get transcript
|
88 |
-
transcript, video_id = get_transcript(url)
|
89 |
|
90 |
#get answer timestamp
|
91 |
#input - question and transcript, output - answer timestamp
|
92 |
-
ans_timestamp = get_answers_timestamp(question, transcript)
|
93 |
|
94 |
#created embedding
|
95 |
#sample - smUHQndcmOY?start=234
|
|
|
18 |
print(f"video id extracted is : {video_id}")
|
19 |
transcript = YouTubeTranscriptApi.get_transcript(video_id)
|
20 |
FinalTranscript = ' '.join([i['text'] for i in transcript])
|
21 |
+
return FinalTranscript,transcript, video_id
|
22 |
|
23 |
|
24 |
#input - question and transcript, output - answer timestamp
|
25 |
+
def get_answers_timestamp(question, final_transcript, transcript):
|
26 |
print("******** Inside get_answers_timestamp ********")
|
27 |
model_ckpt = "deepset/minilm-uncased-squad2"
|
28 |
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
|
29 |
#question = "any funny examples in video??"
|
30 |
+
context = final_transcript
|
31 |
print(f"Input Question is : {question}")
|
32 |
+
print(f"Type of trancript is : {type(context)}, Length of transcript is : {len(context)}")
|
33 |
inputs = tokenizer(question, context, return_overflowing_tokens=True, max_length=512, stride = 25)
|
34 |
|
35 |
#overlaps
|
|
|
57 |
#idxmax, idxmax2
|
58 |
|
59 |
idxcont = lst[idxmax2]['start']
|
60 |
+
answer = final_transcript[len(contx[0])-135 + idxcont:]
|
61 |
sentence_keyword = answer[:50]
|
62 |
|
63 |
dftranscript = pd.DataFrame(transcript)
|
|
|
85 |
#print(html)
|
86 |
|
87 |
#get transcript
|
88 |
+
final_transcript, transcript, video_id = get_transcript(url)
|
89 |
|
90 |
#get answer timestamp
|
91 |
#input - question and transcript, output - answer timestamp
|
92 |
+
ans_timestamp = get_answers_timestamp(question, final_transcript, transcript)
|
93 |
|
94 |
#created embedding
|
95 |
#sample - smUHQndcmOY?start=234
|