app file
Browse files- app.py +2 -2
- summarizer.py +2 -1
- transcript.py +9 -3
app.py
CHANGED
@@ -37,7 +37,7 @@ def image_mod(link):
|
|
37 |
json_file = su.getSummary(link, lexrank_switch, rpunkt_switch)
|
38 |
#html, images = su.getSummary(link, lexrank_switch, rpunkt_switch)
|
39 |
#images = su.getSummaryImage(link, lexrank_switch, rpunkt_switch)
|
40 |
-
print(html)
|
41 |
|
42 |
files = os.listdir('workdir/')
|
43 |
print('local files: ',files)
|
@@ -51,7 +51,7 @@ def image_mod(link):
|
|
51 |
|
52 |
#images[0].save("newlion.png")
|
53 |
|
54 |
-
print('images',images)
|
55 |
|
56 |
#return {"html": html, "images":images}
|
57 |
#return HTMLResponse(content=html)
|
|
|
37 |
json_file = su.getSummary(link, lexrank_switch, rpunkt_switch)
|
38 |
#html, images = su.getSummary(link, lexrank_switch, rpunkt_switch)
|
39 |
#images = su.getSummaryImage(link, lexrank_switch, rpunkt_switch)
|
40 |
+
#print(html)
|
41 |
|
42 |
files = os.listdir('workdir/')
|
43 |
print('local files: ',files)
|
|
|
51 |
|
52 |
#images[0].save("newlion.png")
|
53 |
|
54 |
+
#print('images',images)
|
55 |
|
56 |
#return {"html": html, "images":images}
|
57 |
#return HTMLResponse(content=html)
|
summarizer.py
CHANGED
@@ -79,7 +79,8 @@ def getSummary(link, lexrank_switch, rpunkt_switch):
|
|
79 |
return 'Error: no link provided'
|
80 |
|
81 |
print('getting transcript using link: ', link)
|
82 |
-
|
|
|
83 |
print('transcript type: ', type_transcript)
|
84 |
#timestamps = ts.get_timestamps(raw_transcript)
|
85 |
raw_caption = ts.get_caption(raw_transcript)
|
|
|
79 |
return 'Error: no link provided'
|
80 |
|
81 |
print('getting transcript using link: ', link)
|
82 |
+
video_id = get_id_from_link(link)
|
83 |
+
raw_transcript, type_transcript = ts.get_json_transcript(video_id,rpunkt_switch)
|
84 |
print('transcript type: ', type_transcript)
|
85 |
#timestamps = ts.get_timestamps(raw_transcript)
|
86 |
raw_caption = ts.get_caption(raw_transcript)
|
transcript.py
CHANGED
@@ -7,13 +7,19 @@ import base64
|
|
7 |
#transcript_list = YouTubeTranscriptApi.list_transcripts('ReHGSGwV4-A')
|
8 |
#transcript = transcript_list.find_transcript(['en','de'])
|
9 |
|
10 |
-
|
11 |
-
|
|
|
12 |
if "v=" in link:
|
13 |
video_id = link.split("v=")[1].split("&")[0]
|
|
|
14 |
else:
|
15 |
return "Error: Invalid Link, it does not have the pattern 'v=' in it."
|
16 |
-
|
|
|
|
|
|
|
|
|
17 |
|
18 |
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
19 |
# get the auto-generated english text
|
|
|
7 |
#transcript_list = YouTubeTranscriptApi.list_transcripts('ReHGSGwV4-A')
|
8 |
#transcript = transcript_list.find_transcript(['en','de'])
|
9 |
|
10 |
+
def get_id_from_link(link):
    """Return the YouTube video id contained in *link*.

    Args:
        link: Either a URL carrying the id in a ``v=`` query parameter, or
            an 11-character string that is treated as the id itself.

    Returns:
        The video id as a string. When *link* matches neither form, an
        error message string is returned instead of raising, so callers
        must check the result before using it as an id.
    """
    if "v=" in link:
        # Keep the text after the first "v=" up to the next "&".
        return link.split("v=")[1].split("&")[0]

    if len(link) == 11:
        # NOTE(review): this branch had no body; assuming an 11-character
        # input is already a bare video id - confirm against the caller.
        return link

    return "Error: Invalid Link, it does not have the pattern 'v=' in it."
|
20 |
+
|
21 |
+
# step 1: download the json transcript for youtube video
|
22 |
+
def get_json_transcript(video_id,rpunkt_switch):
|
23 |
|
24 |
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
25 |
# get the auto-generated english text
|