|
import transcript as ts |
|
import ytvideo as vd |
|
import frames as fr |
|
|
|
import lexrank as lr |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def getSummaryImage(link, lexrank_switch, rpunkt_switch):
    """Return the images belonging to the (optionally summarized) transcript.

    The transcript is fetched for ``link``, split into sentences, optionally
    reduced with ``lr.getSummary``, mapped to timestamps, and the frames at
    those timestamps are extracted from the downloaded video.

    Args:
        link: Link handed to ``ts.get_json_transcript`` and ``vd.get_video``.
        lexrank_switch: When truthy, only the sentences selected by
            ``lr.getSummary`` are used (``round(5 %)`` of the sentence
            count); otherwise every sentence is used.
        rpunkt_switch: Forwarded unchanged to ``ts.get_json_transcript``;
            not used for anything else in this function.

    Returns:
        One of the following, depending on the path taken:

        * ``'Error: no link provided'`` when ``link`` is empty or ``None``.
        * ``('Error: No sentences available', None)`` when summarization is
          requested but 5 % of the sentence count rounds to zero.
        * The value returned by ``ts.getTimestampAtFrameFromSummary`` when
          it contains ``'Error'``.
        * Otherwise the result of ``ts.getImages``.
    """
    # ``not link`` also rejects None, on which ``len(link)`` would raise.
    if not link:
        return 'Error: no link provided'

    print('getting transcript using link: ', link)
    raw_transcript, type_transcript = ts.get_json_transcript(link, rpunkt_switch)
    print('transcript type: ', type_transcript)

    # The caption is not needed on this path; the call is retained because
    # its side effects, if any, cannot be seen from this file.
    ts.get_caption(raw_transcript)

    dict_sentences = ts.getSentences(raw_transcript)

    if lexrank_switch:
        # Keep roughly 5 % of the sentences for the summary.
        nr_sentences = round(len(dict_sentences) * 0.05)
        print('zip: ' + str(nr_sentences))
        # Bail out before asking the summarizer for zero sentences.
        # NOTE(review): this path returns a 2-tuple while every other return
        # of this function is a single value; kept for caller compatibility.
        if nr_sentences == 0:
            return 'Error: No sentences available', None

        full_text = ' '.join(dict_sentences.values())
        list_summary = lr.getSummary(full_text, nr_sentences)
        # Join and re-split so that a summary item holding several
        # '. '-separated sentences is broken into individual sentences.
        concat_list_summary = '. '.join(str(item) for item in list_summary).split('. ')
    else:
        concat_list_summary = list(dict_sentences.values())

    dict_timestamp_summary = ts.getTimestampAtFrameFromSummary(
        raw_transcript, dict_sentences, concat_list_summary
    )
    # Pass the helper's result straight through when it signals an error.
    if 'Error' in dict_timestamp_summary:
        return dict_timestamp_summary

    result_get_video = vd.get_video(link)
    # str() keeps the log line from raising if the result is not a string.
    print('video: ' + str(result_get_video))

    proc_list = fr.extractImagesFromVideo(dict_timestamp_summary.keys())
    print('frames: ' + str(proc_list))

    return ts.getImages(dict_timestamp_summary)
|
|
|
|
|
def getSummary(link, lexrank_switch, rpunkt_switch):
    """Build the HTML summary and the matching images for a video transcript.

    The transcript is fetched for ``link``, optionally re-punctuated, split
    into sentences, optionally reduced with ``lr.getSummary``, mapped to
    timestamps, and the frames at those timestamps are extracted from the
    downloaded video.

    Args:
        link: Link handed to ``ts.get_json_transcript`` and ``vd.get_video``.
        lexrank_switch: When truthy, only the sentences selected by
            ``lr.getSummary`` are used (``round(5 %)`` of the sentence
            count); otherwise every sentence is used.
        rpunkt_switch: Forwarded to ``ts.get_json_transcript``; when truthy
            the caption is additionally run through ``rp.predict`` and the
            punctuated text is written back into the transcript.

    Returns:
        One of the following, depending on the path taken:

        * ``'Error: no link provided'`` when ``link`` is empty or ``None``.
        * ``('Error: No sentences available', None)`` when summarization is
          requested but 5 % of the sentence count rounds to zero.
        * The value returned by ``ts.getTimestampAtFrameFromSummary`` when
          it contains ``'Error'``.
        * Otherwise the tuple ``(html_file, images)`` produced by
          ``ts.convertToHTML`` and ``ts.getImages``.
    """
    # ``not link`` also rejects None, on which ``len(link)`` would raise.
    if not link:
        return 'Error: no link provided'

    print('getting transcript using link: ', link)
    raw_transcript, type_transcript = ts.get_json_transcript(link, rpunkt_switch)
    print('transcript type: ', type_transcript)

    raw_caption = ts.get_caption(raw_transcript)

    # Without punctuation recovery the raw transcript is used as-is.
    pnct_raw_transcript = raw_transcript

    if rpunkt_switch:
        # assumes type_transcript is indexable with at least two entries
        # -- TODO confirm against ts.get_json_transcript
        print('Recovering punctuation from english text...', type_transcript[1])
        # NOTE(review): `rp` is not defined by any import visible at the top
        # of this file; confirm it exists, otherwise this branch raises
        # NameError.
        caption = rp.predict(raw_caption)
        pnct_caption = ts.restore_cr(raw_caption, caption)
        pnct_raw_transcript = ts.replacePunctuatedText(raw_transcript, pnct_caption)

    dict_sentences = ts.getSentences(pnct_raw_transcript)

    if lexrank_switch:
        # Keep roughly 5 % of the sentences for the summary.
        nr_sentences = round(len(dict_sentences) * 0.05)
        print('zip: ' + str(nr_sentences))
        # Bail out before asking the summarizer for zero sentences.
        if nr_sentences == 0:
            return 'Error: No sentences available', None

        full_text = ' '.join(dict_sentences.values())
        list_summary = lr.getSummary(full_text, nr_sentences)
        # Join and re-split so that a summary item holding several
        # '. '-separated sentences is broken into individual sentences.
        concat_list_summary = '. '.join(str(item) for item in list_summary).split('. ')
    else:
        concat_list_summary = list(dict_sentences.values())

    dict_timestamp_summary = ts.getTimestampAtFrameFromSummary(
        pnct_raw_transcript, dict_sentences, concat_list_summary
    )
    # Pass the helper's result straight through when it signals an error.
    # NOTE(review): this return and the no-link return are single values,
    # whereas the success path returns a 2-tuple; kept for compatibility.
    if 'Error' in dict_timestamp_summary:
        return dict_timestamp_summary

    result_get_video = vd.get_video(link)
    # str() keeps the log line from raising if the result is not a string.
    print('video: ' + str(result_get_video))

    proc_list = fr.extractImagesFromVideo(dict_timestamp_summary.keys())
    print('frames: ' + str(proc_list))

    html_file = ts.convertToHTML(dict_timestamp_summary)
    images = ts.getImages(dict_timestamp_summary)

    return html_file, images
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|