File size: 3,944 Bytes
4b3f952 a0ffacf 4b3f952 a0ffacf 0e8f0e8 36f869a c571946 4b3f952 ec2cbde 4b3f952 aced926 d383c6d a0ffacf 4b3f952 bfcc277 0e8f0e8 a0ffacf a0add13 4b3f952 476a395 0e8f0e8 4b3f952 476a395 0e8f0e8 4b3f952 476a395 4b3f952 36f869a 8fd4c2d 36f869a 476a395 4b3f952 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
# -*- coding: utf-8 -*-
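"""
Streamlit app that transcribes the speech in an uploaded MP4 video,
summarizes the transcript, and can translate the summary into a
selected language.

Created on Mon Mar 28 01:04:50 2022
@author: adeep
"""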
from fnmatch import translate
import cv2 as cv
import tempfile
import numpy as np
import pandas as pd
import streamlit as st
import joblib
import os
from moviepy.editor import VideoFileClip
import speech_recognition as sr
from pydub import AudioSegment
from pydub.silence import split_on_silence
import transformers
from transformers import pipeline
import nltk

# Fetch the NLTK resources once per script run. 'punkt' backs the
# sent_tokenize call used when tidying the summary; the tagger model is
# not referenced in this file and is presumably needed elsewhere (e.g. by
# utils) -- confirm before removing it.
# The import and both downloads used to be repeated verbatim, which made
# every Streamlit rerun perform the download check twice.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
from nltk.tokenize import sent_tokenize
import re
from utils import get_translation, welcome, get_large_audio_transcription
from PIL import Image
#import stanfordnlp
def _extract_transcript(video_bytes):
    """Transcribe the speech contained in an uploaded video.

    The bytes are written to a temporary file so moviepy can open them by
    path, the audio track is exported to ``movie.wav`` and that file is
    passed to ``get_large_audio_transcription``.

    Args:
        video_bytes: Raw content of the uploaded MP4 file.

    Returns:
        Whatever ``get_large_audio_transcription`` returns (presumably the
        transcript text -- confirm against ``utils``), or ``None`` when the
        video has no audio track.
    """
    tmp_video = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    try:
        # Close the handle before the file is opened by path: this
        # guarantees every byte is flushed to disk and lets the path be
        # reopened on platforms that forbid a second open.
        with tmp_video:
            tmp_video.write(video_bytes)

        clip = VideoFileClip(tmp_video.name)
        try:
            if clip.audio is None:
                return None
            clip.audio.write_audiofile("movie.wav")
        finally:
            # Release the reader processes held by the clip.
            clip.close()
    finally:
        # delete=False means nothing else removes the file; cleanup is
        # best-effort so a failed unlink never masks the real result.
        try:
            os.remove(tmp_video.name)
        except OSError:
            pass

    return get_large_audio_transcription("movie.wav")


def _polish_summary(raw_summary):
    """Capitalize each sentence and drop spaces that precede punctuation.

    Args:
        raw_summary: Summary text as produced by the summarization model.

    Returns:
        The cleaned-up summary as a single string.
    """
    sentences = sent_tokenize(raw_summary, language='english')
    capitalized = [sentence.capitalize() for sentence in sentences]
    # Raw string: "\." in a normal literal is an invalid escape sequence.
    return re.sub(r" (?=[\.,'!?:;])", "", ' '.join(capitalized))


def main():
    """Render the Streamlit page: upload, summarize and optionally translate.

    Streamlit re-executes this function on every widget interaction, so the
    generated summary is kept in ``st.session_state`` (keys ``summary``,
    ``summarization`` and ``gen_summ``) to survive reruns triggered by the
    translation widgets.
    """
    st.title("Summarize Text")
    video = st.file_uploader("Choose a file", type=['mp4'])
    button = st.button("Summarize")

    max_c = st.sidebar.slider('Select max words', 50, 500, step=10, value=150)
    min_c = st.sidebar.slider('Select min words', 10, 450, step=10, value=50)

    with st.spinner("Running.."):
        if button and video:
            if min_c > max_c:
                # The two sliders overlap, so an inverted range is possible.
                st.error("The minimum summary length cannot be greater than the maximum length.")
            else:
                whole_text = _extract_transcript(video.read())
                if whole_text:
                    # NOTE: the model is loaded on every click, which is slow;
                    # "t5-base" is a lighter alternative if latency matters.
                    summarizer = pipeline("summarization", model="t5-large", tokenizer="t5-large", framework="pt")
                    summarized = summarizer(whole_text, min_length=min_c, max_length=max_c)
                    summ = _polish_summary(summarized[0]['summary_text'])

                    # Always overwrite: previously the state was written only
                    # the first time, so a second video in the same session
                    # kept showing the first video's summary.
                    st.session_state.summary = True
                    st.session_state.summarization = summ
                    st.session_state.gen_summ = True
                else:
                    st.error("No speech could be transcribed from the uploaded video.")

    # Named so it does not shadow the module-level `translate` import.
    translate_choice = st.sidebar.radio('Do you want to translate the text to any different language?', ('No', 'Yes'))

    has_summary = 'summary' in st.session_state
    if has_summary:
        summarized_text = st.session_state.summarization
        st.write(summarized_text)

    if translate_choice == 'Yes':
        if has_summary:
            lang_list = ['Hindi', 'Marathi', 'Malayalam', 'Kannada', 'Telugu', 'Tamil', 'Oriya', 'Bengali', 'Gujarati', 'Urdu']
            s_type = st.sidebar.selectbox('Select the Language in which you want to Translate:', lang_list)
            st.sidebar.write('You selected:', s_type)

            translation = get_translation(source='English', dest=s_type, text=summarized_text)
            st.sidebar.write(translation)
        else:
            st.error("The summary has not been generated yet. Please generate the summary first and then translate")
    else:
        st.write('')
# Script entry point: build the page only when this file is executed
# directly (e.g. by `streamlit run`), not when it is imported.
if __name__ == "__main__":
    main()
|