""" Created on Mon Mar 28 01:04:50 2022 @author: adeep """ from fnmatch import translate import cv2 as cv import tempfile import numpy as np import pandas as pd import streamlit as st import joblib import os from moviepy.editor import VideoFileClip import speech_recognition as sr from pydub import AudioSegment from pydub.silence import split_on_silence import transformers from transformers import pipeline import nltk nltk.download('punkt') nltk.download('averaged_perceptron_tagger') import nltk nltk.download('punkt') nltk.download('averaged_perceptron_tagger') from nltk.tokenize import sent_tokenize import re from utils import get_translation, welcome, get_large_audio_transcription from PIL import Image #import stanfordnlp def main(): st.title("Summarize Text") video = st.file_uploader("Choose a file", type=['mp4']) button = st.button("Summarize") max_c = st.sidebar.slider('Select max words', 50, 500, step=10, value=150) min_c = st.sidebar.slider('Select min words', 10, 450, step=10, value=50) gen_summ = False with st.spinner("Running.."): if button and video: tfile = tempfile.NamedTemporaryFile(delete=False) tfile.write(video.read()) #st.write(tfile.name) v = VideoFileClip(tfile.name) v.audio.write_audiofile("movie.wav") #st.video(video, format="video/mp4", start_time=0) #st.audio("movie.wav") whole_text=get_large_audio_transcription("movie.wav") #st.write(whole_text) #summarizer = pipeline("summarization") #summarizer = pipeline("summarization", model="t5-base", tokenizer="t5-base", framework="pt") summarizer = pipeline("summarization", model="t5-large", tokenizer="t5-large", framework="pt") summarized = summarizer(whole_text, min_length=min_c, max_length=max_c) summ=summarized[0]['summary_text'] #st.write(summ) gen_summ = True #stf_nlp = stanfordnlp.Pipeline(processors='tokenize,mwt,pos') #doc = stf_nlp(summ) #l=[w.text.capitalize() if w.upos in ["PROPN","NNS"] else w.text for sent in doc.sentences for w in sent.words] #text=" ".join(l) #summ=truecasing_by_sentence_segmentation(summ) sentences = sent_tokenize(summ, language='english') # capitalize the sentences sentences_capitalized = [s.capitalize() for s in sentences] # join the capitalized sentences summ = re.sub(" (?=[\.,'!?:;])", "", ' '.join(sentences_capitalized)) if 'summary' not in st.session_state: st.session_state.summary=True st.session_state.summarization = summ st.session_state.gen_summ = True translate = st.sidebar.radio('Do you want to translate the text to any different language?', ('No', 'Yes')) if 'summary' in st.session_state: summarized_text = st.session_state.summarization st.write(summarized_text) gen_summ = st.session_state.gen_summ if translate == 'Yes' and gen_summ == True: lang_list = ['Hindi', 'Marathi', 'Malayalam', 'Kannada', 'Telugu', 'Tamil', 'Oriya', 'Bengali', 'Gujarati', 'Urdu'] s_type = st.sidebar.selectbox('Select the Language in which you want to Translate:',lang_list) st.sidebar.write('You selected:', s_type) translation = get_translation(source='English', dest=s_type, text=summarized_text) st.sidebar.write(translation) elif translate == 'Yes' and gen_summ == False: st.error("The summary has not been generated yet. Please generate the summary first and then translate") else: st.write('') if __name__ == '__main__': main()