|
from fnmatch import translate |
|
import cv2 as cv |
|
import tempfile |
|
import numpy as np |
|
import pandas as pd |
|
import streamlit as st |
|
import joblib |
|
import os |
|
from moviepy.editor import VideoFileClip |
|
import speech_recognition as sr |
|
from pydub import AudioSegment |
|
from pydub.silence import split_on_silence |
|
import transformers |
|
from transformers import pipeline |
|
import nltk

# Fetch the NLTK resources a single time per script run. The original
# repeated the import and both downloads verbatim, doubling the start-up
# check on every Streamlit rerun.
# 'punkt' backs sent_tokenize; the tagger is not referenced in the visible
# code and is kept in case another module relies on it (TODO confirm).
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
|
from nltk.tokenize import sent_tokenize |
|
import re |
|
from utils import get_translation, welcome, get_large_audio_transcription |
|
|
|
from PIL import Image |
|
|
|
|
|
|
|
def _transcribe_upload(video):
    """Extract the audio track of an uploaded video and transcribe it.

    The upload and the intermediate WAV file live in a private temporary
    directory that is removed when this function returns, so nothing is
    leaked on disk and concurrent sessions do not overwrite each other's
    files.

    Args:
        video: The object returned by ``st.file_uploader``.

    Returns:
        Whatever ``get_large_audio_transcription`` returns for the extracted
        audio, or ``None`` when the video has no audio track.
    """
    with tempfile.TemporaryDirectory() as workdir:
        video_path = os.path.join(workdir, "upload.mp4")
        audio_path = os.path.join(workdir, "movie.wav")

        # Write and close the file before moviepy opens it by name, so the
        # bytes are guaranteed to be flushed to disk.
        with open(video_path, "wb") as handle:
            # getvalue() returns the full payload regardless of the current
            # read position of the uploaded buffer.
            handle.write(video.getvalue())

        clip = VideoFileClip(video_path)
        try:
            if clip.audio is None:
                return None
            clip.audio.write_audiofile(audio_path)
        finally:
            # Release the reader(s) held by moviepy.
            clip.close()

        return get_large_audio_transcription(audio_path)


def _tidy_summary(raw_summary):
    """Capitalize each sentence and remove spaces placed before punctuation."""
    sentences = sent_tokenize(raw_summary, language='english')
    joined = ' '.join(sentence.capitalize() for sentence in sentences)
    # Raw string: same pattern as before, without the invalid "\." escape.
    return re.sub(r" (?=[\.,'!?:;])", "", joined)


def main():
    """Render the "Summarize Text" page.

    Lets the user upload an mp4, transcribes its audio, summarizes the
    transcript with a T5 summarization pipeline, stores the result in
    ``st.session_state`` and optionally shows a translation in the sidebar.
    """
    st.title("Summarize Text")
    video = st.file_uploader("Choose a file", type=['mp4'])
    button = st.button("Summarize")

    max_c = st.sidebar.slider('Select max words', 50, 500, step=10, value=150)
    min_c = st.sidebar.slider('Select min words', 10, 450, step=10, value=50)
    gen_summ = False

    with st.spinner("Running.."):
        if button and video:
            if min_c > max_c:
                # The two sliders are independent, so this combination is
                # reachable; reject it instead of passing it to the model.
                st.error("The minimum length must not exceed the maximum length.")
            else:
                whole_text = _transcribe_upload(video)
                if whole_text is None:
                    st.error("The uploaded video does not contain an audio track.")
                elif not whole_text:
                    st.error("No speech could be transcribed from the uploaded video.")
                else:
                    summarizer = pipeline(
                        "summarization",
                        model="t5-large",
                        tokenizer="t5-large",
                        framework="pt",
                    )
                    summarized = summarizer(
                        whole_text, min_length=min_c, max_length=max_c
                    )
                    summ = _tidy_summary(summarized[0]['summary_text'])
                    gen_summ = True

                    # Always store the newest result so a second run does
                    # not keep displaying the summary of an earlier upload.
                    st.session_state.summary = True
                    st.session_state.summarization = summ
                    st.session_state.gen_summ = True

    # Named so that it does not shadow the imported fnmatch.translate.
    translate_choice = st.sidebar.radio(
        'Do you want to translate the text to any different language?',
        ('No', 'Yes'),
    )

    if 'summary' in st.session_state:
        # Streamlit reruns the script on every interaction; the summary is
        # restored from session state so it survives those reruns.
        summarized_text = st.session_state.summarization
        st.write(summarized_text)
        gen_summ = st.session_state.gen_summ

    if translate_choice == 'Yes':
        if gen_summ:
            lang_list = ['Hindi', 'Marathi', 'Malayalam', 'Kannada', 'Telugu', 'Tamil', 'Oriya', 'Bengali', 'Gujarati', 'Urdu']

            s_type = st.sidebar.selectbox('Select the Language in which you want to Translate:', lang_list)
            st.sidebar.write('You selected:', s_type)

            translation = get_translation(source='English', dest=s_type, text=summarized_text)
            st.sidebar.write(translation)
        else:
            st.error("The summary has not been generated yet. Please generate the summary first and then translate")
    else:
        st.write('')
|
|
|
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()