File size: 3,944 Bytes
4b3f952
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0ffacf
4b3f952
 
 
 
 
 
 
 
a0ffacf
 
0e8f0e8
36f869a
 
 
c571946
4b3f952
 
 
 
 
 
 
 
ec2cbde
4b3f952
 
 
aced926
d383c6d
a0ffacf
4b3f952
bfcc277
0e8f0e8
a0ffacf
 
 
 
a0add13
 
 
 
 
 
 
4b3f952
 
 
476a395
0e8f0e8
4b3f952
 
 
476a395
 
 
 
 
0e8f0e8
4b3f952
 
 
 
 
476a395
4b3f952
 
 
36f869a
8fd4c2d
36f869a
 
476a395
4b3f952
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 28 01:04:50 2022

@author: adeep
"""
from fnmatch import translate
import cv2 as cv
import tempfile
import numpy as np
import pandas as pd
import streamlit as st 
import joblib
import os
from moviepy.editor import VideoFileClip
import speech_recognition as sr
from pydub import AudioSegment
from pydub.silence import split_on_silence
import transformers
from transformers import pipeline
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
from nltk.tokenize import sent_tokenize
import re
from utils import get_translation, welcome, get_large_audio_transcription

from PIL import Image

#import stanfordnlp

# Matches a single space that sits directly before a punctuation mark, so the
# space can be removed when sentences are re-joined.
_SPACE_BEFORE_PUNCT = re.compile(r" (?=[\.,'!?:;])")


def _extract_audio(video, audio_path):
    """Write the audio track of the uploaded *video* to *audio_path*.

    The upload is spooled to a temporary file because ``VideoFileClip`` is
    given a filesystem path. The temporary file and the clip are always
    released, whether or not extraction succeeds.

    Returns:
        True when the audio was written, False when the clip has no audio.
    """
    tfile = tempfile.NamedTemporaryFile(delete=False)
    try:
        tfile.write(video.read())
        # Close (and thereby flush) before the path is reopened by the reader;
        # an open handle may hide buffered bytes or block reopening.
        tfile.close()
        clip = VideoFileClip(tfile.name)
        try:
            if clip.audio is None:
                return False
            clip.audio.write_audiofile(audio_path)
            return True
        finally:
            clip.close()
    finally:
        tfile.close()
        try:
            os.remove(tfile.name)
        except OSError:
            # Best-effort cleanup: a leftover temp file must not fail the run.
            pass


def _polish_summary(raw_summary):
    """Upper-case the first letter of each sentence and tidy punctuation spacing."""
    sentences = sent_tokenize(raw_summary, language='english')
    # Only the first character is changed: str.capitalize() would also
    # lower-case the rest of the sentence and destroy acronyms / proper nouns.
    capitalized = [s[:1].upper() + s[1:] for s in sentences]
    return _SPACE_BEFORE_PUNCT.sub("", ' '.join(capitalized))


def _summarize_video(video, min_c, max_c):
    """Transcribe the uploaded *video* and return its summary.

    Problems are reported through ``st.error``.

    Args:
        video: The uploaded file object returned by ``st.file_uploader``.
        min_c: Value passed to the summarizer as ``min_length``.
        max_c: Value passed to the summarizer as ``max_length``.

    Returns:
        The polished summary text, or None when no summary could be produced.
    """
    if min_c > max_c:
        st.error("The minimum summary length cannot be greater than the maximum summary length.")
        return None

    # NOTE(review): the fixed file name is shared by every session of the app;
    # confirm get_large_audio_transcription accepts other paths before changing.
    if not _extract_audio(video, "movie.wav"):
        st.error("The uploaded video has no audio track to transcribe.")
        return None

    whole_text = get_large_audio_transcription("movie.wav")
    if not whole_text:
        st.error("No speech could be transcribed from the uploaded video.")
        return None

    summarizer = pipeline("summarization", model="t5-large", tokenizer="t5-large", framework="pt")
    summarized = summarizer(whole_text, min_length=min_c, max_length=max_c)
    return _polish_summary(summarized[0]['summary_text'])


def main():
    """Render the page: upload a video, summarize its speech, optionally translate.

    The latest summary is kept in ``st.session_state`` (keys ``summary``,
    ``summarization`` and ``gen_summ``) so it is still available on reruns
    triggered by the sidebar widgets.
    """
    st.title("Summarize Text")
    video = st.file_uploader("Choose a file", type=['mp4'])
    button = st.button("Summarize")

    max_c = st.sidebar.slider('Select max words', 50, 500, step=10, value=150)
    min_c = st.sidebar.slider('Select min words', 10, 450, step=10, value=50)

    if button and video:
        with st.spinner("Running.."):
            summ = _summarize_video(video, min_c, max_c)
        if summ is not None:
            # Always overwrite, so a newly summarized video replaces the
            # summary stored by an earlier run.
            st.session_state.summary = True
            st.session_state.summarization = summ
            st.session_state.gen_summ = True

    want_translation = st.sidebar.radio('Do you want to translate the text to any different language?', ('No', 'Yes'))

    gen_summ = False
    summarized_text = None
    if 'summary' in st.session_state:
        summarized_text = st.session_state.summarization
        st.write(summarized_text)
        gen_summ = st.session_state.gen_summ

    if want_translation == 'Yes':
        if gen_summ:
            lang_list = ['Hindi', 'Marathi', 'Malayalam', 'Kannada', 'Telugu', 'Tamil', 'Oriya', 'Bengali', 'Gujarati', 'Urdu']

            s_type = st.sidebar.selectbox('Select the Language in which you want to Translate:', lang_list)
            st.sidebar.write('You selected:', s_type)

            translation = get_translation(source='English', dest=s_type, text=summarized_text)

            st.sidebar.write(translation)
        else:
            st.error("The summary has not been generated yet. Please generate the summary first and then translate")
             
# Script entry point: build the page only when this file is executed directly.
if __name__ == "__main__":
    main()