karthik11 committed on
Commit
61445b8
1 Parent(s): 9a605f1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -0
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fnmatch import translate
2
+ import cv2 as cv
3
+ import tempfile
4
+ import numpy as np
5
+ import pandas as pd
6
+ import streamlit as st
7
+ import joblib
8
+ import os
9
+ from moviepy.editor import VideoFileClip
10
+ import speech_recognition as sr
11
+ from pydub import AudioSegment
12
+ from pydub.silence import split_on_silence
13
+ import transformers
14
+ from transformers import pipeline
15
import nltk

# Fetch the NLTK data packages at import time. The original repeated this
# import and both download calls twice; one pass is sufficient.
# 'punkt' backs nltk.tokenize.sent_tokenize, which main() calls.
nltk.download('punkt')
# NOTE(review): no visible code uses the POS tagger; presumably the `utils`
# helpers need it -- confirm before removing.
nltk.download('averaged_perceptron_tagger')
21
+ from nltk.tokenize import sent_tokenize
22
+ import re
23
+ from utils import get_translation, welcome, get_large_audio_transcription
24
+
25
+ from PIL import Image
26
+
27
+ #import stanfordnlp
28
+
29
def _extract_transcript(video):
    """Transcribe the speech contained in an uploaded video.

    The upload is copied into a private temporary directory, its audio track
    is exported to a WAV file in that same directory, and the WAV path is
    handed to ``get_large_audio_transcription``.  Using a per-call directory
    (instead of a fixed ``movie.wav`` in the working directory) keeps
    concurrent sessions from overwriting each other's files and guarantees
    the intermediate files are removed afterwards.

    Args:
        video: File-like object returned by ``st.file_uploader``.

    Returns:
        Whatever ``get_large_audio_transcription`` returns for the extracted
        audio, or ``None`` when the video has no audio track.
    """
    with tempfile.TemporaryDirectory() as workdir:
        video_path = os.path.join(workdir, "upload.mp4")
        audio_path = os.path.join(workdir, "movie.wav")

        # Close the file before handing the path to moviepy so that every
        # byte is flushed to disk when the clip is opened.
        with open(video_path, "wb") as handle:
            handle.write(video.read())

        clip = VideoFileClip(video_path)
        try:
            if clip.audio is None:
                return None
            clip.audio.write_audiofile(audio_path)
        finally:
            # Release the reader held by the clip before the directory is
            # deleted.
            clip.close()

        return get_large_audio_transcription(audio_path)


def _polish_summary(sentences):
    """Upper-case the first letter of each sentence and join them.

    Only the first character is changed; ``str.capitalize`` would also
    lower-case the rest of the sentence and destroy names and acronyms.
    A space that directly precedes punctuation is removed.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        The joined, tidied text.
    """
    capitalized = [s[:1].upper() + s[1:] for s in sentences]
    return re.sub(r" (?=[\.,'!?:;])", "", ' '.join(capitalized))


def main():
    """Render the Streamlit page: summarize an uploaded video, then translate.

    Flow:
      1. The user uploads an ``.mp4`` and presses "Summarize".
      2. The audio is transcribed and summarized with the ``t5-large``
         summarization pipeline, bounded by the sidebar sliders.
      3. The summary is stored in ``st.session_state`` so it survives the
         rerun triggered by the translation widgets.
      4. Optionally the stored summary is translated via ``get_translation``.
    """
    st.title("Summarize Text")
    video = st.file_uploader("Choose a file", type=['mp4'])
    button = st.button("Summarize")

    max_c = st.sidebar.slider('Select max words', 50, 500, step=10, value=150)
    min_c = st.sidebar.slider('Select min words', 10, 450, step=10, value=50)

    if button and not video:
        st.warning("Please upload a video before summarizing.")
    elif button and min_c >= max_c:
        # The two sliders are independent, so the user can pick an
        # impossible range; reject it instead of passing it to the model.
        st.error("'Select min words' must be smaller than 'Select max words'.")
    elif button:
        with st.spinner("Running.."):
            whole_text = _extract_transcript(video)
            if whole_text is None:
                st.error("The uploaded video does not contain an audio track.")
            else:
                # NOTE(review): very long transcripts may exceed the model's
                # input limit; consider chunking or truncation.
                summarizer = pipeline("summarization", model="t5-large", tokenizer="t5-large", framework="pt")
                summarized = summarizer(whole_text, min_length=min_c, max_length=max_c)
                summ = _polish_summary(
                    sent_tokenize(summarized[0]['summary_text'], language='english')
                )

                # Always overwrite the stored summary.  The original only
                # stored it when no summary existed yet, so every later
                # summarization was silently discarded.
                st.session_state.summary = True
                st.session_state.summarization = summ
                st.session_state.gen_summ = True

    # Named `wants_translation` so it does not shadow the module-level
    # `translate` imported from fnmatch.
    wants_translation = st.sidebar.radio('Do you want to translate the text to any different language?', ('No', 'Yes'))

    gen_summ = False
    summarized_text = None
    if 'summary' in st.session_state:
        summarized_text = st.session_state.summarization
        st.write(summarized_text)
        gen_summ = st.session_state.gen_summ

    if wants_translation == 'Yes':
        if gen_summ:
            lang_list = ['Hindi', 'Marathi', 'Malayalam', 'Kannada', 'Telugu', 'Tamil', 'Oriya', 'Bengali', 'Gujarati', 'Urdu']

            s_type = st.sidebar.selectbox('Select the Language in which you want to Translate:', lang_list)
            st.sidebar.write('You selected:', s_type)

            translation = get_translation(source='English', dest=s_type, text=summarized_text)
            st.sidebar.write(translation)
        else:
            st.error("The summary has not been generated yet. Please generate the summary first and then translate")
100
+
101
# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()