MBinAsif committed on
Commit
f81f9d1
1 Parent(s): a89dbc2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -0
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# -*- coding: utf-8 -*-
"""NLP Text Analyzer: a small Streamlit app for inspecting a piece of text.

For the text entered by the user the app renders three sections:

* a summary generated with the ``facebook/bart-large-cnn`` model,
* a word cloud (English stop words excluded),
* a table of the most frequent words.

Originally exported from a Colaboratory notebook (Untitled11.ipynb):
    https://colab.research.google.com/drive/1Y2vv_pZ5nKXKLrXrmsSu6z8hz6ncjWOz
"""

import streamlit as st
from transformers import BartForConditionalGeneration, BartTokenizer
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from nltk.probability import FreqDist

MODEL_NAME = 'facebook/bart-large-cnn'
TOP_WORD_COUNT = 10

# Tokenizer data and stop-word lists used by word_tokenize / stopwords below.
nltk.download('punkt')
nltk.download('stopwords')


@st.cache_resource
def load_summarizer():
    """Load the BART model and tokenizer once and reuse them across reruns.

    Streamlit re-executes the whole script on every interaction; without
    caching, the pretrained weights would be loaded again each time.

    Returns:
        A ``(model, tokenizer)`` tuple for ``MODEL_NAME``.
    """
    model = BartForConditionalGeneration.from_pretrained(MODEL_NAME)
    tokenizer = BartTokenizer.from_pretrained(MODEL_NAME)
    return model, tokenizer


def summarize(text):
    """Return an abstractive summary of ``text`` produced by BART.

    The text is passed to the model as-is: BART does not use task prefixes
    (a ``"summarize: "`` prefix is a T5 convention and would simply be
    treated as part of the document). Input is truncated to 1024 tokens.
    """
    model, tokenizer = load_summarizer()
    input_ids = tokenizer.encode(
        text, return_tensors="pt", max_length=1024, truncation=True
    )
    summary_ids = model.generate(
        input_ids,
        max_length=150,
        min_length=40,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


def show_word_cloud(text, stop_words):
    """Render a word cloud of ``text`` (minus ``stop_words``) into the page."""
    try:
        cloud = WordCloud(stopwords=stop_words, background_color='white').generate(text)
    except ValueError:
        # WordCloud raises ValueError when no words are left to draw,
        # e.g. when the input consists only of stop words or punctuation.
        st.info("Not enough words to build a word cloud.")
        return

    # Use an explicit figure instead of pyplot's implicit global one, and
    # close it after rendering so figures do not accumulate across reruns.
    fig, ax = plt.subplots(figsize=(8, 6))  # Adjust the figsize as needed
    ax.imshow(cloud, interpolation='bilinear')
    ax.axis("off")
    st.pyplot(fig)
    plt.close(fig)


def most_common_words(text, stop_words, limit=TOP_WORD_COUNT):
    """Return the ``limit`` most frequent words of ``text`` with their counts.

    Tokens are lower-cased so that "The" and "the" are counted together;
    pure-punctuation tokens and stop words are dropped so the table agrees
    with the word cloud.

    Returns:
        A list of ``(word, count)`` tuples, most frequent first.
    """
    tokens = (token.lower() for token in word_tokenize(text))
    words = [
        token
        for token in tokens
        if token not in stop_words and any(ch.isalnum() for ch in token)
    ]
    return FreqDist(words).most_common(limit)


st.title("NLP Text Analyzer")

# "Type here..." is a hint only. Passing it as the widget *value* would make
# the app analyse the hint itself on first load.
user_input = st.text_area("Enter your text:", placeholder="Type here...")

if user_input and user_input.strip():
    english_stop_words = set(stopwords.words('english'))

    st.header("Summary:")
    st.write(summarize(user_input))

    st.header("Word Cloud:")
    show_word_cloud(user_input, english_stop_words)

    st.header("Most Common Words:")
    top_words = most_common_words(user_input, english_stop_words)
    if top_words:
        # Column-oriented dict is rendered by st.table as a two-column table.
        st.table({
            "Word": [word for word, _ in top_words],
            "Frequency": [count for _, count in top_words],
        })
    else:
        st.info("No words to count.")