merve HF staff committed on
Commit
79ffe5b
•
1 Parent(s): 4657400

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -10
app.py CHANGED
@@ -1,15 +1,42 @@
1
- import streamlit as st
2
- import datasets
3
  from datasets import load_dataset
4
- import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- def load_data(dataset_name):
7
- dataset = load_dataset(dataset_name)
8
- return dataset
9
 
 
10
 
11
- dataset = load_dataset("fake_news_english")
12
- df = pd.DataFrame.from_records(dataset)
13
- st.write("loading dataset")
14
  st.dataframe(df)
15
- st.write("loaded")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from datasets import load_dataset
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import re
5
+ import nltk
6
+ from wordcloud import WordCloud, STOPWORDS
7
+ from nltk.corpus import stopwords
8
+ nltk.download("stopwords")
9
+ stop = stopwords.words('english')
10
+
11
+ dataset = load_dataset("huggingartists/gorillaz")
12
+ df = pd.DataFrame.from_dict(dataset["train"])
13
+
14
+ st.dataframe(df)
15
+ st.write("Removed special characters")
16
 
17
+ def standardize(text, remove_digits=True):
18
+ text=re.sub('[^a-zA-Z\d\s]', '',text)
19
+ text = text.lower()
20
 
21
+ return text
22
 
23
+ df.text = df.text.apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))
24
+ df.text=df.text.apply(standardize)
 
25
  st.dataframe(df)
26
+
27
+ words = df.text.str.split(expand=True).unstack().value_counts()
28
+ st.bar_chart(words[20:40])
29
+ st.set_option('deprecation.showPyplotGlobalUse', False)
30
+
31
+ import matplotlib.pyplot as plt
32
+ def word_cloud(content, title):
33
+ wc = WordCloud(background_color='white', max_words=200,
34
+ stopwords=STOPWORDS, max_font_size=50)
35
+ wc.generate(" ".join(content.index.values))
36
+ fig = plt.figure(figsize=(16, 13))
37
+ plt.title(title, fontsize=20)
38
+ plt.imshow(wc.recolor(colormap='Pastel2', random_state=42), alpha=0.98)
39
+ plt.axis('off')
40
+ st.pyplot()
41
+
42
+ word_cloud(words, "Word Cloud")