Spaces:

merve
/

streamlit-dataset-demo

Build error

merve HF staff

Upload app.py

79ffe5b almost 3 years ago

No virus

1.26 kB

	from datasets import load_dataset
	import streamlit as st
	import pandas as pd
	import re
	import nltk
	from wordcloud import WordCloud, STOPWORDS
	from nltk.corpus import stopwords
	# Make sure the NLTK stop-word corpus is available before reading it.
	nltk.download("stopwords")
	# Hold the stop words in a set: the stop-word filter tests membership once
	# per token, and a set lookup is O(1) where scanning a list is O(n).
	stop = set(stopwords.words('english'))

	# Load the "huggingartists/gorillaz" dataset and turn its "train" split
	# into a pandas DataFrame.
	dataset = load_dataset("huggingartists/gorillaz")
	train_split = dataset["train"]
	df = pd.DataFrame.from_dict(train_split)

	# Show the table as loaded, followed by a caption line.
	st.dataframe(df)
	st.write("Removed special characters")

	def standardize(text, remove_digits=True):
		"""Lower-case *text* and delete every character outside the allowed set.

		ASCII letters and whitespace are always kept. Digits are deleted when
		``remove_digits`` is true (the default) and kept otherwise. Every other
		character (punctuation, symbols, non-ASCII letters) is deleted.

		Args:
			text: The string to clean.
			remove_digits: Whether digit characters are deleted as well.

		Returns:
			The cleaned, lower-cased string. Whitespace is left untouched, so
			deleting characters can leave consecutive or trailing spaces.
		"""
		# Raw strings keep \d and \s as regex classes rather than (invalid)
		# string escapes, which newer Python versions warn about.
		# Honour remove_digits: digits stay only when the caller opts in.
		pattern = r'[^a-zA-Z\s]' if remove_digits else r'[^a-zA-Z\d\s]'
		return re.sub(pattern, '', text).lower()

	def _drop_stopwords(text):
		"""Return *text* without its stop words, matched case-insensitively."""
		# NLTK's English stop words are lower-case, so lower each token for the
		# lookup only; without this, capitalised tokens such as "The" or "And"
		# slip through the filter.
		return ' '.join(word for word in text.split() if word.lower() not in stop)

	# Filter stop words before standardize(): that step strips apostrophes, so
	# entries such as "don't" could no longer match the stop list afterwards.
	df["text"] = df["text"].apply(_drop_stopwords).apply(standardize)
	st.dataframe(df)

	# Count how often each token occurs: split every row into one column per
	# token position, stack the columns into one Series, then tally the values
	# (most frequent first).
	token_table = df.text.str.split(expand=True)
	words = token_table.unstack().value_counts()

	# Chart the tokens ranked 21st through 40th by frequency.
	ranked_slice = words[20:40]
	st.bar_chart(ranked_slice)

	# Switch off Streamlit's 'deprecation.showPyplotGlobalUse' notice.
	st.set_option('deprecation.showPyplotGlobalUse', False)

	import matplotlib.pyplot as plt
	def word_cloud(content, title):
		"""Render a word cloud of the index labels of *content* in the app.

		Args:
			content: Object whose ``index.values`` are the words to draw; the
				script passes the word-frequency Series.
			title: Heading drawn above the image.

		Returns:
			None. The figure is sent to Streamlit and then closed.
		"""
		wc = WordCloud(
			background_color='white',
			max_words=200,
			stopwords=STOPWORDS,
			max_font_size=50,
		)
		# NOTE(review): only the index labels are joined, each exactly once, so
		# the counts held in *content* are not passed to WordCloud - confirm
		# whether the cloud was meant to be weighted by frequency.
		wc.generate(" ".join(content.index.values))

		# Draw on an explicit figure and hand that figure to Streamlit instead
		# of relying on pyplot's implicit global figure.
		fig, ax = plt.subplots(figsize=(16, 13))
		ax.set_title(title, fontsize=20)
		ax.imshow(wc.recolor(colormap='Pastel2', random_state=42), alpha=0.98)
		ax.axis('off')
		st.pyplot(fig)
		# Release the figure so repeated calls do not pile up open figures.
		plt.close(fig)

	# Draw the word cloud from the word-frequency Series.
	word_cloud(content=words, title="Word Cloud")