# Smart Farming Sentiment Analysis — Streamlit app (Hugging Face Space)
import pandas as pd | |
import streamlit as st | |
import re | |
import matplotlib.pyplot as plt | |
from textblob import TextBlob | |
import nltk | |
from nltk.corpus import stopwords | |
from collections import Counter | |
from wordcloud import WordCloud | |
nltk.download('stopwords') | |
#Huggingface | |
from datasets import load_dataset | |
from huggingface_hub import login | |
import os | |
# --- Streamlit page setup and data loading ---
st.set_page_config(
    page_title="Smart Farming Sentiment Analysis",
    page_icon="🌱",
    layout="wide"
)
# Authenticate to the Hugging Face Hub. Requires the 'hf_token' environment
# variable to be set (raises KeyError otherwise).
login(token = os.environ['hf_token'])
# Pull the sentence CSV from the Hub dataset and convert it to pandas.
dataset = load_dataset("irfantea/collections", data_files='smartfarmingsentences.csv', split='train')
df = dataset.to_pandas()
# Show the raw data before any cleaning.
st.dataframe(df)
def set_cleantext(dataframe, min_words=10):
    """Clean the 'sentences' column of a DataFrame.

    Steps: drop sentences with fewer than `min_words` words, strip web
    addresses, collapse newlines and trim surrounding whitespace, then drop
    duplicate sentences.

    Parameters
    ----------
    dataframe : DataFrame with a 'sentences' column of strings.
    min_words : minimum word count a sentence must have to be kept
        (default 10, the original hard-coded threshold).

    Returns the cleaned DataFrame with a reset index.
    """
    # Keep only sentences with at least `min_words` whitespace-separated words.
    dataframe = dataframe[dataframe['sentences'].apply(lambda x: len(x.split()) >= min_words)]
    # Delete web addresses. The pattern is defined here (rather than relying
    # on a module-level global declared after this function) so the function
    # is self-contained.
    dataframe.loc[:, 'sentences'] = dataframe['sentences'].str.replace(r'https?://\S+', '', regex=True)
    # Normalize whitespace: newlines become spaces, then trim the ends.
    dataframe.loc[:, 'sentences'] = dataframe['sentences'].str.replace('\n', ' ')
    dataframe.loc[:, 'sentences'] = dataframe['sentences'].str.strip()
    # Delete duplicate sentences.
    dataframe = dataframe.drop_duplicates(subset=['sentences'])
    dataframe.reset_index(drop=True, inplace=True)
    return dataframe
def set_textblob(dataframe):
    """Attach TextBlob sentiment scores to a row.

    Intended for DataFrame.apply(axis=1): `dataframe` is a single row
    (Series) holding a 'sentences' field. Adds 'polarity' and
    'subjectivity' entries and returns the row.
    """
    # Score the sentence once and unpack both sentiment components.
    sentiment = TextBlob(dataframe['sentences']).sentiment
    dataframe['polarity'] = sentiment.polarity
    dataframe['subjectivity'] = sentiment.subjectivity
    return dataframe
def delete_stopwords(dataframe):
    """Remove English stopwords from every entry in the 'sentences' column.

    Mutates the given DataFrame in place and returns it. Assumes the text
    has already been lowercased (NLTK stopwords are lowercase).
    """
    # Use a set: O(1) membership per word instead of the original O(n)
    # list scan. Same elements, so the result is identical.
    stop = set(stopwords.words('english'))
    dataframe['sentences'] = dataframe['sentences'].apply(
        lambda x: ' '.join(word for word in x.split() if word not in stop)
    )
    return dataframe
#df = pd.read_csv("smartfarmingsentences.csv")
# Sentence count before cleaning, for the summary metrics below.
num_ori = df.shape[0]
st.title("Smart Farming Sentiment Analysis")
st.subheader("Sentiment Analysis of Smart Farming Knowledge Graph")
# Regex matching http(s) URLs that are stripped during cleaning.
url_pattern = re.compile(r'https?://\S+')
df = set_cleantext(df)
num_clean = df.shape[0]
# Three-column summary: original vs. cleaned vs. removed sentence counts.
kolom_num1, kolom_num2, kolom_num3 = st.columns(3)
with kolom_num1:
    st.text("Original Sentences: " + str(num_ori))
with kolom_num2:
    st.text("Sentences Count: " + str(num_clean))
with kolom_num3:
    st.text("Deleted Sentences: " + str(num_ori - num_clean))
#Sentiment Analysis
# Row-wise TextBlob scoring: adds 'polarity' and 'subjectivity' columns.
df = df.apply(set_textblob, axis=1)
st.dataframe(df, use_container_width=True)
#Separate polarity by Positive, Neutral, Negative
df_pos = df[df['polarity'] > 0]
df_neu = df[df['polarity'] == 0]
df_neg = df[df['polarity'] < 0]
#Separate subjectivity by Objective, Subjective
# Scores <= 0.3 are treated as objective; the 0.3 cut-off is a project
# choice, not a TextBlob convention.
df_obj = df[df['subjectivity'] <= 0.3]
df_sub = df[df['subjectivity'] > 0.3]
# Bar chart of sentiment class counts (distinct axis names instead of
# reusing a single `ax`; neither axis is referenced after this section).
figp, ax_polarity = plt.subplots()
ax_polarity.bar(
    ['Positive', 'Neutral', 'Negative'],
    [len(df_pos), len(df_neu), len(df_neg)],
    color=['green', 'gray', 'red'],
)
ax_polarity.set_xlabel('Sentiment')
ax_polarity.set_ylabel('Count')
ax_polarity.set_title('Sentiment Analysis')
# Bar chart of objective vs. subjective counts.
figs, ax_subjectivity = plt.subplots()
ax_subjectivity.bar(
    ['Objective', 'Subjective'],
    [len(df_obj), len(df_sub)],
    color=['green', 'red'],
)
ax_subjectivity.set_xlabel('Subjectivity')
ax_subjectivity.set_ylabel('Count')
ax_subjectivity.set_title('Subjectivity Analysis')
# Side-by-side display: sentiment counts + chart, subjectivity counts + chart.
kolom_polar, kolom_subject = st.columns(2)
with kolom_polar:
    #Show Sentiment Analysis
    st.subheader("Sentiment Analysis")
    st.text("Positive: " + str(df_pos.shape[0]))
    st.text("Neutral: " + str(df_neu.shape[0]))
    st.text("Negative: " + str(df_neg.shape[0]))
    # Create a bar chart
    st.pyplot(figp)
with kolom_subject:
    #Show Subjectivity Analysis
    st.subheader("Subjectivity Analysis")
    st.text("Objective: " + str(df_obj.shape[0]))
    st.text("Subjective: " + str(df_sub.shape[0]))
    st.text("---")
    # Create a bar chart
    st.pyplot(figs)
# --- Text normalization for word-frequency analysis ---
#Make Lowercase (so counting and stopword matching are case-insensitive)
df['sentences'] = df['sentences'].str.lower()
#remove punctuation . , ! ? : ; " ' ( ) [ ] { } < > / \ | ` ~ @ # $ % ^ & * - _ = +
# BUG FIX: the original call omitted regex=True; pandas >= 2.0 defaults to
# regex=False and treated the character class as a literal string, so no
# punctuation was ever removed.
df['sentences'] = df['sentences'].str.replace(r'[.,!?;:"\'()\[\]{}<>\\/|`~@#$%^&*\-_+=]', '', regex=True)
#Stopwords
df = delete_stopwords(df)
#Delete one or two character tokens (mostly cleaning artifacts)
df['sentences'] = df['sentences'].apply(lambda x: ' '.join(word for word in x.split() if len(word) > 2))
#Remove custom boilerplate words from scraped reference lists
custom_words = ["s", "al", 'view', 'article', 'google', 'scholar', "scopus", "crossref"]
df['sentences'] = df['sentences'].apply(lambda x: ' '.join(word for word in x.split() if word not in custom_words))
#Make Wordcloud from all cleaned sentences joined into one string
all_words = ' '.join(df['sentences'])
wordcloud = WordCloud(width=1024, height=1024, random_state=21, max_font_size=110).generate(all_words)
st.subheader("Wordcloud")
# BUG FIX: the original used len(all_words), which counts characters;
# count whitespace-separated tokens instead so the label is accurate.
st.text("Total Words: " + str(len(all_words.split())))
# Render on an explicit figure; passing the pyplot module to st.pyplot is
# deprecated and relies on global state.
fig_wc, ax_wc = plt.subplots(figsize=(10, 7))
ax_wc.imshow(wordcloud, interpolation="bilinear")
ax_wc.axis('off')
st.pyplot(fig_wc)
#Show the 100 most frequent words (the original comment said 10)
word_freq = Counter(all_words.split()).most_common(100)
df_word_freq = pd.DataFrame(word_freq, columns=['Word', 'Frequency'])
st.subheader("Top 100 Words")
# most_common(100) already limits the frame to 100 rows; no .head() needed.
st.dataframe(df_word_freq, use_container_width=True)