imdebamrita's picture
Change in column name
b59f21f
raw
history blame contribute delete
No virus
5.18 kB
from urlextract import URLExtract
from wordcloud import WordCloud
from collections import Counter
import pandas as pd
import emoji
# Shared URL extractor, created once at import time; fetch_states() calls
# its find_urls() on every message.
extractor = URLExtract()
def fetch_states(selected_user, df):
    """Return headline counts for the selected user's messages.

    Args:
        selected_user: A value from the 'user' column, or 'Overall' to keep
            every row.
        df: DataFrame with 'user' and 'message' columns.

    Returns:
        Tuple ``(num_messages, num_words, num_media_messages, num_links)``.
    """
    scoped = df if selected_user == 'Overall' else df[df['user'] == selected_user]
    messages = scoped['message']

    # 1. Number of messages: one row per message.
    message_total = scoped.shape[0]

    # 2. Number of words: whitespace-separated tokens across all messages.
    word_total = sum(len(text.split()) for text in messages)

    # 3. Number of media: rows whose text is exactly the media placeholder.
    media_total = int((messages == '<Media omitted>\n').sum())

    # 4. Number of links: URLs found by the module-level extractor.
    link_total = sum(len(extractor.find_urls(text)) for text in messages)

    return message_total, word_total, media_total, link_total
def monthly_timeline(selected_user, df):
    """Count messages per calendar month.

    Args:
        selected_user: A value from the 'user' column, or 'Overall' to keep
            every row.
        df: DataFrame with 'user', 'message', 'date' (datetime-like; the
            ``.dt`` accessor is used), 'year' and 'month' columns. It is not
            modified.

    Returns:
        DataFrame with columns 'year', 'month_num', 'month', 'Message' (the
        per-month count) and 'Timeline' (``"<month>-<year>"`` label).
    """
    if selected_user != 'Overall':
        df = df[df['user'] == selected_user]

    # assign() returns a new frame, so neither the caller's DataFrame nor a
    # filtered slice of it is written to.
    with_month_num = df.assign(month_num=df['date'].dt.month)

    timeline = (
        with_month_num
        .groupby(['year', 'month_num', 'month'])['message']
        .count()
        .reset_index()
    )
    timeline['time'] = [
        month + '-' + str(year)
        for month, year in zip(timeline['month'], timeline['year'])
    ]
    return timeline.rename(columns={'message': 'Message', 'time': 'Timeline'})
def daily_timeline(selected_user, df):
    """Count messages per value of the 'only_date' column.

    Args:
        selected_user: A value from the 'user' column, or 'Overall' to keep
            every row.
        df: DataFrame with 'user', 'message' and 'only_date' columns.

    Returns:
        DataFrame with columns 'Date' and 'Message' (the per-date count).
    """
    scoped = df if selected_user == 'Overall' else df[df['user'] == selected_user]
    per_day = scoped.groupby('only_date')['message'].count()
    renamed = {'only_date': 'Date', 'message': 'Message'}
    return per_day.reset_index().rename(columns=renamed)
def week_activity_map(selected_user, df):
    """Count messages per value of the 'day_name' column.

    Args:
        selected_user: A value from the 'user' column, or 'Overall' to keep
            every row.
        df: DataFrame with 'user' and 'day_name' columns.

    Returns:
        DataFrame with columns 'Day' and 'Message' (the count), in the order
        produced by ``value_counts()``.
    """
    if selected_user != 'Overall':
        df = df[df['user'] == selected_user]

    counts = df['day_name'].value_counts()
    # Name the index and the values explicitly instead of renaming whatever
    # reset_index() generates: those generated names differ between pandas
    # 1.x ('index'/'day_name') and 2.x ('day_name'/'count').
    return counts.rename_axis('Day').reset_index(name='Message')
def month_activity_map(selected_user, df):
    """Count messages per value of the 'month' column.

    Args:
        selected_user: A value from the 'user' column, or 'Overall' to keep
            every row.
        df: DataFrame with 'user' and 'month' columns.

    Returns:
        DataFrame with columns 'Month' and 'Message' (the count), in the
        order produced by ``value_counts()``.
    """
    if selected_user != 'Overall':
        df = df[df['user'] == selected_user]

    counts = df['month'].value_counts()
    # Name the index and the values explicitly instead of renaming whatever
    # reset_index() generates: those generated names differ between pandas
    # 1.x ('index'/'month') and 2.x ('month'/'count').
    return counts.rename_axis('Month').reset_index(name='Message')
def activity_heatmap(selected_user, df):
    """Build a day-by-period table of message counts.

    Args:
        selected_user: A value from the 'user' column, or 'Overall' to keep
            every row.
        df: DataFrame with 'user', 'day_name', 'period' and 'message'
            columns.

    Returns:
        Pivot table indexed by 'day_name' (axis named 'Day') with one column
        per 'period' value (axis named 'Time Period'); combinations with no
        messages are filled with 0.
    """
    scoped = df if selected_user == 'Overall' else df[df['user'] == selected_user]
    counts = scoped.pivot_table(
        index='day_name',
        columns='period',
        values='message',
        aggfunc='count',
    )
    filled = counts.fillna(0)
    return filled.rename_axis(index='Day', columns='Time Period')
def most_active_user(df):
    """Rank users by message count, ignoring 'group_notification' rows.

    Args:
        df: DataFrame with a 'user' column.

    Returns:
        Tuple ``(x, per)``:
        * ``x``: DataFrame with columns 'User' and 'Count' holding the first
          rows of ``value_counts().head()``.
        * ``per``: DataFrame with columns 'User' and 'Percent(%)' giving each
          user's share of the remaining rows, rounded to two decimals.
    """
    temp = df[df['user'] != 'group_notification']
    counts = temp['user'].value_counts()  # computed once, used for both results

    # Name the index and the values explicitly: the names generated by
    # reset_index() on a value_counts() result differ between pandas 1.x and
    # 2.x, so renaming them after the fact is version-dependent.
    x = counts.head().rename_axis('User').reset_index(name='Count')
    per = (
        ((counts / temp.shape[0]) * 100)
        .round(2)
        .rename_axis('User')
        .reset_index(name='Percent(%)')
    )
    return x, per
def create_wordcloud(selected_user, df):
    """Generate a word cloud from the selected user's messages.

    Rows from 'group_notification' and media-placeholder messages are
    excluded, and stop words are removed before the text is handed to
    WordCloud.

    Args:
        selected_user: A value from the 'user' column, or 'Overall' to keep
            every row.
        df: DataFrame with 'user' and 'message' columns. It is not modified.

    Returns:
        The result of ``WordCloud.generate`` on the cleaned text.
    """
    # The context manager closes the file (it was previously left open) and
    # the explicit encoding keeps reads independent of the platform default.
    # NOTE(review): assumes the stop-word file is UTF-8 and
    # whitespace-separated — confirm.
    with open('stop_ben-hin-eng.txt', 'r', encoding='utf-8') as f:
        # A set gives whole-word matching. Testing against the raw file text
        # was a substring check, so e.g. "he" was dropped because it occurs
        # inside "the".
        stop_words = set(f.read().split())

    if selected_user != 'Overall':
        df = df[df['user'] == selected_user]

    temp = df[df['user'] != 'group_notification']
    temp = temp[temp['message'] != '<Media omitted>\n']

    def remove_stop_words(message):
        """Lower-case the message and drop every stop word from it."""
        return " ".join(
            word for word in message.lower().split() if word not in stop_words
        )

    wc = WordCloud(width=500, height=500, min_font_size=10,
                   background_color='white')

    # Keep the cleaned text in its own Series rather than assigning it back
    # into a filtered slice of the caller's frame.
    cleaned = temp['message'].apply(remove_stop_words)
    return wc.generate(cleaned.str.cat(sep=" "))
def most_common_words(selected_user, df):
    """Return the 20 most frequent non-stop-words in the selected messages.

    Rows from 'group_notification' and media-placeholder messages are
    excluded; messages are lower-cased and split on whitespace.

    Args:
        selected_user: A value from the 'user' column, or 'Overall' to keep
            every row.
        df: DataFrame with 'user' and 'message' columns.

    Returns:
        DataFrame with columns 'Message' (the word) and 'Count', ordered from
        least to most frequent of the top 20. The columns are present even
        when no words remain.
    """
    # The context manager closes the file (it was previously left open) and
    # the explicit encoding keeps reads independent of the platform default.
    # NOTE(review): assumes the stop-word file is UTF-8 and
    # whitespace-separated — confirm.
    with open('stop_ben-hin-eng.txt', 'r', encoding='utf-8') as f:
        # A set gives whole-word matching. Testing against the raw file text
        # was a substring check, so e.g. "he" was dropped because it occurs
        # inside "the".
        stop_words = set(f.read().split())

    if selected_user != 'Overall':
        df = df[df['user'] == selected_user]

    temp = df[df['user'] != 'group_notification']
    temp = temp[temp['message'] != '<Media omitted>\n']

    words = [
        word
        for message in temp['message']
        for word in message.lower().split()
        if word not in stop_words
    ]

    # Passing the column names up front keeps the schema stable for an empty
    # result; the reversal puts the most frequent word last.
    most_common_df = pd.DataFrame(
        Counter(words).most_common(20), columns=['Message', 'Count']
    ).iloc[::-1]
    return most_common_df
def emoji_data(selected_user, df):
    """Count the emoji characters used in the selected user's messages.

    A character counts as an emoji when it is a key of ``emoji.EMOJI_DATA``;
    messages are scanned one character at a time.

    Args:
        selected_user: A value from the 'user' column, or 'Overall' to keep
            every row.
        df: DataFrame with 'user' and 'message' columns.

    Returns:
        DataFrame of (emoji, count) rows, most frequent first, with columns
        'Emoji' and 'Count'. When no emoji is found the frame is empty and
        the columns are not renamed.
    """
    scoped = df if selected_user == 'Overall' else df[df['user'] == selected_user]

    found = [
        char
        for text in scoped['message']
        for char in text
        if char in emoji.EMOJI_DATA
    ]

    ranked = pd.DataFrame(Counter(found).most_common())
    if not found:
        return ranked
    return ranked.rename(columns={0: 'Emoji', 1: 'Count'})
def data_timeframe(df):
    """Describe the span between the first and last rows of ``df``.

    Args:
        df: DataFrame with 'day', 'month' and 'year' columns; rows are taken
            by position, so the frame is expected to be non-empty.

    Returns:
        String of the form ``"<day> <month> <year> to <day> <month> <year>"``
        built from the first and last rows respectively.
    """
    fields = ('day', 'month', 'year')

    def describe(row):
        # Space-separated "day month year" for a single row.
        return " ".join(str(row[name]) for name in fields)

    start, end = df.iloc[0], df.iloc[-1]
    return describe(start) + " to " + describe(end)