"""Streamlit dashboard exploring the vocabulary of hateful / non-hateful memes.

This part of the script renders:
  * a sidebar with the Facebook logo and the top-N word tables,
  * a slider to browse a few example memes,
  * a lookup telling whether a word belongs to each vocabulary,
  * a horizontal bar plot of a user-selected range of top words.
"""
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from PIL import Image
from sentence_transformers import SentenceTransformer
from tensorflow.keras.models import model_from_json
from tensorflow.keras.optimizers import Adam

# Colours cycled over the bars of every word-count plot.
BAR_COLORS = ['darkmagenta', 'darkblue', 'darkgreen', 'darkred', 'darkgrey', 'darkorange']
# Upper bound of the range pre-selected in the "top words" sliders.
DEFAULT_TOP_WORDS = 10
# Number of example memes offered in the meme slider.
MAX_EXAMPLE_MEMES = 10


def _word_count(df_words, word):
    """Return the 'count' of *word* in *df_words*, or None when it is absent.

    *df_words* must expose a 'word' and a 'count' column.
    """
    matches = df_words.loc[df_words['word'] == word, 'count']
    return None if matches.empty else matches.iloc[0]


def _plot_top_words(df_words, title):
    """Let the user pick a rank range in *df_words* and draw it as a bar plot.

    *df_words* must expose a 'word' and a 'count' column; rows are shown in
    the order they appear in the frame. *title* is displayed above the plot.
    """
    n_words = df_words.shape[0]
    if n_words == 0:
        st.write('No words available to plot.')
        return

    start_word, end_word = st.select_slider(
        'Select a range of top words',
        options=list(range(1, n_words + 1)),
        # The default range must lie inside `options`, hence the clamp.
        value=(1, min(DEFAULT_TOP_WORDS, n_words)),
    )
    # Slider positions are 1-based ranks; iloc is 0-based and end-exclusive.
    df_selected = df_words.iloc[start_word - 1:end_word, :]

    fig, ax = plt.subplots()
    bars = ax.barh(y=df_selected['word'], width=df_selected['count'], color=BAR_COLORS)
    ax.bar_label(bars)
    ax.invert_yaxis()  # Put the first selected word at the top of the plot.
    st.subheader(title)
    st.pyplot(fig)
    # Streamlit re-runs the script on every interaction; close the figure so
    # pyplot does not keep accumulating them.
    plt.close(fig)


#####################################################################################################################################
st.set_page_config(layout='wide')

# Word/count tables, loaded once and shared by the sidebar and the main page.
df_hate = pd.read_csv('static/data_hate.csv')
df_no_hate = pd.read_csv('static/data_no_hate.csv')

# Sidebar: Facebook logo + main info on text
with st.sidebar:
    col1, col2, col3 = st.columns(3)
    with col2:
        logo_facebook = Image.open('static/logo_facebook.png')
        st.image(logo_facebook)

    # Checkboxes
    hateful = st.checkbox('Check to see top hateful words used')
    if hateful:
        number_chosen_hate = st.number_input(
            'How many top hateful words do you want to see?', min_value=1, value=5, step=1)
        df_chosen_hate = df_hate.iloc[:number_chosen_hate, :]
        st.write(f'{number_chosen_hate} most used words in the hateful vocabulary:')
        st.dataframe(df_chosen_hate)

    non_hateful = st.checkbox('Check to see top non-hateful words used')
    if non_hateful:
        number_chosen = st.number_input(
            'How many top non-hateful words do you want to see?', min_value=1, value=5, step=1)
        df_chosen = df_no_hate.iloc[:number_chosen, :]
        st.write(f'{number_chosen} most used words in the non-hateful vocabulary:')
        st.dataframe(df_chosen)

#####################################################################################################################################
st.title('Facebook: Hateful Memes recognition')
st.write("---")

# Image selection
img_filepath = 'static/images_streamlit'
list_images = sorted(os.listdir(img_filepath))

st.subheader('Some examples of hateful and non-hateful memes:')
with st.expander('Want to see some memes?'):
    # Slicing (rather than indexing 0..9) tolerates folders with fewer images.
    example_images = list_images[:MAX_EXAMPLE_MEMES]
    if example_images:
        selected_image = st.select_slider(
            'Select a meme to show it', options=example_images, value=example_images[0])
        col1, col2, col3 = st.columns(3)
        with col2:
            st.image(f'{img_filepath}/{selected_image}')
    else:
        st.write('No example memes available.')
st.write("---")

#####################################################################################################################################
# Hateful test
st.subheader('Is a word in our hateful vocabulary or not?')
with st.expander('Hateful? Non-hateful?'):
    word = st.text_input('Write a word to test it', 'like')
    word_lower = word.lower()

    appeared_hate = _word_count(df_hate, word_lower)
    appeared_no_hate = _word_count(df_no_hate, word_lower)

    if appeared_hate is None:
        st.write(f'"{word}" is not in our hateful vocabulary.')
    else:
        st.write(f'"{word}" is in our hateful vocabulary, it appears {appeared_hate} times.')

    if appeared_no_hate is None:
        st.write(f'"{word}" is not in our non-hateful vocabulary.')
    else:
        st.write(f'"{word}" is in our non-hateful vocabulary, it appears {appeared_no_hate} times.')

    # The ratio only makes sense when the word is present in both vocabularies
    # (and the denominator is non-zero).
    if appeared_hate is not None and appeared_no_hate:
        st.write(f'Ratio hateful vs non-hateful: {round(appeared_hate/appeared_no_hate, 2)}.')
st.write("---")

#####################################################################################################################################
# Slider to choose how many words we want to see and plot
st.subheader('Barplot of top selected words:')
with st.expander('Select to choose how many top words you want to see and their count'):
    option = st.selectbox(
        'Which vocabulary to select?',
        ('Hateful vocabulary', 'Non-hateful vocabulary', 'Both vocabularies'))
    st.write('You selected', option)

    # NOTE(review): the filters below threshold the second CSV column, which is
    # presumably the 'count' column -- confirm against the data files.
    if option == 'Hateful vocabulary':
        _plot_top_words(
            df_hate[df_hate.iloc[:, 1] >= 20],
            'Selected words hateful vocabulary:')
    elif option == 'Non-hateful vocabulary':
        _plot_top_words(
            df_no_hate[df_no_hate.iloc[:, 1] >= 30],
            'Selected words non-hateful vocabulary:')
    else:
        df_top = pd.read_csv('./static/data_top.csv')
        _plot_top_words(
            df_top,
            'Selected words (hateful & non-hateful vocabularies):')
st.write("---")

#####################################################################################################################################
# Grad Cam?
# Placeholder for a future Grad-CAM visualisation.
st.write('Grad Cam if it works')
st.write("---")

#####################################################################################################################################
# Testing some sentences: embed the user's sentence and score it with the
# saved Keras classifier.
st.subheader('Testing some sentences if you dare:')
with st.expander('Input a sentence and check the probability of it being hateful:'):
    # NOTE(review): both models are re-created on every Streamlit rerun;
    # consider caching them if the page feels slow.
    model_nlp = SentenceTransformer('all-mpnet-base-v2')
    sentence = st.text_input('Write a sentence to test it.', "Hopefully I don't write some hateful content.")

    # Encode the sentence and reshape the embedding into a batch of one sample.
    preprocessed_sentence = model_nlp.encode(sentence).reshape(1, -1)

    # Load the model architecture from JSON; the context manager guarantees
    # the file is closed once its content has been read.
    with open('static/model_nlp/model_nlp.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)
    # Load the trained weights into the freshly built model.
    loaded_model.load_weights("static/model_nlp/model_nlp.h5")
    # No compile() call: the model is only used for inference here.

    y_pred = loaded_model.predict(preprocessed_sentence)
    probability = float(y_pred[0][0])
    st.write(f'Probability of being hateful: {round(probability, 4)}')
    if probability < 0.5:
        st.write("Congrats, it's not hateful!!!")
    else:
        st.write("Shame on you, it's hateful!!!")
st.write("---")

#####################################################################################################################################
# Footer: Artefact logo in the right-most of five columns.
col1, col2, col3, col4, col5 = st.columns(5)
with col5:
    logo_artefact = Image.open('static/logo_artefact.png')
    st.image(logo_artefact)