"""Streamlit page: explore and compare hate speech detection models."""

from collections import defaultdict

import streamlit as st
# from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline

# Page metadata consumed by the enclosing multi-page app.
title = "Model Exploration"
description = "Comparison of hate speech detection models"
date = "2022-01-26"
thumbnail = "images/huggingface_logo.png"


def run_article():
    """Render the article plus the two interactive tools.

    Creates the forms for receiving multiple inputs to compare for a single
    model (ranking tool) or one input to compare for two models
    (comparison tool).
    """
    st.markdown("""
# Making a Hate Speech Detection Model
Once the data has been collected using the definitions identified for the
task, you can start training your model. At training, the model takes in
the data with labels and learns the associated context in the input data
for each label. Depending on the task design, the labels may be binary like
'hateful' and 'non-hateful' or multiclass like 'neutral', 'offensive', and
'attack'.

When presented with a new input string, the model then predicts the
likelihood that the input is classified as each of the available labels and
returns the label with the highest likelihood as well as how confident the
model is in its selection using a score from 0 to 1.

Neural models such as transformers are frequently trained as general
language models and then fine-tuned on specific classification tasks. These
models can vary in their architecture and the optimization algorithms,
sometimes resulting in very different output for the same input text.

# Model Output Ranking
For now, here's a link to the [space](https://huggingface.co/spaces/aymm/ModelOutputRankingTool).""")
    with st.expander("Model Output Ranking Tool", expanded=False):
        with st.form(key='ranking'):
            model_name = st.selectbox(
                "Select a model to test",
                [
                    "classla/roberta-base-frenk-hate",
                    "cardiffnlp/twitter-roberta-base-hate",
                    "Hate-speech-CNERG/dehatebert-mono-english",
                ],
                key="rank_model_select",
            )
            # The placeholder kwarg was added in v1.2 of streamlit and
            # versions on Spaces currently go up to v1.0, so the example
            # phrases are shown via `help` instead.
            input_1 = st.text_input(
                "Input 1",
                # placeholder="We shouldn't let [IDENTITY] suffer.",
                help="Try a phrase like 'We shouldn't let [IDENTITY] suffer.'",
                key="rank_input_1",
            )
            input_2 = st.text_input(
                "Input 2",
                # placeholder="I'd rather die than date [IDENTITY].",
                help="Try a phrase like 'I'd rather die than date [IDENTITY].'",
                key="rank_input_2",
            )
            input_3 = st.text_input(
                "Input 3",
                # placeholder="Good morning.",
                help="Try a phrase like 'Good morning'",
                key="rank_input_3",
            )
            inputs = [input_1, input_2, input_3]
            if st.form_submit_button(label="Rank inputs"):
                results = run_ranked(model_name, inputs)
                st.dataframe(results)
    st.markdown("""
# Model Comparison
For now, here's a link to the [space](https://huggingface.co/spaces/aymm/ModelComparisonTool).
""")
    with st.expander("Model Comparison Tool", expanded=False):
        with st.form(key='comparison'):
            model_name_1 = st.selectbox(
                "Select a model to compare",
                [
                    "cardiffnlp/twitter-roberta-base-hate",
                    "Hate-speech-CNERG/dehatebert-mono-english",
                ],
                key='compare_model_1',
            )
            model_name_2 = st.selectbox(
                "Select another model to compare",
                [
                    "cardiffnlp/twitter-roberta-base-hate",
                    "Hate-speech-CNERG/dehatebert-mono-english",
                ],
                key='compare_model_2',
            )
            input_text = st.text_input("Comparison input", key="compare_input")
            if st.form_submit_button(label="Compare models"):
                results = run_compare(model_name_1, model_name_2, input_text)
                st.dataframe(results)


def run_ranked(model, input_list):
    """Classify each input with ``model`` and return rows grouped by label.

    Runs the received input strings through the given model and returns the
    output ranked by label and score (does not assume binary labels, so the
    highest score for each label is at the top of its group). Blank inputs
    (empty form fields) are skipped rather than classified.
    """
    classifier = pipeline("text-classification", model=model)
    # Group result rows by predicted label, preserving first-seen label order.
    by_label = defaultdict(list)
    for text in input_list:
        if not text:
            # st.text_input defaults to "" — don't classify blank fields.
            continue
        top = classifier(text)[0]
        by_label[top['label']].append(
            {'Input': text, 'Label': top['label'], 'Score': top['score']}
        )
    output = []
    for rows in by_label.values():
        output += sorted(rows, key=lambda row: row['Score'], reverse=True)
    return output


def _model_row(name, text):
    """Classify ``text`` with model ``name`` and return one comparison row."""
    result = pipeline("text-classification", model=name)(text)
    return {
        'Model': name,
        'Label': result[0]['label'],
        'Score': result[0]['score'],
    }


def run_compare(name_1, name_2, text):
    """Return the output of both named models for the given input string."""
    return [_model_row(name_1, text), _model_row(name_2, text)]


def main():
    run_article()


# Guard so that importing this module (e.g. as a page of a multi-page app
# that reads `title`/`description`) does not render the whole UI.
if __name__ == "__main__":
    main()