import random

import streamlit as st
from bs4 import BeautifulSoup
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
from transformers_interpret import SequenceClassificationExplainer

model_hub_url = 'https://huggingface.co/ml6team/distilbert-base-german-cased-toxic-comments'
model_name = 'ml6team/distilbert-base-german-cased-toxic-comments'

about_page_markdown = f"""# 🤬 Toxic Comment Detection Space

Made by [ML6](https://ml6.eu/).

Token attribution is performed using [transformers-interpret](https://github.com/cdpierse/transformers-interpret).
"""

regular_emojis = [
    '😐', '🙂', '👶', '😇',
]
undecided_emojis = [
    '🤨', '🧐', '🥸', '🥴', '🤷',
]
potty_mouth_emojis = [
    '🤐', '👿', '😡', '🤬', '☠️', '☣️', '☢️',
]

# Page setup
st.set_page_config(
    page_title="Toxic Comment Detection Space",
    page_icon="🤬",
    layout="centered",
    initial_sidebar_state="auto",
    menu_items={
        'Get help': None,
        'Report a bug': None,
        'About': about_page_markdown,
    }
)

# Model setup
@st.cache(allow_output_mutation=True,
          suppress_st_warning=True,
          show_spinner=False)
def load_pipeline():
    with st.spinner('Loading the model (this might take a while)...'):
        toxicity_pipeline = pipeline(
            'text-classification',
            model=model_name,
            tokenizer=model_name)
        cls_explainer = SequenceClassificationExplainer(
            toxicity_pipeline.model,
            toxicity_pipeline.tokenizer)
    return toxicity_pipeline, cls_explainer


toxicity_pipeline, cls_explainer = load_pipeline()


# Auxiliary functions
def format_explainer_html(html_string):
    """Extract tokens with attribution-based background color."""
    inside_token_prefix = '##'
    soup = BeautifulSoup(html_string, 'html.parser')
    p = soup.new_tag('p',
                     attrs={'style': 'color: black; background-color: white;'})
    # Select token elements and remove model specific tokens
    current_word = None
    for token in soup.find_all('td')[-1].find_all('mark')[1:-1]:
        text = token.font.text.strip()
        if text.startswith(inside_token_prefix):
            text = text[len(inside_token_prefix):]
        else:
            # Create a new span for each word (sequence of sub-tokens)
            if current_word is not None:
                p.append(current_word)
                p.append(' ')
            current_word = soup.new_tag('span')
        token.attrs['style'] = f"{token.attrs['style']}; padding: 0.2em 0em;"
        token.string = text
        current_word.append(token)

    # Add last word
    p.append(current_word)

    # Add left and right-padding to each word
    for span in p.find_all('span'):
        span.find_all('mark')[0].attrs['style'] = (
            f"{span.find_all('mark')[0].attrs['style']} padding-left: 0.2em;")
        span.find_all('mark')[-1].attrs['style'] = (
            f"{span.find_all('mark')[-1].attrs['style']} padding-right: 0.2em;")

    return p


def classify_comment(comment):
    """Classify the given comment and augment with additional information."""
    result = toxicity_pipeline(comment)[0]

    # Add explanation
    result['word_attribution'] = cls_explainer(comment, class_name="non_toxic")
    result['visualisation_html'] = cls_explainer.visualize()._repr_html_()
    result['tokens_with_background'] = format_explainer_html(
        result['visualisation_html'])

    # Choose emoji reaction
    label, score = result['label'], result['score']
    if label == 'toxic' and score > 0.1:
        emoji = random.choice(potty_mouth_emojis)
    elif label == 'non_toxic' and score > 0.1:
        emoji = random.choice(regular_emojis)
    else:
        emoji = random.choice(undecided_emojis)
    result.update({'text': comment, 'emoji': emoji})

    # Add result to session
    st.session_state.results.append(result)


# Start session
if 'results' not in st.session_state:
    st.session_state.results = []
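# Each entry appended to st.session_state.results by classify_comment() is the
# pipeline output dict ('label', 'score') extended with 'word_attribution',
# 'visualisation_html', 'tokens_with_background', 'text' and 'emoji'; the
# results section at the bottom of the page renders these fields.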
# Page
st.title('🤬 German Toxic Comment Detection')
st.markdown("""This demo showcases the German toxic comment detection model.""")

# Introduction
st.markdown(f"""The model was trained using a sequence classification task on a
combination of multiple German datasets containing toxicity, profanity, and
hate speech. For a more comprehensive overview of the model, check out the
[model card on 🤗 Model Hub]({model_hub_url}).
""")
st.markdown("""Enter a comment that you want to classify below. The model will
determine the probability that it is toxic and highlight how much each token
contributes to its decision: red tokens indicate toxicity, whereas green tokens
indicate the opposite.

Try it yourself! 👇""",
            unsafe_allow_html=True)

# Demo
with st.form("german-toxic-comment-detection-input", clear_on_submit=True):
    text = st.text_area(
        label='Enter the comment you want to classify below (in German):')
    _, rightmost_col = st.columns([6, 1])
    submitted = rightmost_col.form_submit_button("Classify",
                                                 help="Classify comment")

# Listener
if submitted:
    if text:
        with st.spinner('Analysing comment...'):
            classify_comment(text)
    else:
        st.error('**Error**: No comment to classify. Please provide a comment.')

# Results
if 'results' in st.session_state and st.session_state.results:
    first = True
    for result in st.session_state.results[::-1]:
        if not first:
            st.markdown("---")
        st.markdown(f"Text:\n> {result['text']}")
        col_1, col_2, col_3 = st.columns([1, 2, 2])
        col_1.metric(label='', value=f"{result['emoji']}")
        col_2.metric(label='Label', value=f"{result['label']}")
        col_3.metric(label='Score', value=f"{result['score']:.3f}")
        st.markdown(f"Token Attribution:\n{result['tokens_with_background']}",
                    unsafe_allow_html=True)
        first = False
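# A minimal sketch for running the demo locally, assuming the script is saved
# as app.py (the filename is an assumption; dependency versions are not pinned):
#
#   pip install streamlit transformers transformers-interpret beautifulsoup4 torch
#   streamlit run app.py
#
# The model weights are downloaded from the Hugging Face Hub on the first run.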