|
import streamlit as st |
|
|
|
# Article metadata exposed as module-level attributes.
# NOTE(review): presumably read by the app that indexes the articles --
# confirm against the loader before renaming any of these.
title       = "Welcome Page"             # heading shown for this article
description = "Introduction"             # short summary line
date        = "2022-01-26"               # ISO-8601 formatted date string
thumbnail   = "images/waving_hand.png"   # path to the preview image
|
|
|
# Markdown for the opening section: a welcome paragraph, then (after a blank
# line, i.e. a new Markdown paragraph) the stated goal of the exploration.
__INTRO_TEXT = """
Welcome to the Task Exploration Activity for hate speech detection!
In this series of modules, you'll learn about the history of hate speech detection as a task in
the larger pipeline of automatic content moderation (ACM).
You'll also be able to interact with and compare datasets and models built for this task.

The goal of this exploration is to share the design considerations and challenges faced when using algorithms to detect hate speech.
"""
|
|
|
# Markdown for the "What is hate speech?" section; quotes the definition the
# text attributes to the United Nations (2019).
__DEF_HATE_SPEECH = """
Hate speech is hard to define, with definitions shifting across time and location.
In 2019, the United Nations defined hate speech as "any kind of communication in speech,
writing or behaviour, that attacks or uses pejorative or discriminatory language with
reference to a person or a group on the basis of who they are, in other words, based on their religion,
ethnicity, nationality, race, colour, descent, gender or other identity factor."
"""
|
|
|
# Markdown for the "What kind of content is subject to moderation?" section.
__DEF_CONTENT = """
Different platforms have different guidelines about what
content is sanctioned on the platform. For example, many US-based platforms prohibit posting threats of violence,
nudity, and hate speech. We discuss hate speech below.
"""
|
|
|
# Markdown for the "Content Warning" section shown before the featured
# datasets and models. The trailing blank line inside the literal is kept
# from the original text.
__CONTENT_WARNING = """
These modules contain examples of hateful, abusive, and offensive language that have been collected in datasets and
reproduced by models. These examples are meant to illustrate the variety of content that may be subject to
moderation.

"""
|
|
|
# Markdown bullet list of the featured datasets, one "[label](url)" link per
# item. Rendered in the "Datasets" column of the welcome page.
__DATASET_LIST = """
- [FRENK hate speech dataset](https://huggingface.co/datasets/classla/FRENK-hate-en)
- [Twitter Hate Speech dataset](https://huggingface.co/datasets/tweets_hate_speech_detection)
- [UC Berkeley Measuring Hate Speech](https://huggingface.co/datasets/ucberkeley-dlab/measuring-hate-speech)
- [Dynamically Generated Hate Speech Dataset](https://github.com/bvidgen/Dynamically-Generated-Hate-Speech-Dataset)
- [HateCheck](https://github.com/paul-rottger/hatecheck-data)
- [Hateful Memes Dataset](https://huggingface.co/datasets/limjiayi/hateful_memes_expanded)
- [Open Subtitles English Dataset](https://opus.nlpl.eu/OpenSubtitles-v2018.php)
"""
|
|
|
# Markdown bullet list of the featured models, one "[label](url)" link per
# item. Rendered in the "Models" column of the welcome page.
__MODEL_LIST = """
- [RoBERTa trained on FRENK dataset](https://huggingface.co/classla/roberta-base-frenk-hate)
- [RoBERTa trained on Twitter Hate Speech](https://huggingface.co/cardiffnlp/twitter-roberta-base-hate)
- [DeHateBERT model (trained on Twitter and StormFront)](https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-english)
- [RoBERTa trained on 11 English hate speech datasets](https://huggingface.co/facebook/roberta-hate-speech-dynabench-r1-target)
- [RoBERTa trained on 11 English hate speech datasets and Round 1 of the Dynamically Generated Hate Speech Dataset](https://huggingface.co/facebook/roberta-hate-speech-dynabench-r2-target)
- [RoBERTa trained on 11 English hate speech datasets and Rounds 1 and 2 of the Dynamically Generated Hate Speech Dataset](https://huggingface.co/facebook/roberta-hate-speech-dynabench-r3-target)
- [RoBERTa trained on 11 English hate speech datasets and Rounds 1, 2, and 3 of the Dynamically Generated Hate Speech Dataset](https://huggingface.co/facebook/roberta-hate-speech-dynabench-r4-target)
"""
|
|
|
|
|
def run_article() -> None:
    """Render the welcome article through Streamlit.

    Emits, in order: the page heading and introduction, the hate speech
    definition, the moderation overview, the content warning, and a
    "Featured datasets and models" section with the dataset and model
    lists placed side by side in columns.
    """
    st.markdown("# Welcome!")
    st.markdown(__INTRO_TEXT)

    st.markdown("### What is hate speech?")
    st.markdown(__DEF_HATE_SPEECH)

    st.markdown("### What kind of content is subject to moderation?")
    st.markdown(__DEF_CONTENT)

    st.markdown("### Content Warning")
    st.markdown(__CONTENT_WARNING)

    # Horizontal rule followed by the heading of the final section.
    st.markdown("---\n\n## Featured datasets and models")

    # Three columns are requested but only the first two are filled; the
    # third is discarded and therefore stays empty.
    col_1, col_2, _ = st.columns(3)
    with col_1:
        st.markdown("### Datasets")
        # The list holds only Markdown links, so raw-HTML rendering
        # (unsafe_allow_html) is deliberately not enabled.
        st.markdown(__DATASET_LIST)
    with col_2:
        st.markdown("### Models")
        # Same reasoning as above: plain Markdown, no raw HTML needed.
        st.markdown(__MODEL_LIST)
|
|