Usage

```python
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("shahrukhx01/gbert-hasoc-german-2019")

model = AutoModelForSequenceClassification.from_pretrained("shahrukhx01/gbert-hasoc-german-2019")
```

Dataset

@inproceedings{10.1145/3368567.3368584,
author = {Mandl, Thomas and Modha, Sandip and Majumder, Prasenjit and Patel, Daksh and Dave, Mohana and Mandlia, Chintak and Patel, Aditya},
title = {Overview of the HASOC Track at FIRE 2019: Hate Speech and Offensive Content Identification in Indo-European Languages},
year = {2019},
isbn = {9781450377508},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3368567.3368584},
doi = {10.1145/3368567.3368584},
abstract = {The identification of Hate Speech in Social Media is of great importance and receives much attention in the text classification community. There is a huge demand for research for languages other than English. The HASOC track intends to stimulate development in Hate Speech for Hindi, German and English. Three datasets were developed from Twitter and Facebook and made available. Binary classification and more fine-grained subclasses were offered in 3 subtasks. For all subtasks, 321 experiments were submitted. The approaches used most often were LSTM networks processing word embedding input. The performance of the best system for identification of Hate Speech for English, Hindi, and German was a Macro-F1 score of 0.78, 0.81 and 0.61, respectively.},
booktitle = {Proceedings of the 11th Forum for Information Retrieval Evaluation},
pages = {14--17},
numpages = {4},
keywords = {Text Classification, Hate Speech, Evaluation, Deep Learning},
location = {Kolkata, India},
series = {FIRE '19}
}

shahrukhx01
/

gbert-hasoc-german-2019

Usage

Dataset

license: mit