Spaces:

adrianmoses
/

hate-speech-detection

Runtime error

App Files Files Community

hate-speech-detection / app.py

adrianmoses

update text to caption

9eee20c over 3 years ago

raw

history blame contribute delete

3.04 kB

	import streamlit as st
	import re
	import torch
	from transformers import AlbertTokenizer, AlbertModel
	import pytorch_lightning as pl
	from huggingface_hub import hf_hub_download


	def download_torch_model():
	    """Download the ``pytorch_hs_model.net`` checkpoint from the Hub.

	    The file is requested from the ``adrianmoses/hate-speech-detection``
	    repository. The path returned by ``hf_hub_download`` is echoed to
	    stdout and then handed back to the caller.
	    """
	    checkpoint_path = hf_hub_download(
	        filename="pytorch_hs_model.net",
	        repo_id="adrianmoses/hate-speech-detection",
	    )
	    print(checkpoint_path)
	    return checkpoint_path

	def load_model():
	    """Return an ``AlbertModel`` loaded from the "albert-base-v2" checkpoint."""
	    return AlbertModel.from_pretrained("albert-base-v2")

	def load_tokenizer():
	    """Return an ``AlbertTokenizer`` loaded from the "albert-base-v2" checkpoint."""
	    return AlbertTokenizer.from_pretrained("albert-base-v2")

	def clean_tweet(tweet):
	    """Remove @-mentions from ``tweet`` and return the remaining text.

	    A mention is an ``@`` followed by one or more word characters and an
	    optional trailing colon. Only the mention itself is deleted, so any
	    whitespace that surrounded it stays in the result.
	    """
	    mention_pattern = re.compile(r'@\w+:?', flags=re.IGNORECASE)
	    return mention_pattern.sub("", tweet)


	def tokenize(tweet):
	    """Clean ``tweet`` and encode it with the ALBERT tokenizer.

	    Mentions are stripped with ``clean_tweet`` first. The encoding is
	    requested as PyTorch tensors (``return_tensors='pt'``) with padding
	    and truncation enabled and ``max_length=64``.
	    """
	    cleaned_text = clean_tweet(tweet)
	    encoder = load_tokenizer()
	    return encoder(
	        cleaned_text,
	        padding=True,
	        truncation=True,
	        max_length=64,
	        return_tensors='pt',
	    )



	class HateSpeechClassifier(pl.LightningModule):
	    """Encoder model followed by a small two-layer classification head.

	    The head is ``Linear -> ReLU -> Dropout -> Linear -> log_softmax`` and
	    is trained with ``NLLLoss``. Attribute names (``model``, ``l1``,
	    ``l2``) are the keys under which weights appear in the state dict.
	    """

	    def __init__(self, albert_model, dropout, hidden_dim, output_dim):
	        """Store the encoder and build the head layers.

	        Args:
	            albert_model: encoder module called in ``forward``.
	            dropout: drop probability applied after the first linear layer.
	            hidden_dim: input and output width of the first linear layer.
	            output_dim: output width of the final linear layer.
	        """
	        super().__init__()
	        nn = torch.nn
	        self.model = albert_model
	        self.l1 = nn.Linear(hidden_dim, hidden_dim)
	        self.dropout = nn.Dropout(dropout)
	        self.l2 = nn.Linear(hidden_dim, output_dim)
	        self.loss = nn.NLLLoss()

	    def forward(self, input_ids, attention_mask, token_type_ids):
	        """Return log-probabilities (log_softmax over dim 1) for the inputs."""
	        encoder_outputs = self.model(
	            input_ids,
	            attention_mask=attention_mask,
	            token_type_ids=token_type_ids,
	        )
	        # Element 0 of the encoder output, sliced at index 0 of its second
	        # axis -- presumably the hidden state of the first token of each
	        # sequence; TODO confirm against the encoder's output layout.
	        first_position = encoder_outputs[0][:, 0]
	        activated = torch.relu(self.l1(first_position))
	        scores = self.l2(self.dropout(activated))
	        return torch.log_softmax(scores, dim=1)

	    def training_step(self, batch, batch_idx):
	        """Compute the NLL loss for one training batch."""
	        ids, masks, type_ids, targets = batch
	        log_probs = self(ids, masks, type_ids)
	        # Targets are flattened to 1-D before being passed to the loss.
	        return self.loss(log_probs, targets.view(-1))

	    def validation_step(self, batch, batch_idx):
	        """Compute the NLL loss for one validation batch."""
	        ids, masks, type_ids, targets = batch
	        log_probs = self(ids, masks, type_ids)
	        return self.loss(log_probs, targets.view(-1))

	    def configure_optimizers(self):
	        """Return an Adam optimizer over all parameters with lr=1e-5."""
	        optimizer = torch.optim.Adam(self.parameters(), lr=1e-5)
	        return optimizer

	def setup_model():
	    """Build the classifier, load its downloaded weights, and return it.

	    The checkpoint is fetched first, then a ``HateSpeechClassifier`` is
	    created around a fresh ALBERT model with dropout 0.5, hidden size 768
	    and 2 outputs. Weights are mapped onto the CPU and the model is put
	    in eval mode before being returned.
	    """
	    checkpoint_path = download_torch_model()
	    classifier = HateSpeechClassifier(
	        load_model(),
	        dropout=0.5,
	        hidden_dim=768,
	        output_dim=2,
	    )
	    # NOTE(review): depending on the installed torch version, torch.load
	    # may unpickle arbitrary objects -- only safe for a trusted checkpoint.
	    state = torch.load(checkpoint_path, map_location=torch.device('cpu'))
	    classifier.load_state_dict(state)
	    classifier.eval()
	    return classifier


	# Streamlit page: read one line of text, classify it, and show the verdict.
	#
	# NOTE(review): the model is rebuilt every time this script executes. If
	# the installed Streamlit version offers a resource cache, wrapping
	# setup_model() with it would avoid reloading the weights on each rerun.
	model = setup_model()

	st.title("Hate Speech Detection")
	st.caption("Text will be truncated to 64 tokens")

	text = st.text_input("Enter text")

	# The text box is empty until the user types something; skip inference in
	# that case instead of reporting a verdict for an empty string.
	if text.strip():
	    encoded_input = tokenize(text)
	    input_ids = encoded_input['input_ids']
	    attention_mask = encoded_input['attention_mask']
	    token_type_ids = encoded_input['token_type_ids']

	    # Inference only: no gradients are needed, so do not track them.
	    with torch.no_grad():
	        pred = model(input_ids, attention_mask, token_type_ids)
	    print(pred)

	    # max(1) yields (largest log-probability, its index) per row; the
	    # index is the predicted class.
	    best = pred.max(1)
	    print(best)
	    label = best[1]
	    print(label)

	    # A single text was encoded, so label holds exactly one element.
	    is_hate_speech = "YES" if label.item() == 1 else "NO"

	    st.write(f"Is this hate speech?: {is_hate_speech}")