Spaces:

johnpaulbin
/

beanbox-toxic-demo

Runtime error

App Files Files Community

beanbox-toxic-demo / app.py

johnpaulbin

Update app.py

5efb4cb 7 months ago

raw

history blame contribute delete

No virus

3.7 kB

	import gradio as gr
	import asyncio
	import torch.nn.functional as F
	from torch import nn
	import os
	# NOTE(review): blanking CURL_CA_BUNDLE looks like a workaround for TLS
	# certificate errors during the model download below; in some HTTP client
	# versions an empty value disables certificate verification — confirm this
	# is still needed.
	os.environ['CURL_CA_BUNDLE'] = ''


	from sentence_transformers import SentenceTransformer
	# Sentence encoder; `inf` concatenates its embedding with the torchMoji output.
	sentencemodel = SentenceTransformer('johnpaulbin/toxic-gte-small-3')

	# When True, the torchMoji model and its token tensor are moved to CUDA.
	# The visible code does not consult this flag for `sentencemodel` or `model`.
	USE_GPU = False


	""" Use torchMoji to predict emojis from a single text input
	"""

	import numpy as np
	import emoji, json
	from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH
	from torchmoji.sentence_tokenizer import SentenceTokenizer
	from torchmoji.model_def import torchmoji_emojis
	import torch

	# Emoji aliases in the order of the emoji map in emoji_overview.png; positions
	# in this list are used as ids when decoding the emoji model's output.
	# Built from adjacent string literals (not backslash continuations inside a
	# single literal) so source indentation can never leak into the aliases, and
	# split on any whitespace for the same reason.
	EMOJIS = (
		":joy: :unamused: :weary: :sob: :heart_eyes: "
		":pensive: :ok_hand: :blush: :heart: :smirk: "
		":grin: :notes: :flushed: :100: :sleeping: "
		":relieved: :relaxed: :raised_hands: :two_hearts: :expressionless: "
		":sweat_smile: :pray: :confused: :kissing_heart: :heartbeat: "
		":neutral_face: :information_desk_person: :disappointed: :see_no_evil: :tired_face: "
		":v: :sunglasses: :rage: :thumbsup: :cry: "
		":sleepy: :yum: :triumph: :hand: :mask: "
		":clap: :eyes: :gun: :persevere: :smiling_imp: "
		":sweat: :broken_heart: :yellow_heart: :musical_note: :speak_no_evil: "
		":wink: :skull: :confounded: :smile: :stuck_out_tongue_winking_eye: "
		":angry: :no_good: :muscle: :facepunch: :purple_heart: "
		":sparkling_heart: :blue_heart: :grimacing: :sparkles:"
	).split()

	def top_elements(array, k):
		"""Return the indices of the ``k`` largest values, largest first.

		Args:
			array: NumPy array of scores (assumed one-dimensional).
			k: Number of indices wanted. Values larger than ``len(array)`` are
				clamped to it; zero or negative values give an empty result.

		Returns:
			A NumPy integer array of at most ``k`` indices into ``array``,
			ordered by descending score.
		"""
		k = min(int(k), len(array))
		if k <= 0:
			# Without this guard, k == 0 turns the slice ``[-k:]`` into ``[0:]``,
			# which selects every index instead of none.
			return np.empty(0, dtype=np.intp)
		# Select the top-k set without sorting the whole array, then order
		# just those k entries from largest to smallest.
		top = np.argpartition(array, -k)[-k:]
		return top[np.argsort(array[top])][::-1]


	# Load the tokenizer vocabulary. JSON text is UTF-8, so decode it explicitly
	# instead of relying on the platform's default encoding.
	with open("vocabulary.json", "r", encoding="utf-8") as f:
		vocabulary = json.load(f)

	# Tokenizer built from that vocabulary. The second argument is presumably the
	# fixed/maximum sentence length — confirm against the torchMoji API.
	st = SentenceTokenizer(vocabulary, 100)

	# Emoji prediction model, with weights read from a local file.
	emojimodel = torchmoji_emojis("pytorch_model.bin")

	if USE_GPU:
		emojimodel.to("cuda:0")

	def deepmojify(sentence, top_n=5, prob_only=False):
		"""Score one sentence with the emoji model and optionally render emoji.

		Args:
			sentence: Text to score.
			top_n: How many of the highest-scoring emoji to render.
			prob_only: When true, skip emoji rendering and return only the
				model output.

		Returns:
			If ``prob_only`` is true, a tensor with the model output for the
			sentence (presumably one score per entry in ``EMOJIS`` — confirm).
			Otherwise a tuple ``(list_emojis, prob)`` where ``list_emojis`` is a
			one-element list holding the top emoji joined by spaces and
			``prob`` is that same tensor.
		"""
		tokenized, _, _ = st.tokenize_sentences([sentence])
		# Cast the token ids to an integer NumPy array.
		tokenized = np.array(tokenized).astype(int)
		if USE_GPU:
			tokenized = torch.tensor(tokenized).cuda()

		# Inference only, so no autograd graph is recorded. Calling the module
		# (rather than .forward directly) keeps any registered hooks working.
		with torch.no_grad():
			prob = emojimodel(tokenized)[0]
		if not USE_GPU:
			# On this path the model was fed a NumPy array; wrap the result so
			# both paths hand back a tensor.
			prob = torch.tensor(prob)
		if prob_only:
			return prob

		# Uses the module-level top_elements helper; the nested duplicate that
		# used to shadow it here was identical and has been dropped.
		emoji_ids = top_elements(prob.cpu().numpy(), top_n)
		aliases = " ".join(EMOJIS[i] for i in emoji_ids)
		list_emojis = [emoji.emojize(aliases, language='alias')]
		return list_emojis, prob


	# Toxicity classifier head: an MLP over a 448-dimensional input (which `inf`
	# builds by concatenating the emoji-model output with the sentence
	# embedding) producing two logits. Layer order must not change: the
	# checkpoint's parameter names are the positions in this Sequential.
	model = nn.Sequential(
		nn.Linear(448, 300),
		nn.ReLU(),
		nn.BatchNorm1d(300),

		nn.Linear(300, 300),
		nn.ReLU(),
		nn.BatchNorm1d(300),

		nn.Linear(300, 200),
		nn.ReLU(),
		nn.BatchNorm1d(200),

		nn.Linear(200, 125),
		nn.ReLU(),
		nn.BatchNorm1d(125),

		nn.Linear(125, 2),
		nn.Dropout(0.05),  # inactive once model.eval() is called below
	)

	import warnings

	# NOTE(review): torch.load unpickles the file; only load checkpoints from a
	# trusted source.
	# strict=False is kept so a partial checkpoint still starts the app, but a
	# mismatch is reported instead of silently leaving layers at their random
	# initial values.
	_load_result = model.load_state_dict(
		torch.load("large-tuned.pth", map_location=torch.device('cpu')),
		strict=False,
	)
	if _load_result.missing_keys or _load_result.unexpected_keys:
		warnings.warn(
			"large-tuned.pth does not fully match the classifier: "
			f"missing keys {list(_load_result.missing_keys)}, "
			f"unexpected keys {list(_load_result.unexpected_keys)}"
		)
	# Evaluation mode: BatchNorm uses running statistics and Dropout is a no-op.
	model.eval()


	def inf(inpt):
		"""Classify a piece of text as toxic or not toxic.

		The lower-cased text is scored by the emoji model and encoded by the
		sentence encoder; both vectors are concatenated and fed to the
		classifier, whose two logits are turned into probabilities with softmax.

		Args:
			inpt: Text to classify.

		Returns:
			``"Toxic! <p>"`` when the second class probability exceeds 0.4,
			otherwise ``"Not toxic <p>"``, where ``<p>`` is the probability of
			the reported class formatted to four decimals.
		"""
		text = inpt.lower()
		# Inference only: skip autograd bookkeeping.
		with torch.no_grad():
			probs = deepmojify(text, prob_only=True)
			embedding = sentencemodel.encode(text, convert_to_tensor=True)
			# The classifier is loaded onto the CPU in this file, while the
			# encoder may return a tensor on another device; bring both to the
			# CPU so the concatenation and the forward pass cannot mismatch.
			features = torch.cat((probs.cpu(), embedding.cpu()))
			output = F.softmax(model(features.view(1, -1)), dim=1)

		# Plain Python floats, so the message shows a number rather than a
		# tensor repr such as "tensor(0.98, grad_fn=...)".
		not_toxic, toxic = output[0].tolist()
		if toxic > 0.4:
			return "Toxic! " + f"{toxic:.4f}"
		return "Not toxic " + f"{not_toxic:.4f}"

	# Wrap `inf` in a text-in / text-out Gradio interface, turn on request
	# queuing, then start the server.
	# NOTE(review): `concurrency_count` is a Gradio 3.x `queue()` argument —
	# confirm the installed Gradio version still accepts it.
	iface = gr.Interface(fn=inf, inputs="text", outputs="text")
	iface.queue(concurrency_count=500)
	iface.launch()