"""Gradio demo that classifies a text as toxic / not toxic.

Features fed to the classifier are the concatenation of:
  * the torchMoji emoji probability vector for the text, and
  * the sentence embedding produced by ``sentencemodel``.

The concatenated vector is passed through a small feed-forward network whose
weights are loaded from ``large-tuned.pth``.
"""
import gradio as gr
import asyncio
import torch.nn.functional as F
from torch import nn
import os
import warnings

# NOTE(review): clearing CURL_CA_BUNDLE is a common workaround that ends up
# turning off TLS certificate verification for HTTP downloads. It must stay
# before the sentence_transformers import to keep the original behaviour, but
# it is a security risk -- confirm it is still required.
os.environ['CURL_CA_BUNDLE'] = ''

from sentence_transformers import SentenceTransformer

sentencemodel = SentenceTransformer('johnpaulbin/toxic-gte-small-3')
USE_GPU = False

import numpy as np
import emoji, json
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH
from torchmoji.sentence_tokenizer import SentenceTokenizer
from torchmoji.model_def import torchmoji_emojis
import torch

# Emoji map in emoji_overview.png; position i is the alias for class id i.
EMOJIS = (
    ":joy: :unamused: :weary: :sob: :heart_eyes: "
    ":pensive: :ok_hand: :blush: :heart: :smirk: "
    ":grin: :notes: :flushed: :100: :sleeping: "
    ":relieved: :relaxed: :raised_hands: :two_hearts: :expressionless: "
    ":sweat_smile: :pray: :confused: :kissing_heart: :heartbeat: "
    ":neutral_face: :information_desk_person: :disappointed: :see_no_evil: :tired_face: "
    ":v: :sunglasses: :rage: :thumbsup: :cry: "
    ":sleepy: :yum: :triumph: :hand: :mask: "
    ":clap: :eyes: :gun: :persevere: :smiling_imp: "
    ":sweat: :broken_heart: :yellow_heart: :musical_note: :speak_no_evil: "
    ":wink: :skull: :confounded: :smile: :stuck_out_tongue_winking_eye: "
    ":angry: :no_good: :muscle: :facepunch: :purple_heart: "
    ":sparkling_heart: :blue_heart: :grimacing: :sparkles:"
).split(' ')

# Probability of the "toxic" class above which a text is reported as toxic.
TOXIC_THRESHOLD = 0.4


def top_elements(array, k):
    """Return the indices of the ``k`` largest values, largest first.

    Args:
        array: 1-D NumPy array of scores.
        k: number of indices wanted. Values larger than ``len(array)`` are
            clamped; ``k <= 0`` yields an empty result.

    Returns:
        1-D NumPy integer array of indices into ``array``.
    """
    k = min(k, len(array))
    if k <= 0:
        # Without this guard ``[-0:]`` would select the whole array.
        return np.array([], dtype=np.intp)
    ind = np.argpartition(array, -k)[-k:]
    return ind[np.argsort(array[ind])][::-1]


with open("vocabulary.json", 'r') as f:
    vocabulary = json.load(f)

# 100 is the second positional argument of SentenceTokenizer
# (presumably the maximum sentence length in tokens -- TODO confirm).
st = SentenceTokenizer(vocabulary, 100)

emojimodel = torchmoji_emojis("pytorch_model.bin")
if USE_GPU:
    emojimodel.to("cuda:0")


def deepmojify(sentence, top_n=5, prob_only=False):
    """Predict emojis for a single sentence with torchMoji.

    Args:
        sentence: text to analyse.
        top_n: how many of the most probable emojis to render.
        prob_only: when True, skip emoji rendering and return only the
            probability tensor.

    Returns:
        ``prob`` (a torch tensor of per-emoji scores for the sentence) when
        ``prob_only`` is True; otherwise the tuple ``(list_emojis, prob)``
        where ``list_emojis`` is a one-element list holding the rendered
        top-``top_n`` emojis joined by spaces.
    """
    tokenized, _, _ = st.tokenize_sentences([sentence])
    # Token ids must be integers before they are handed to the model.
    tokenized = np.array(tokenized).astype(int)
    if USE_GPU:
        tokenized = torch.tensor(tokenized).cuda()

    # [0] selects the result for the single sentence in the batch.
    prob = emojimodel.forward(tokenized)[0]
    if not USE_GPU:
        # On the CPU path the model is fed a NumPy array; wrap its result in
        # a tensor so both paths hand a tensor to the caller.
        prob = torch.tensor(prob)
    if prob_only:
        return prob

    emoji_ids = top_elements(prob.cpu().numpy(), top_n)
    aliases = ' '.join(EMOJIS[i] for i in emoji_ids)
    # returning the emojis as a list named as list_emojis
    list_emojis = [emoji.emojize(aliases, language='alias')]
    return list_emojis, prob


# Classifier head. Layer sizes must stay exactly as they are so the parameter
# names and shapes line up with the saved checkpoint.
model = nn.Sequential(
    nn.Linear(448, 300),  # 448 = emoji probabilities + sentence embedding
    nn.ReLU(),
    nn.BatchNorm1d(300),  # Batch normalization

    nn.Linear(300, 300),
    nn.ReLU(),
    nn.BatchNorm1d(300),  # Batch normalization

    nn.Linear(300, 200),
    nn.ReLU(),
    nn.BatchNorm1d(200),  # Batch normalization

    nn.Linear(200, 125),
    nn.ReLU(),
    nn.BatchNorm1d(125),  # Batch normalization

    nn.Linear(125, 2),  # two logits: [not toxic, toxic]
    nn.Dropout(0.05)  # Dropout (inactive after model.eval())
)

# strict=False tolerates key mismatches; surface them instead of silently
# running with partially initialised weights.
_load_result = model.load_state_dict(
    torch.load("large-tuned.pth", map_location=torch.device('cpu')),
    strict=False,
)
if _load_result.missing_keys or _load_result.unexpected_keys:
    warnings.warn(
        "large-tuned.pth does not fully match the classifier: "
        f"missing={list(_load_result.missing_keys)}, "
        f"unexpected={list(_load_result.unexpected_keys)}"
    )
# Inference mode: BatchNorm uses running statistics and Dropout is disabled,
# which is also what allows a batch of size 1 in ``inf``.
model.eval()


def inf(inpt):
    """Classify ``inpt`` and return a human-readable verdict string.

    Args:
        inpt: raw text from the Gradio textbox.

    Returns:
        ``"Toxic! <p>"`` when the toxic-class probability exceeds
        ``TOXIC_THRESHOLD``, otherwise ``"Not toxic <p>"``, where ``<p>`` is
        the string form of the corresponding probability tensor.
    """
    TEXT = inpt.lower()
    probs = deepmojify(TEXT, prob_only=True)
    embedding = sentencemodel.encode(TEXT, convert_to_tensor=True)
    # The classifier weights were loaded onto the CPU, while the encoder (and
    # torchMoji when USE_GPU is set) may return tensors on another device.
    # Bring both feature vectors to the CPU before concatenating; .cpu() is a
    # no-op for tensors that are already there.
    INPUT = torch.cat((probs.cpu(), embedding.cpu()))
    output = F.softmax(model(INPUT.view(1, -1)), dim=1)

    if output[0][1] > TOXIC_THRESHOLD:
        return "Toxic! " + str(output[0][1])
    return "Not toxic " + str(output[0][0])


iface = gr.Interface(fn=inf, inputs="text", outputs="text")
# NOTE(review): `concurrency_count` is only accepted by some Gradio versions;
# confirm against the pinned Gradio release before upgrading.
iface.queue(concurrency_count=500).launch()