"""Gradio demo for multilingual emoji prediction.

Builds one tab per model (three locally loaded sequence classifiers, one
model queried through the Hugging Face Inference API, and an "About us"
tab) and serves them as a single tabbed interface.
"""

import argparse
import csv
import os
import urllib.request
from functools import lru_cache

import gradio as gr
import numpy as np
import requests
import torch
from scipy.special import softmax
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    SwitchTransformersForConditionalGeneration,
)

# Label file mapping class index -> emoji (tab-separated, emoji in column 1).
EMOJI_MAPPING_URL = (
    "https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/emoji/mapping.txt"
)

# Number of highest-scoring emojis reported by the classifier tabs.
TOP_K = 3

# Timeouts (seconds) so a stalled remote host cannot hang a request forever.
MAPPING_TIMEOUT = 10
SWITCH_TIMEOUT = 30

args_dict = dict(
    EX_LIST=[
        ["This is wonderful!"],
        ["Nice car"],
        ["La France est la meilleure équipe du monde"],
        ["Visca Barca"],
        ["Hala Madrid"],
        ["Buongiorno"],
        # ["Auf einigen deutschen Straßen gibt es kein Radar"],
        ["Tempo soleggiato in Italia"],
        ["Bonjour"],
        ["صباح الخير"],
        ["اكل زوجتي جميل"],
    ],

    # MMiniLM: pretrained tokenizer and model.
    tokenizer_MMiniLM=AutoTokenizer.from_pretrained("Karim-Gamal/MMiniLM-L12-finetuned-emojis-IID-Fed"),
    model_MMiniLM=AutoModelForSequenceClassification.from_pretrained("Karim-Gamal/MMiniLM-L12-finetuned-emojis-IID-Fed"),

    # XLM: pretrained tokenizer and model.
    tokenizer_XLM=AutoTokenizer.from_pretrained("Karim-Gamal/XLM-Roberta-finetuned-emojis-IID-Fed"),
    model_XLM=AutoModelForSequenceClassification.from_pretrained("Karim-Gamal/XLM-Roberta-finetuned-emojis-IID-Fed"),

    # Bert: pretrained tokenizer and model.
    tokenizer_Bert=AutoTokenizer.from_pretrained("Karim-Gamal/BERT-base-finetuned-emojis-IID-Fed"),
    model_Bert=AutoModelForSequenceClassification.from_pretrained("Karim-Gamal/BERT-base-finetuned-emojis-IID-Fed"),

    description='Real-time Emoji Prediction',
    article='',
)

config = argparse.Namespace(**args_dict)


def _mask_token(token):
    """Return the placeholder for a single space-separated token."""
    if token.startswith('@') and len(token) > 1:
        token = '@user'
    # Links are dropped entirely (replaced by an empty token).
    return '' if token.startswith('http') else token


def preprocess(text):
    """Lower-case *text*, replace @mentions with '@user' and blank out links."""
    return " ".join(_mask_token(t) for t in text.lower().split(" "))


@lru_cache(maxsize=1)
def _load_emoji_labels():
    """Download the emoji label list once and reuse it for later calls.

    The interfaces run with ``live=True``, so predictions fire very often;
    caching avoids one HTTP round trip per prediction. A failed download
    raises and is not cached, so the next call retries.
    """
    with urllib.request.urlopen(EMOJI_MAPPING_URL, timeout=MAPPING_TIMEOUT) as f:
        rows = f.read().decode('utf-8').split("\n")
    return tuple(row[1] for row in csv.reader(rows, delimiter='\t') if len(row) > 1)


def test_with_sentance(text, net, tokenizer):
    """Classify *text* with *net* and return the top emojis with their scores.

    Args:
        text: Raw input sentence.
        net: Sequence-classification model; called with the encoded ids.
        tokenizer: Tokenizer providing ``encode(..., return_tensors='pt')``.

    Returns:
        Dict mapping emoji label to softmax score (rounded to 4 decimals),
        ordered from most to least likely, with at most ``TOP_K`` entries.
    """
    text = preprocess(text)

    net.eval()
    net.to('cpu')
    encoded_input = tokenizer.encode(text, padding=True, truncation=True, return_tensors='pt')

    # Inference only: no need to track gradients.
    with torch.no_grad():
        output = net(encoded_input)
    scores = softmax(output[0][0].detach().numpy())

    labels = _load_emoji_labels()
    ranking = np.argsort(scores)[::-1]
    return {labels[idx]: np.round(float(scores[idx]), 4) for idx in ranking[:TOP_K]}


def _make_classifier_interface(title, model, tokenizer):
    """Build the Gradio tab for one locally loaded classifier."""

    def _predict(text):
        return test_with_sentance(text, model, tokenizer)

    return gr.Interface(
        fn=_predict,
        inputs=gr.Textbox(placeholder="Enter sentence here..."),
        outputs="label",
        examples=config.EX_LIST,
        live=True,
        title=title,
        description=config.description,
        article='',
    )


list_interface = []
list_title = []

# Locally loaded classifiers, in tab order.
for _title, _model, _tokenizer in (
    ('BERT Multilingual', config.model_Bert, config.tokenizer_Bert),
    ('XLM Roberta Multilingual', config.model_XLM, config.tokenizer_XLM),
    ('MiniLM Multilingual', config.model_MMiniLM, config.tokenizer_MMiniLM),
):
    list_interface.append(_make_classifier_interface(_title, _model, _tokenizer))
    list_title.append(_title)


# Switch: served through the Hugging Face Inference API.
API_URL_Switch = "https://api-inference.huggingface.co/models/Karim-Gamal/switch-base-8-finetuned-SemEval-2018-emojis-IID-Fed"

# SECURITY: the API token must not live in source control. It is read from the
# HF_API_TOKEN environment variable (e.g. a Space secret). A token was
# previously committed here in plain text; treat it as leaked and revoke it.
# NOTE(review): without a token the request is sent unauthenticated - confirm
# that is acceptable for this deployment.
_hf_api_token = os.environ.get("HF_API_TOKEN")
headers_Switch = {"Authorization": f"Bearer {_hf_api_token}"} if _hf_api_token else {}

# First word of the generated text -> emoji shown to the user.
SWITCH_TEXT_TO_EMOJI = {
    'red': '❤',
    'face': '😍',
    'joy': '😂',
    'love': '💕',
    'fire': '🔥',
    'smile': '😊',
    'sunglasses': '😎',
    'sparkle': '✨',
    'blue': '💙',
    'kiss': '😘',
    'camera': '📷',
    'USA': '🇺🇸',
    'sun': '☀',
    'purple': '💜',
    'blink': '😉',
    'hundred': '💯',
    'beam': '😁',
    'tree': '🎄',
    'flash': '📸',
    'tongue': '😜',
}


def query_Switch(payload):
    """POST *payload* to the Switch endpoint and return the decoded JSON."""
    response = requests.post(
        API_URL_Switch, headers=headers_Switch, json=payload, timeout=SWITCH_TIMEOUT
    )
    return response.json()


# Warm-up request issued at start-up; a failure here must not stop the app.
try:
    query_Switch({"inputs": 'test'})
except (requests.RequestException, ValueError):
    pass


def _predict_switch(text):
    """Return the emoji for *text* predicted by the remote Switch model.

    Returns ``None`` when the response does not have the expected
    ``[{'generated_text': ...}]`` shape or names an unknown emoji.
    """
    output_temp = query_Switch({"inputs": preprocess(text)})
    try:
        generated = output_temp[0]['generated_text']
        return SWITCH_TEXT_TO_EMOJI[generated.split(' ')[0]]
    except (KeyError, IndexError, TypeError, AttributeError):
        # Error payloads are dicts without index 0 / 'generated_text'.
        return None


list_interface.append(
    gr.Interface(
        fn=_predict_switch,
        inputs=gr.Textbox(placeholder="Enter sentence here..."),
        outputs="text",
        examples=config.EX_LIST,
        live=True,
        title='Switch-Base-8',
        description=config.description,
        article='',
    )
)
list_title.append('Switch-Base-8')


def _rating_to_emojis(input_rating):
    """Map the "About us" slider rating to a fixed emoji distribution."""
    if input_rating <= 2:
        return {'🔥': 0.6, '✨': 0.3, '💯': 0.1}
    if input_rating <= 4:
        return {'✨': 0.6, '😉': 0.3, '💯': 0.1}
    return {'😍': 0.6, '💯': 0.3, '💕': 0.1}


list_interface.append(
    gr.Interface(
        fn=_rating_to_emojis,
        inputs=gr.Slider(1, 5, value=4),
        outputs="label",
        live=True,
        title='About us',
        description="We don't have sad emoji so our rating will always be great. 😂",
        # CSS Source : https://codepen.io/bibiangel199/pen/warevP
        # NOTE(review): the article markup appears to be missing from this
        # copy of the file; only a line break remains - restore if available.
        article=config.article + '\n',
    )
)
list_title.append('About us')

demo = gr.TabbedInterface(
    list_interface,
    list_title,
    title='Multilingual Emoji Prediction Using Federated Learning',
    css='.gradio-container {color : orange}',
)

demo.launch()