import gradio as gr
import numpy as np
import tensorflow as tf
from transformers import AutoTokenizer

# Load the fine-tuned AraBERT sentiment model from the local SavedModel directory
model = tf.saved_model.load('arabert_pretrained')

# Load the matching AraBERT tokenizer from the Hugging Face Hub
arabert_tokenizer = AutoTokenizer.from_pretrained('aubmindlab/bert-base-arabert')
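# If the SavedModel's call signature is ever in doubt, it can be inspected
# before wiring it into the app (a debugging sketch, safe to remove):
#
#   print(model.signatures)  # maps serving-signature names to concrete functions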
def preprocess_input_data(texts, tokenizer, max_len=120):
    """Tokenize and preprocess the input data for the AraBERT model.

    Args:
        texts (list): List of text strings.
        tokenizer (AutoTokenizer): AraBERT tokenizer from the transformers library.
        max_len (int, optional): Maximum sequence length. Defaults to 120.

    Returns:
        Tuple of numpy arrays: input token IDs and attention masks.
    """
    # Tokenize each string, padding/truncating to a fixed length so the
    # batch can be stacked into rectangular arrays
    tokenized_data = [tokenizer.encode_plus(
        t,
        max_length=max_len,
        padding='max_length',
        truncation=True,
        add_special_tokens=True) for t in texts]

    # Extract the tokenized input IDs and attention masks as numpy arrays
    input_ids = np.array([data['input_ids'] for data in tokenized_data])
    attention_mask = np.array([data['attention_mask'] for data in tokenized_data])
    return input_ids, attention_mask
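# A minimal usage sketch for the preprocessing step (illustrative inputs;
# with the default max_len, both arrays come back with shape (2, 120)):
#
#   ids, mask = preprocess_input_data(["جميل جدا", "سيء للغاية"], arabert_tokenizer)
#   print(ids.shape, mask.shape)  # (2, 120) (2, 120)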
def sentiment_analysis(text):
    # Tokenize the single input string as a batch of one
    X_input_ids, X_attention_mask = preprocess_input_data([text], arabert_tokenizer)

    # Run the SavedModel on the token IDs (assumes the exported model accepts
    # a batch of token IDs and returns one sigmoid score per example)
    preds = model(tf.constant(X_input_ids))

    # Threshold each score at 0.5 to map it to a binary class label
    predicted_class = ''.join(str(x) for x in np.where(np.asarray(preds) < 0.5, 0, 1).flatten())
    return predicted_class
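# A quick local smoke test before launching the UI (a sketch; the Arabic
# sample string is a hypothetical input, and the printed label depends on
# the trained model):
#
#   print(sentiment_analysis("الخدمة ممتازة"))  # prints '0' or '1'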
# Build and launch the Gradio interface: plain text in, predicted label out
iface = gr.Interface(fn=sentiment_analysis, inputs="text", outputs="text")
iface.launch()