# OncUponTim / app.py
# ilan541's picture
# Update app.py
# e1e4e7f
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
import gradio as gr
import numpy as np
from scipy.special import softmax
# Tokenizer loaded once at import time from the 'roberta-base' checkpoint.
# NOTE(review): the tokenizer and the model below come from different
# checkpoints — confirm the model was trained with this tokenizer.
tokenizer = AutoTokenizer.from_pretrained('roberta-base')
# Redundant re-import: this name is already imported at the top of the file.
from transformers import TFAutoModelForSequenceClassification
# TensorFlow sequence-classification model loaded once at import time
# from "ilan541/OncUponTim".
model = TFAutoModelForSequenceClassification.from_pretrained("ilan541/OncUponTim")
def split_text(text, nb_splits):
    """Split *text* into at most *nb_splits* contiguous, near-equal portions.

    The portions are returned in order and concatenate back to *text*.
    When the length is not a multiple of *nb_splits*, the leading portions
    are one character longer than the trailing ones. Empty portions are
    dropped, so a text shorter than *nb_splits* yields one portion per
    character and an empty text yields an empty list.

    Args:
        text: The string to split.
        nb_splits: Maximum number of portions to produce; must be >= 1.

    Returns:
        A list of non-empty substrings of *text*.

    Raises:
        ValueError: If *nb_splits* is smaller than 1.
    """
    if nb_splits < 1:
        raise ValueError("nb_splits must be a positive integer")

    # `extra` portions receive one additional character so that the whole
    # text is covered without producing a tiny leftover portion.
    base, extra = divmod(len(text), nb_splits)

    portions = []
    start = 0
    for index in range(nb_splits):
        end = start + base + (1 if index < extra else 0)
        # Skip zero-length slices (text shorter than nb_splits).
        if end > start:
            portions.append(text[start:end])
        start = end
    return portions
def get_probs(list_of_portions):
    """Run the classifier on every portion and average the two logits.

    Each portion is tokenized on its own (truncated and padded to 512
    tokens) and passed through the module-level ``model``. The logits at
    index 0 and index 1 are then averaged across all portions.

    Note that the returned values are mean logits; no softmax is applied
    in this function.

    Args:
        list_of_portions: Iterable of text portions to score.

    Returns:
        A tuple ``(mean_logit_0, mean_logit_1)``.
    """
    per_portion_logits = [
        model(
            tokenizer(
                portion,
                truncation=True,
                padding='max_length',
                max_length=512,
                return_tensors='tf',
            )
        ).logits
        for portion in list_of_portions
    ]

    mean_logit_0 = np.mean([logits[:, 0] for logits in per_portion_logits])
    mean_logit_1 = np.mean([logits[:, 1] for logits in per_portion_logits])
    return mean_logit_0, mean_logit_1
def predict(your_text):
    """Classify a text and return a human-readable verdict.

    The text is split into up to three portions, each portion is scored
    via ``get_probs``, and the averaged logits are turned into a
    probability for the index-1 class with a softmax. The text is deemed
    promising when that probability reaches the 0.35 threshold.

    Blank input (``None``, empty, or whitespace only) is answered with a
    prompt to enter text instead of being sent to the model.

    Args:
        your_text: The text entered by the user.

    Returns:
        A message string describing the verdict.
    """
    # Blank input has nothing to split or score; answer it directly.
    if your_text is None or not your_text.strip():
        return 'Please enter some text to evaluate.'

    # split the text; never ask for more portions than there are characters,
    # otherwise the per-portion length would round down to zero
    nb_splits = min(3, len(your_text))
    splits = split_text(your_text, nb_splits)

    y_logits_0, y_logits_1 = get_probs(splits)
    print('y_logits_0:', y_logits_0)
    print('y_logits_1:', y_logits_1)

    # Softmax over the two averaged logits; keep the index-1 probability.
    y_probs_1 = softmax([y_logits_0, y_logits_1])[1]
    print('y_probs_1:', y_probs_1)

    threshold_value = 0.35
    y_pred_1 = (y_probs_1 >= threshold_value)
    print('y_pred_1:', y_pred_1)

    # inference
    if not y_pred_1:
        return 'This content is not of high standard. It needs editing. '
    return 'Promising content! Our algorithm predicts it will be very popular.'
# Gradio interface: one text input is passed to `predict`, and the string
# it returns is shown as the text output.
iface = gr.Interface(fn=predict, inputs="text", outputs="text")
# Start the Gradio app when this module is executed.
iface.launch()