Spaces:
Sleeping
Sleeping
import tensorflow as tf | |
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification | |
import gradio as gr | |
import numpy as np | |
from scipy.special import softmax | |
tokenizer = AutoTokenizer.from_pretrained('roberta-base') | |
from transformers import TFAutoModelForSequenceClassification | |
model = TFAutoModelForSequenceClassification.from_pretrained("ilan541/OncUponTim") | |
def split_text(text, nb_splits): | |
cols = ['split_'+ str(i) for i in range(1, nb_splits+1)] | |
l = len(text) | |
chars = int(l/nb_splits) | |
out = [] | |
for i in range(0, l, chars): | |
out.append(text[ i : i+chars]) | |
return out | |
def get_probs(list_of_portions): | |
y_pred_logits_0 = [] | |
y_pred_logits_1 = [] | |
for text in list_of_portions: | |
inp = tokenizer(text, | |
truncation=True, | |
padding='max_length', | |
max_length=512, | |
return_tensors='tf') | |
y_pred = model(inp) | |
y_pred_logits_0.append(y_pred.logits[:,0]) | |
y_pred_logits_1.append(y_pred.logits[:,1]) | |
return np.mean(y_pred_logits_0), np.mean(y_pred_logits_1) | |
def predict(your_text): | |
# split the text | |
nb_splits = 3 | |
splits = split_text(your_text, nb_splits) | |
y_logits_0, y_logits_1 = get_probs(splits) | |
print('y_logits_0:', y_logits_0) | |
print('y_logits_1:', y_logits_1) | |
y_probs_1 = softmax([y_logits_0, y_logits_1])[1] | |
print('y_probs_1:', y_probs_1) | |
threshold_value = 0.35 | |
y_pred_1 = (y_probs_1 >= threshold_value) | |
print('y_pred_1:', y_pred_1) | |
# inference | |
if not y_pred_1 : | |
return 'This content is not of high standard. It needs editing. ' | |
else: | |
return 'Promising content! Our algorithm predicts it will be very popular.' | |
iface = gr.Interface(fn=predict, inputs="text", outputs="text") | |
iface.launch() | |