Hugging Face Space (status at capture time: Sleeping) — Gradio text-classification demo.
import tensorflow as tf

import gradio as gr
import numpy as np
from scipy.special import softmax
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

# Tokenizer uses the base RoBERTa vocabulary; the classification head weights
# are loaded from the fine-tuned "ilan541/OncUponTim" checkpoint on the Hub.
# (The duplicate `from transformers import TFAutoModelForSequenceClassification`
# that used to sit here was redundant and has been removed.)
tokenizer = AutoTokenizer.from_pretrained('roberta-base')
model = TFAutoModelForSequenceClassification.from_pretrained("ilan541/OncUponTim")
def split_text(text, nb_splits):
    """Split *text* into at most *nb_splits* consecutive, near-equal chunks.

    Uses ceiling division for the chunk length so the result never exceeds
    *nb_splits* chunks (floor division used to yield ``nb_splits + 1`` chunks
    whenever ``len(text)`` was not an exact multiple, and raised ``ValueError``
    for texts shorter than *nb_splits* because the range step became 0).

    Parameters
    ----------
    text : str
        The text to split.
    nb_splits : int
        Desired number of chunks; must be a positive integer.

    Returns
    -------
    list[str]
        Between 1 and ``nb_splits`` non-empty chunks covering *text* in order
        (a single ``[""]`` for empty input).

    Raises
    ------
    ValueError
        If *nb_splits* is not positive.
    """
    if nb_splits <= 0:
        raise ValueError("nb_splits must be a positive integer")
    length = len(text)
    if length == 0:
        # Keep downstream code working: always return at least one portion.
        return [""]
    # Ceiling division without math.ceil: -(-a // b).
    chunk_len = -(-length // nb_splits)
    return [text[i:i + chunk_len] for i in range(0, length, chunk_len)]
def get_probs(list_of_portions):
    """Run the model on each text portion and average the two class logits.

    Each portion is tokenized (truncated/padded to 512 tokens) and passed
    through the module-level ``model``; the per-portion logits for class 0
    and class 1 are collected and reduced with ``np.mean``.

    Returns a ``(mean_logit_class_0, mean_logit_class_1)`` pair.
    """
    per_portion = []
    for portion in list_of_portions:
        encoded = tokenizer(
            portion,
            truncation=True,
            padding='max_length',
            max_length=512,
            return_tensors='tf',
        )
        output = model(encoded)
        per_portion.append((output.logits[:, 0], output.logits[:, 1]))
    class_0, class_1 = zip(*per_portion)
    return np.mean(class_0), np.mean(class_1)
def predict(your_text, nb_splits=3, threshold_value=0.35):
    """Classify *your_text* as promising or needing editing.

    The text is split into *nb_splits* chunks (long inputs would otherwise be
    truncated at 512 tokens), the model's class logits are averaged across
    chunks, and a softmax over the two mean logits yields the class-1
    probability, which is compared against *threshold_value*.

    Parameters
    ----------
    your_text : str
        Text to evaluate.
    nb_splits : int, optional
        Number of chunks to split the text into (default 3; previously
        hard-coded).
    threshold_value : float, optional
        Decision threshold on the class-1 probability (default 0.35;
        previously hard-coded).

    Returns
    -------
    str
        A human-readable verdict for the Gradio UI.
    """
    splits = split_text(your_text, nb_splits)
    y_logits_0, y_logits_1 = get_probs(splits)
    print('y_logits_0:', y_logits_0)
    print('y_logits_1:', y_logits_1)
    # Softmax over the two averaged logits; index 1 is the "popular" class.
    y_probs_1 = softmax([y_logits_0, y_logits_1])[1]
    print('y_probs_1:', y_probs_1)
    y_pred_1 = (y_probs_1 >= threshold_value)
    print('y_pred_1:', y_pred_1)
    # inference
    if not y_pred_1:
        return 'This content is not of high standard. It needs editing. '
    return 'Promising content! Our algorithm predicts it will be very popular.'
# Expose the classifier through a minimal text-in / text-out Gradio UI.
iface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
)
iface.launch()