File size: 1,767 Bytes
12d4f28
 
 
 
 
 
 
 
888db49
 
12d4f28
65367f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
abf5a81
 
 
 
 
65367f6
 
 
 
 
 
 
12d4f28
9b4bd92
65367f6
3f2d91b
65367f6
 
49b0227
 
 
 
9b4bd92
e1e4e7f
49b0227
 
9b4bd92
49b0227
12d4f28
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
import gradio as gr
import numpy as np
from scipy.special import softmax

# Load the tokenizer and the fine-tuned sequence-classification model once at
# import time so every call to predict() reuses the same instances.
# NOTE: both calls download weights from the Hugging Face Hub on first run.
tokenizer = AutoTokenizer.from_pretrained('roberta-base')

# TFAutoModelForSequenceClassification is already imported at the top of the
# file; the duplicate import that used to sit here was removed.
model = TFAutoModelForSequenceClassification.from_pretrained("ilan541/OncUponTim")

def split_text(text, nb_splits):
  """Split ``text`` into at most ``nb_splits`` contiguous chunks of equal size.

  Args:
    text: The string to split.
    nb_splits: Desired number of chunks (must be positive).

  Returns:
    A list of at most ``nb_splits`` substrings covering ``text`` in order.
    Returns ``[]`` for empty input or a non-positive ``nb_splits``.

  Fixes over the previous version:
    * removed the unused ``cols`` local;
    * floor division produced ``nb_splits + 1`` chunks whenever the length
      was not divisible (e.g. 10 chars / 3 splits -> 4 chunks); ceil
      division caps the count at ``nb_splits``;
    * a chunk size of 0 (empty text, or text shorter than ``nb_splits``)
      made ``range`` raise ``ValueError: range() arg 3 must not be zero``.
  """
  if not text or nb_splits <= 0:
    return []

  # Ceil division: guarantees chunk_len >= 1 and at most nb_splits chunks.
  chunk_len = -(-len(text) // nb_splits)

  return [text[i:i + chunk_len] for i in range(0, len(text), chunk_len)]

def get_probs(list_of_portions):
  """Score each text portion with the classifier and average the raw logits.

  Each portion is tokenized (truncated/padded to 512 tokens) and passed
  through the module-level ``model``. The class-0 and class-1 logits are
  collected separately across portions.

  Args:
    list_of_portions: Iterable of text strings to classify.

  Returns:
    A 2-tuple ``(mean_logit_class_0, mean_logit_class_1)`` averaged over
    all portions.
  """
  logits_for_class_0 = []
  logits_for_class_1 = []

  for portion in list_of_portions:
    encoded = tokenizer(
        portion,
        truncation=True,
        padding='max_length',
        max_length=512,
        return_tensors='tf',
    )
    prediction = model(encoded)
    # Keep the two class columns of the logits separately so each can be
    # averaged on its own.
    logits_for_class_0.append(prediction.logits[:, 0])
    logits_for_class_1.append(prediction.logits[:, 1])

  return np.mean(logits_for_class_0), np.mean(logits_for_class_1)


def predict(your_text):
  """Classify a piece of text as high- or low-quality content.

  The text is cut into three portions, the classifier's logits are averaged
  over the portions, converted to probabilities with softmax, and the
  positive-class probability is compared against a fixed 0.35 threshold.

  Args:
    your_text: The input text to evaluate.

  Returns:
    A human-readable verdict string for the Gradio UI.
  """
  nb_splits = 3
  portions = split_text(your_text, nb_splits)

  mean_logit_0, mean_logit_1 = get_probs(portions)
  print('y_logits_0:', mean_logit_0)
  print('y_logits_1:', mean_logit_1)

  # softmax over the two mean logits; index 1 is the positive class.
  positive_prob = softmax([mean_logit_0, mean_logit_1])[1]
  print('y_probs_1:', positive_prob)

  threshold_value = 0.35
  is_popular = (positive_prob >= threshold_value)
  print('y_pred_1:', is_popular)

  if is_popular:
    return 'Promising content! Our algorithm predicts it will be very popular.'
  return 'This content is not of high standard. It needs editing. '



# Wire the predictor into a minimal Gradio text-in / text-out interface and
# start the web app (blocks until the server is stopped).
iface = gr.Interface(fn=predict, inputs="text", outputs="text")
iface.launch()