Pendrokar committed on
Commit df3b007
1 Parent(s): b874891

no bert; score text

Files changed (1)
  1. app.py +59 -16
app.py CHANGED
@@ -1,32 +1,75 @@
 
 
  import gradio as gr
- import torch
- from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, DistilBertForSequenceClassification

- modelName = "Pendrokar/TorchMoji"

- distil_tokenizer = AutoTokenizer.from_pretrained(modelName)
- distil_model = AutoModelForSequenceClassification.from_pretrained(modelName, problem_type="multi_label_classification")

- pipeline = pipeline(task="text-classification", model=distil_model, tokenizer=distil_tokenizer)

- def predict(deepmoji_analysis):
-     predictions = pipeline(deepmoji_analysis)

      output_text = "\n"
-     for p in predictions:
-         output_text += p['label'] + ' (' + str(p['score']) + ")\n"
-     return str(distil_tokenizer(deepmoji_analysis)["input_ids"]) + output_text

  gradio_app = gr.Interface(
      fn=predict,
      inputs="text",
      outputs="text",
      examples=[
-         "This GOT show just remember LOTR times!",
-         "Man, can't believe that my 30 days of training just got a NaN loss",
-         "I couldn't see 3 Tom Hollands coming...",
-         "There is nothing better than a soul-warming coffee in the morning",
-         "I fear the vanishing gradient", "deberta"
      ]
  )

+ from __future__ import print_function, division, unicode_literals
+
  import gradio as gr

+ import sys
+ from os.path import abspath, dirname

+ import json
+ import numpy as np

+ from torchmoji.sentence_tokenizer import SentenceTokenizer
+ from torchmoji.model_def import torchmoji_emojis

+ model_name = "Uberduck/torchmoji"
+ model_path = model_name + "/pytorch_model.bin"
+ vocab_path = model_name + "/vocabulary.json"
+
+ def top_elements(array, k):
+     ind = np.argpartition(array, -k)[-k:]
+     return ind[np.argsort(array[ind])][::-1]
+
+ maxlen = 30
+
+ print('Tokenizing using dictionary from {}'.format(vocab_path))
+ with open(vocab_path, 'r') as f:
+     vocabulary = json.load(f)
+
+ st = SentenceTokenizer(vocabulary, maxlen)

+ print('Loading model from {}.'.format(model_path))
+ model = torchmoji_emojis(model_path)
+ print(model)
+
+ def doImportableFunction():
+     return
+
+ def predict(deepmoji_analysis):
      output_text = "\n"
+     print('Running predictions.')
+     # tokenize_sentences expects a list of sentences, so wrap the single Gradio input
+     tokenized, _, _ = st.tokenize_sentences([deepmoji_analysis])
+     prob = model(tokenized)
+
+     # Find top emojis for each sentence. Emoji ids (0-63)
+     # correspond to the mapping in emoji_overview.png
+     # at the root of the torchMoji repo.
+     scores = []
+     for i, t in enumerate([deepmoji_analysis]):
+         t_tokens = tokenized[i]
+         t_score = [t]
+         t_prob = prob[i]
+         ind_top = top_elements(t_prob, 5)
+         t_score.append(sum(t_prob[ind_top]))
+         t_score.extend(ind_top)
+         t_score.extend([t_prob[ind] for ind in ind_top])
+         scores.append(t_score)
+         output_text += str(t_score) + "\n"
+
+     return str(tokenized) + output_text
 
  gradio_app = gr.Interface(
      fn=predict,
      inputs="text",
      outputs="text",
      examples=[
+         "You love hurting me, huh?",
+         "I know good movies, this ain't one",
+         "It was fun, but I'm not going to miss you",
+         "My flight is delayed.. amazing.",
+         "What is happening to me??",
+         "This is the shit!",
+         "This is shit!",
      ]
  )
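
The top_elements helper added in this commit is the core of the scoring step: it returns the indices of the k highest emoji probabilities, which predict() then reports next to their scores. A minimal standalone sketch of that behavior (not part of the commit; the probability values below are made up for illustration):

import numpy as np

def top_elements(array, k):
    # argpartition moves the k largest values into the last k slots (unordered),
    # argsort then orders those k indices by score, and [::-1] flips to descending
    ind = np.argpartition(array, -k)[-k:]
    return ind[np.argsort(array[ind])][::-1]

probs = np.array([0.05, 0.40, 0.10, 0.30, 0.15])  # toy emoji probabilities
print(top_elements(probs, 2))         # [1 3] -> ids of the two highest-scoring emojis
print(probs[top_elements(probs, 2)])  # [0.4 0.3]

In the committed predict(), these indices (emoji ids 0-63, matching emoji_overview.png in the torchMoji repo) are collected for the input sentence together with their probabilities, so the Gradio text output shows the token-id matrix followed by one score row per input.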