root committed on
Commit
c2cbf3f
0 Parent(s):

text-classification -v1

Browse files
Files changed (3) hide show
  1. Makefile +27 -0
  2. app.py +47 -0
  3. requirements.txt +3 -0
Makefile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Every target below is a command name, not a file to build; declaring them
# phony keeps a stray file or directory called e.g. "test" from making the
# target look up to date.
.PHONY: install test debug one-test debugthree format lint all

# Upgrade pip, then install the pinned-by-name dependencies.
install:
	pip install --upgrade pip &&\
		pip install -r requirements.txt

# NOTE(review): the coverage packages (hello, greeting, smath, web), the
# tests/ directory and notebook.ipynb are not among the files added together
# with this Makefile -- confirm they exist before relying on this target.
test:
	python -m pytest -vvv --cov=hello --cov=greeting \
		--cov=smath --cov=web tests
	python -m pytest --nbval notebook.ipynb #tests our jupyter notebook
	#python -m pytest -v tests/test_web.py #if you just want to test web

debug:
	python -m pytest -vv --pdb #Debugger is invoked

one-test:
	python -m pytest -vv tests/test_greeting.py::test_my_name4

# --maxfail=N ends the session after the Nth failure, so N must be 3 (not 4)
# for PDB to open on exactly the first three failures.
debugthree:
	python -m pytest -vv --pdb --maxfail=3 # drop to PDB for first three failures

format:
	black *.py

lint:
	pylint --disable=R,C *.py

all: install lint test format
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForSequenceClassification
2
+ from transformers import AutoTokenizer, AutoConfig
3
+ import numpy as np
4
+ from scipy.special import softmax
5
+ import gradio as gr
6
+
7
+ # Preprocess text (username and link placeholders)
8
def preprocess(text):
    """Mask user mentions and links in *text* with fixed placeholders.

    The text is split on single spaces. A token that starts with ``@`` and
    is longer than one character becomes ``@user``; a token that starts
    with ``http`` becomes ``http``. Tokens are re-joined with single spaces,
    so the spacing between tokens is kept as it was.
    """

    def _mask(token):
        # A lone "@" is left untouched; only real mentions are masked.
        if len(token) > 1 and token.startswith('@'):
            token = '@user'
        if token.startswith('http'):
            token = 'http'
        return token

    return " ".join(_mask(token) for token in text.split(" "))
15
+
16
+ # load model
17
# Checkpoint identifier; the model, tokenizer and config below are all
# loaded from this same id so they stay consistent with each other.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)
24
+
25
+ # create classifier function
26
def classify_sentiments(text):
    """Score *text* against every sentiment label of the loaded model.

    The text is run through ``preprocess``, tokenized, and passed to the
    module-level ``model``; the first row of the model output is turned
    into probabilities with ``softmax``.

    Args:
        text: Raw input string to classify.

    Returns:
        A dict mapping each label from ``config.id2label`` to its
        probability rounded to 4 decimals, inserted from the highest to the
        lowest score.
    """
    # Cap the sequence length: without truncation an over-long input can
    # produce more positions than the model has embeddings for, and the
    # forward pass raises. The limit is passed explicitly so it does not
    # depend on what the tokenizer itself has configured.
    # NOTE(review): 512 is the usual input limit for RoBERTa-base
    # checkpoints -- confirm against config.max_position_embeddings.
    max_tokens = 512

    encoded_input = tokenizer(
        preprocess(text),
        return_tensors='pt',
        truncation=True,
        max_length=max_tokens,
    )
    output = model(**encoded_input)

    # output[0] holds the logits; [0] selects the single input in the batch.
    logits = output[0][0].detach().numpy()
    probabilities = softmax(logits)

    # argsort is ascending, so reverse it to list the best label first.
    ranking = np.argsort(probabilities)[::-1]
    return {
        config.id2label[int(idx)]: np.round(float(probabilities[idx]), 4)
        for idx in ranking
    }
43
+
44
+
45
+ #build the Gradio app
46
# One text input feeding classify_sentiments, rendered with a label output;
# launch() is called right away, at module level.
demo = gr.Interface(fn=classify_sentiments, inputs='text', outputs='label')
demo.launch()
47
+
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
scipy
gradio
numpy
# app.py imports transformers and asks the tokenizer for PyTorch tensors
# (return_tensors='pt'), so both packages must be installed as well.
transformers
torch