Berbex committed on
Commit
9429d1c
•
1 Parent(s): db6e115

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -2
app.py CHANGED
@@ -6,13 +6,41 @@ console = Console()
6
 
7
  dataset = load_dataset("zeroshot/twitter-financial-news-sentiment", )
8
 
9
- console.log( dataset['train'][:10] )
10
-
11
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
12
 
13
  model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
14
  tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def sentiment_score(review):
17
  tokens = tokenizer.encode(review, return_tensors='pt')
18
  result = model(tokens)
 
6
 
7
  dataset = load_dataset("zeroshot/twitter-financial-news-sentiment", )
8
 
 
 
9
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
10
 
11
  model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
12
  tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
13
 
14
+ labels = [label for label in dataset['train'].features.keys() if label not in ['text']]
15
+
16
+ console.log( labels )
17
+ def preprocess_data(examples):  # tokenize a batch and attach a per-example label matrix; assumes `examples` maps column names to per-row lists - TODO confirm against caller
18
+ # take the batch of texts from the "text" column
19
+ text = examples["text"]
20
+ # tokenize them, padding/truncating every text to max_length=128
21
+ encoding = tokenizer(text, padding="max_length", truncation=True, max_length=128)
22
+ # keep only the batch columns whose names are in the module-level `labels` list (every dataset feature except "text")
23
+ labels_batch = {k: examples[k] for k in examples.keys() if k in labels}
24
+ # create a zero-filled numpy array of shape (batch_size, num_labels); NOTE(review): no `np` import is visible in this diff - confirm numpy is imported as np
25
+ labels_matrix = np.zeros((len(text), len(labels)))
26
+ # copy each label column into its matrix column; NOTE(review): presumably one integer class column for this dataset, so values would be class ids rather than 0/1 indicators - verify
27
+ for idx, label in enumerate(labels):
28
+ labels_matrix[:, idx] = labels_batch[label]
29
+
30
+ encoding["labels"] = labels_matrix.tolist()  # nested Python lists of floats (np.zeros defaults to float64)
31
+
32
+ return encoding
33
+
34
+
35
+
36
+
37
+
38
+
39
+
40
+
41
+
42
+
43
+
44
  def sentiment_score(review):
45
  tokens = tokenizer.encode(review, return_tensors='pt')
46
  result = model(tokens)