BecomeAllan committed on
Commit
58dedb9
1 Parent(s): 5e00e8e
Files changed (4) hide show
  1. .vscode/settings.json +7 -0
  2. app.py +8 -4
  3. requirements.txt +1 -0
  4. utils.py +54 -0
.vscode/settings.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "workbench.colorCustomizations": {
3
+ "activityBar.background": "#590F35",
4
+ "titleBar.activeBackground": "#7C154B",
5
+ "titleBar.activeForeground": "#FEFCFD"
6
+ }
7
+ }
app.py CHANGED
@@ -5,6 +5,10 @@ import torch.nn as nn
5
  from torch.utils.data import Dataset, DataLoader
6
  import unicodedata
7
  import re
 
 
 
 
8
 
9
  # Undesirable patterns within texts
10
  patterns = {
@@ -169,7 +173,7 @@ def treat_data_input(data, etailment_txt):
169
  batch_size=200,drop_last=False,
170
  num_workers=num_workers)
171
 
172
- return dataload_train, dataload_remain
173
 
174
 
175
  import gc
@@ -191,7 +195,7 @@ def treat_train_evaluate(dataload_train, dataload_remain):
191
  weight_decay = config['weight_decay'])
192
 
193
 
194
- model_few.to('cuda')
195
  model_few.train()
196
 
197
 
@@ -214,7 +218,7 @@ def treat_sort(dataload_all,logits):
214
  def pipeline(data):
215
  # data = pd.read_csv(fil.name)
216
  data = pd.read_excel(data)
217
- dataload_train, dataload_remain = treat_data_input(data,"its a great text")
218
  logits = treat_train_evaluate(dataload_train, dataload_remain)
219
  treat_sort(dataload_all,logits)
220
  return "output.xlsx"
@@ -226,7 +230,7 @@ import gradio as gr
226
  with gr.Blocks() as demo:
227
  fil = gr.File(label="input data")
228
  output = gr.File(label="output data")
229
- greet_btn = gr.Button("Greet")
230
  greet_btn.click(fn=pipeline, inputs=fil, outputs=output)
231
 
232
  demo.launch()
 
5
  from torch.utils.data import Dataset, DataLoader
6
  import unicodedata
7
  import re
8
+ import gradio
9
+ import json
10
+ import numpy as np
11
+ import pandas as pd
12
 
13
  # Undesirable patterns within texts
14
  patterns = {
 
173
  batch_size=200,drop_last=False,
174
  num_workers=num_workers)
175
 
176
+ return dataload_train, dataload_remain, dataload_all
177
 
178
 
179
  import gc
 
195
  weight_decay = config['weight_decay'])
196
 
197
 
198
+ model_few.to(device)
199
  model_few.train()
200
 
201
 
 
218
  def pipeline(data):
219
  # data = pd.read_csv(fil.name)
220
  data = pd.read_excel(data)
221
+ dataload_train, dataload_remain, dataload_all = treat_data_input(data,"its a great text")
222
  logits = treat_train_evaluate(dataload_train, dataload_remain)
223
  treat_sort(dataload_all,logits)
224
  return "output.xlsx"
 
230
  with gr.Blocks() as demo:
231
  fil = gr.File(label="input data")
232
  output = gr.File(label="output data")
233
+ greet_btn = gr.Button("Rank")
234
  greet_btn.click(fn=pipeline, inputs=fil, outputs=output)
235
 
236
  demo.launch()
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  transformers==4.16.2
2
  torchmetrics==0.8.0
3
  matplotlib==3.5.1
 
4
  torch
 
1
  transformers==4.16.2
2
  torchmetrics==0.8.0
3
  matplotlib==3.5.1
4
+ gradio
5
  torch
utils.py CHANGED
@@ -1,7 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import torch
2
  import torch.nn as nn
3
  from torch.utils.data import Dataset, DataLoader
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  LABEL_MAP = {'negative': 0,
6
  'not included':0,
7
  '0':0,
 
1
+ import torch.nn.functional as F
2
+ import torch.nn as nn
3
+ from torch.utils.data import Dataset, DataLoader
4
+
5
+ import math
6
+ import torch
7
+ import numpy as np
8
+ import pandas as pd
9
+ import time
10
+ import transformers
11
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
12
+ from sklearn.manifold import TSNE
13
+ from copy import deepcopy, copy
14
+ import seaborn as sns
15
+ import matplotlib.pylab as plt
16
+ from pprint import pprint
17
+ import shutil
18
+ import datetime
19
+ import re
20
+ import json
21
+ from pathlib import Path
22
+
23
+
24
+ from itertools import chain
25
+ import numpy as np
26
+ import pandas as pd
27
+
28
+
29
+
30
  import torch
31
  import torch.nn as nn
32
  from torch.utils.data import Dataset, DataLoader
33
 
34
+ # Fetching pre-trained model and tokenizer
35
+ class initializer:
36
+ def __init__(self, MODEL_NAME, **config):
37
+ self.MODEL_NAME = MODEL_NAME
38
+
39
+ model = config.get("model")
40
+ tokenizer = config.get("tokenizer")
41
+
42
+ # Model
43
+ self.model = model.from_pretrained(MODEL_NAME,
44
+ return_dict=True,
45
+ output_attentions = False)
46
+ # Tokenizer
47
+ self.tokenizer = tokenizer.from_pretrained(MODEL_NAME,
48
+ do_lower_case = True)
49
+
50
+ config = {
51
+ "model": AutoModelForSequenceClassification,
52
+ "tokenizer": AutoTokenizer
53
+ }
54
+
55
+ # Pre-trained model initializer (uncased sciBERT)
56
+ initializer_model_scibert = initializer('allenai/scibert_scivocab_uncased', **config)
57
+ # initializer_model = initializer('bert-base-uncased', **config)
58
+
59
  LABEL_MAP = {'negative': 0,
60
  'not included':0,
61
  '0':0,