samueldomdey committed on
Commit
cfa89f0
1 Parent(s): 294751b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -6
app.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
3
  import numpy as np
4
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
5
  # summary function - test for single gradio function interface
 
6
  def bulk_function(filename):
7
  # Create class for data preparation
8
  class SimpleDataset:
@@ -20,18 +21,25 @@ def bulk_function(filename):
20
  tokenizer = AutoTokenizer.from_pretrained(model_name)
21
  model = AutoModelForSequenceClassification.from_pretrained(model_name)
22
  trainer = Trainer(model=model)
 
 
 
 
23
 
24
  # read file lines
25
  with open(filename.name, "r") as f:
26
  lines = f.readlines()
27
  # expects unnamed:0 or index, col name -> strip both
28
  lines_s = [item.split("\n")[0].split(",")[-1] for item in lines]
 
 
 
29
 
30
  # Tokenize texts and create prediction data set
31
  tokenized_texts = tokenizer(lines_s,truncation=True,padding=True)
32
  pred_dataset = SimpleDataset(tokenized_texts)
33
 
34
- # Run predictions
35
  predictions = trainer.predict(pred_dataset)
36
 
37
  # Transform predictions to labels
@@ -65,11 +73,8 @@ def bulk_function(filename):
65
  df = pd.DataFrame(list(zip(lines_s,preds,labels,scores, anger, disgust, fear, joy, neutral, sadness, surprise)), columns=['text','pred','label','score', 'anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise'])
66
 
67
  # save results to csv
68
- YOUR_FILENAME = "YOUR_FILENAME_EMOTIONS_gradio.csv" # name your output file
69
  df.to_csv(YOUR_FILENAME)
70
 
71
  # return dataframe for space output
72
- return df
73
- # launch space
74
- gr.Interface(bulk_function, [gr.inputs.File(file_count="single", type="file", label="str", optional=False),], "dataframe",
75
- ).launch()
 
3
  import numpy as np
4
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
5
  # summary function - test for single gradio function interface
6
+ # summary function - test for single gradio function interface
7
  def bulk_function(filename):
8
  # Create class for data preparation
9
  class SimpleDataset:
 
21
  tokenizer = AutoTokenizer.from_pretrained(model_name)
22
  model = AutoModelForSequenceClassification.from_pretrained(model_name)
23
  trainer = Trainer(model=model)
24
+ print(filename, type(filename))
25
+ print(filename.name)
26
+
27
+
28
 
29
  # read file lines
30
  with open(filename.name, "r") as f:
31
  lines = f.readlines()
32
  # expects unnamed:0 or index, col name -> strip both
33
  lines_s = [item.split("\n")[0].split(",")[-1] for item in lines]
34
+ print(lines_s)
35
+ print(filename)
36
+
37
 
38
  # Tokenize texts and create prediction data set
39
  tokenized_texts = tokenizer(lines_s,truncation=True,padding=True)
40
  pred_dataset = SimpleDataset(tokenized_texts)
41
 
42
+ # Run predictions -> predict whole df
43
  predictions = trainer.predict(pred_dataset)
44
 
45
  # Transform predictions to labels
 
73
  df = pd.DataFrame(list(zip(lines_s,preds,labels,scores, anger, disgust, fear, joy, neutral, sadness, surprise)), columns=['text','pred','label','score', 'anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise'])
74
 
75
  # save results to csv
76
+ YOUR_FILENAME = filename.name.split(".")[0] + "_emotion_predictions" + ".csv" # name your output file
77
  df.to_csv(YOUR_FILENAME)
78
 
79
  # return dataframe for space output
80
+ return df