File size: 2,371 Bytes
ab4f033
 
 
 
 
 
 
72b3984
ab4f033
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# imports
from transformers import pipeline
import gradio as gr
import pandas as pd

# Load the Hugging Face pipeline for the model named below. This runs once at
# import time, so every call to inference() reuses the same `nlp` object.
# No task is passed explicitly; presumably it is inferred from the model.
model = "siebert/sentiment-roberta-large-english"
nlp = pipeline(model=model)  # set device=0 to use GPU (CPU default, -1)

# perform inference on given file
def inference(df, filename):
  """Classify the texts in ``df`` and write the predictions to a new CSV.

  Args:
    df: DataFrame whose first column holds the IDs and whose second column
      holds the texts to classify. Any further columns are ignored.
    filename: Object exposing the input file path as ``filename.name``. The
      result file is written next to that path.

  Returns:
    Path of the written CSV: the input path without its final extension,
    followed by ``_csiebert_sentiment.csv``.

  Raises:
    ValueError: If ``df`` has fewer than two columns.
  """
  import os  # local import so this function does not rely on file-level imports

  if df.shape[1] < 2:
    raise ValueError("Input must have at least two columns: ID, Text.")

  # texts & ids
  id_col, text_col = df.columns[0], df.columns[1]
  ids = df[id_col].to_list()
  texts = df[text_col].to_list()

  # iterate over texts, perform inference; collect results in plain lists so
  # the output frame is built once instead of being grown cell by cell
  labels = []
  scores = []
  for text in texts:
    preds = nlp(text)
    print(text)
    print(preds)
    # only the first prediction returned for each text is kept
    labels.append(preds[0]["label"])
    scores.append(preds[0]["score"])

  # create new df based on csv inputs
  new_df = pd.DataFrame(
      {id_col: ids, text_col: texts, "Label": labels, "Score": scores}
  )

  # export new file; splitext strips only the final extension, so dots in
  # directory names or in the file stem do not truncate the output path
  n_filename = os.path.splitext(filename.name)[0] + "_csiebert_sentiment.csv"
  new_df.to_csv(n_filename, index=False)

  # return new file
  return n_filename
  
#  handle file reading for both csv and excel files
def read_file(filename):
  """Load an uploaded .csv/.xlsx file and run sentiment inference on it.

  Args:
    filename: Uploaded-file object exposing its path as ``filename.name``,
      or None when nothing was uploaded.

  Returns:
    The value returned by ``inference`` for the loaded table, or None when no
    file was given or the extension is neither .csv nor .xlsx.
  """
  import os  # local import so this function does not rely on file-level imports

  if filename is None:
    return None

  path = filename.name
  # check type of input file; splitext looks only at the final suffix, so
  # dots elsewhere in the path ("my.data.csv", dotted directories) and paths
  # without any dot are handled correctly
  extension = os.path.splitext(path)[1].lower()

  if extension == ".csv":
    df = pd.read_csv(path, index_col=False)
  elif extension == ".xlsx":
    df = pd.read_excel(path, index_col=False)
  else:
    # if neither csv nor xlsx provided -> exit
    return None

  # handle Unnamed: a leading column with no header is read back as
  # "Unnamed: 0"; drop it for both formats so ID/Text are columns 0 and 1
  if len(df.columns) > 0 and df.columns[0] == "Unnamed: 0":
    df = df.drop("Unnamed: 0", axis=1)

  # perform inference on the loaded table
  return inference(df=df, filename=filename)

# Wire read_file into a one-file-in / one-file-out web UI and start serving it.
# NOTE(review): gr.inputs / gr.outputs, `layout` and a boolean `allow_flagging`
# look like the legacy Gradio interface API - confirm the pinned Gradio version
# still accepts them before upgrading.
upload_box = gr.inputs.File(label="Input file")
download_box = gr.outputs.File(label="Output file")

app = gr.Interface(
    read_file,
    inputs=[upload_box],
    outputs=[download_box],
    description="Sentiment analysis: Input a csv/xlsx of form ID, Text. App performs sentiment analysis on Texts and exports results as new csv to download.",
    allow_flagging=False,
    layout="horizontal",
)
app.launch()