saattrupdan committed on
Commit
8be4fd9
1 Parent(s): 80b5399

feat: Update app

Browse files
Files changed (1) hide show
  1. app.py +216 -161
app.py CHANGED
@@ -1,190 +1,245 @@
1
  """Gradio app that showcases Scandinavian zero-shot text classification models."""
2
 
 
3
  import gradio as gr
4
  from transformers import pipeline
5
  from luga import language as detect_language
6
-
7
-
8
- # Load the zero-shot classification pipeline
9
- classifier = pipeline(
10
- "zero-shot-classification", model="alexandrainst/scandi-nli-large"
11
- )
12
-
13
-
14
- # Set the description for the interface
15
- DESCRIPTION = """Classify text in Danish, Swedish or Norwegian into categories, without
16
- finetuning on any training data!
17
-
18
- Note that the models will most likely *not* work as well as a finetuned model on your
19
- specific data, but they can be used as a starting point for your own classification
20
- task ✨
21
-
22
-
23
- Also, be patient, as this demo is running on a CPU!"""
24
-
25
-
26
- def classification(task: str, doc: str) -> str:
27
  """Classify text into categories.
28
 
29
  Args:
30
- task (str):
31
- Task to perform.
32
  doc (str):
33
  Text to classify.
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  Returns:
36
- str:
37
- The predicted label.
38
  """
39
  # Detect the language of the text
40
  language = detect_language(doc.replace('\n', ' ')).name
41
 
42
- # Define the confidence string based on the language
43
- if language == "sv" or language == "no":
44
- confidence_str = "konfidensnivå"
 
 
 
 
45
  else:
46
- confidence_str = "konfidensniveau"
47
-
48
- # If the task is sentiment, classify the text into positive, negative or neutral
49
- if task == "Sentiment classification":
50
- if language == "sv":
51
- hypothesis_template = "Detta exempel är {}."
52
- candidate_labels = ["positivt", "negativt", "neutralt"]
53
- elif language == "no":
54
- hypothesis_template = "Dette eksemplet er {}."
55
- candidate_labels = ["positivt", "negativt", "nøytralt"]
56
- else:
57
- hypothesis_template = "Dette eksempel er {}."
58
- candidate_labels = ["positivt", "negativt", "neutralt"]
59
-
60
- # Else if the task is topic, classify the text into a topic
61
- elif task == "News topic classification":
62
- if language == "sv":
63
- hypothesis_template = "Detta exempel handlar om {}."
64
- candidate_labels = [
65
- "krig",
66
- "politik",
67
- "utbildning",
68
- "hälsa",
69
- "ekonomi",
70
- "mode",
71
- "sport",
72
- ]
73
- elif language == "no":
74
- hypothesis_template = "Dette eksemplet handler om {}."
75
- candidate_labels = [
76
- "krig",
77
- "politikk",
78
- "utdanning",
79
- "helse",
80
- "økonomi",
81
- "mote",
82
- "sport",
83
- ]
84
- else:
85
- hypothesis_template = "Denne nyhedsartikel handler primært om {}."
86
- candidate_labels = [
87
- "krig",
88
- "politik",
89
- "uddannelse",
90
- "sundhed",
91
- "økonomi",
92
- "mode",
93
- "sport",
94
- ]
95
-
96
- # Else if the task is spam detection, classify the text into spam or not spam
97
- elif task == "Spam detection":
98
- if language == "sv":
99
- hypothesis_template = "Det här e-postmeddelandet ser {}."
100
- candidate_labels = {
101
- "ut som ett skräppostmeddelande": "Spam",
102
- "inte ut som ett skräppostmeddelande": "Inte spam",
103
- }
104
- elif language == "no":
105
- hypothesis_template = "Denne e-posten ser {}."
106
- candidate_labels = {
107
- "ut som en spam-e-post": "Spam",
108
- "ikke ut som en spam-e-post": "Ikke spam",
109
- }
110
- else:
111
- hypothesis_template = "Denne e-mail ligner {}."
112
- candidate_labels = {
113
- "en spam e-mail": "Spam",
114
- "ikke en spam e-mail": "Ikke spam",
115
- }
116
-
117
- # Else if the task is product feedback detection, classify the text into product
118
- # feedback or not product feedback
119
- elif task == "Product feedback detection":
120
- if language == "sv":
121
- hypothesis_template = "Den här kommentaren är {}."
122
- candidate_labels = {
123
- "en recension av en produkt": "Produktfeedback",
124
- "inte en recension av en produkt": "Inte produktfeedback",
125
- }
126
- elif language == "no":
127
- hypothesis_template = "Denne kommentaren er {}."
128
- candidate_labels = {
129
- "en anmeldelse av et produkt": "Produkttilbakemelding",
130
- "ikke en anmeldelse av et produkt": "Ikke produkttilbakemelding",
131
- }
132
- else:
133
- hypothesis_template = "Denne kommentar er {}."
134
- candidate_labels = {
135
- "en anmeldelse af et produkt": "Produktfeedback",
136
- "ikke en anmeldelse af et produkt": "Ikke produktfeedback",
137
- }
138
-
139
- # Else the task is not supported, so raise an error
140
- else:
141
- raise ValueError(f"Task {task} not supported.")
142
-
143
- # If `candidate_labels` is a list then convert it to a dictionary, where the keys
144
- # are the entries in the list and the values are the keys capitalized
145
- if isinstance(candidate_labels, list):
146
- candidate_labels = {label: label.capitalize() for label in candidate_labels}
147
 
148
  # Run the classifier on the text
149
  result = classifier(
150
  doc,
151
- candidate_labels=list(candidate_labels.keys()),
152
  hypothesis_template=hypothesis_template,
153
  )
154
 
155
  print(result)
156
 
157
  # Return the predicted label
158
- return (
159
- f"{candidate_labels[result['labels'][0]]}\n"
160
- f"({confidence_str}: {result['scores'][0]:.0%})"
 
 
 
 
 
 
161
  )
162
 
163
- # Create a dropdown menu for the task
164
- dropdown = gr.inputs.Dropdown(
165
- label="Task",
166
- choices=[
167
- "Sentiment classification",
168
- "News topic classification",
169
- "Spam detection",
170
- "Product feedback detection",
171
- ],
172
- default="Sentiment classification",
173
- )
174
-
175
- # Create a text box for the input text
176
- input_textbox = gr.inputs.Textbox(
177
- label="Text", default="Jeg er helt vild med fodbolden 😊"
178
- )
179
-
180
- # Create the interface, where the function depends on the task chosen
181
- interface = gr.Interface(
182
- fn=classification,
183
- inputs=[dropdown, input_textbox],
184
- outputs=gr.outputs.Label(type="text"),
185
- title="Scandinavian zero-shot text classification",
186
- description=DESCRIPTION,
187
- )
188
-
189
- # Run the app
190
- interface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """Gradio app that showcases Scandinavian zero-shot text classification models."""
2
 
3
+ from typing import Dict, Tuple
4
  import gradio as gr
5
  from transformers import pipeline
6
  from luga import language as detect_language
7
+ import re
8
+
9
+
10
def _parse_candidate_labels(raw_labels: str) -> list:
    """Turn a comma-separated string into a list of clean candidate labels.

    Args:
        raw_labels (str):
            Comma-separated candidate labels, as typed into a text box.

    Returns:
        list of str:
            The labels in their original order, with surrounding whitespace
            stripped and empty entries removed.
    """
    return [label.strip() for label in raw_labels.split(",") if label.strip()]


def classification(
    doc: str,
    da_hypothesis_template: str,
    da_candidate_labels: str,
    sv_hypothesis_template: str,
    sv_candidate_labels: str,
    no_hypothesis_template: str,
    no_candidate_labels: str,
) -> Dict[str, float]:
    """Classify text into categories.

    The language of `doc` is detected first, and the hypothesis template and
    candidate labels belonging to that language are then passed to the
    zero-shot classification pipeline stored in the global `classifier`.

    Args:
        doc (str):
            Text to classify.
        da_hypothesis_template (str):
            Template for the hypothesis to be used for Danish classification.
        da_candidate_labels (str):
            Comma-separated list of candidate labels for Danish classification.
        sv_hypothesis_template (str):
            Template for the hypothesis to be used for Swedish classification.
        sv_candidate_labels (str):
            Comma-separated list of candidate labels for Swedish classification.
        no_hypothesis_template (str):
            Template for the hypothesis to be used for Norwegian classification.
        no_candidate_labels (str):
            Comma-separated list of candidate labels for Norwegian classification.

    Returns:
        dict of str to float:
            Every candidate label mapped to the score the classifier gave it.

    Raises:
        ValueError:
            If no non-empty candidate labels were given for the detected language.
    """
    # Detect the language of the text
    language = detect_language(doc.replace("\n", " ")).name

    # Pick the settings for the detected language. Anything that is neither
    # Swedish ("sv") nor Norwegian ("no") falls back to the Danish settings.
    settings_by_language = {
        "sv": (sv_hypothesis_template, sv_candidate_labels),
        "no": (no_hypothesis_template, no_candidate_labels),
    }
    hypothesis_template, raw_labels = settings_by_language.get(
        language, (da_hypothesis_template, da_candidate_labels)
    )

    # An empty label box would otherwise yield a single blank label, so fail
    # with a clear message instead of classifying against nothing.
    candidate_labels = _parse_candidate_labels(raw_labels)
    if not candidate_labels:
        raise ValueError(
            "No candidate labels were provided for the detected language "
            f"({language!r}). Please enter at least one label."
        )

    # Run the classifier on the text
    result = classifier(
        doc,
        candidate_labels=candidate_labels,
        hypothesis_template=hypothesis_template,
    )

    # Write the raw pipeline output to stdout
    print(result)

    # Return all labels together with their scores
    return dict(zip(result["labels"], result["scores"]))
66
+
67
+
68
def main() -> None:
    """Load the model, build the Gradio interface and launch the demo.

    The zero-shot classification pipeline is stored in the module-level global
    `classifier`, which `classification` looks up when it is called.
    """
    # Load the zero-shot classification pipeline
    global classifier
    classifier = pipeline(
        "zero-shot-classification", model="alexandrainst/scandi-nli-large"
    )

    # Create dictionary of descriptions for each task, containing the hypothesis
    # template and candidate labels. Each value is ordered as (Danish template,
    # Danish labels, Swedish template, Swedish labels, Norwegian template,
    # Norwegian labels), matching the order of the text boxes below.
    task_configs: Dict[str, Tuple[str, str, str, str, str, str]] = {
        "Sentiment classification": (
            "Dette eksempel er {}.",
            "positivt, negativt, neutralt",
            "Detta exempel är {}.",
            "positivt, negativt, neutralt",
            "Dette eksemplet er {}.",
            "positivt, negativt, nøytralt",
        ),
        "News topic classification": (
            "Denne nyhedsartikel handler primært om {}.",
            "krig, politik, uddannelse, sundhed, økonomi, mode, sport",
            "Den här nyhetsartikeln handlar främst om {}.",
            "krig, politik, utbildning, hälsa, ekonomi, mode, sport",
            "Denne nyhetsartikkelen handler først og fremst om {}.",
            "krig, politikk, utdanning, helse, økonomi, mote, sport",
        ),
        "Spam detection": (
            "Denne e-mail ligner {}.",
            "en spam e-mail, ikke en spam e-mail",
            "Det här e-postmeddelandet ser {}.",
            "ut som ett skräppostmeddelande, inte ut som ett skräppostmeddelande",
            "Denne e-posten ser {}.",
            "ut som en spam-e-post, ikke ut som en spam-e-post",
        ),
        "Product feedback detection": (
            "Denne kommentar er {}.",
            "en anmeldelse af et produkt, ikke en anmeldelse af et produkt",
            "Den här kommentaren är {}.",
            "en recension av en produkt, inte en recension av en produkt",
            "Denne kommentaren er {}.",
            "en anmeldelse av et produkt, ikke en anmeldelse av et produkt",
        ),
        "Define your own task!": (
            "Dette eksempel er {}.",
            "",
            "Detta exempel är {}.",
            "",
            "Dette eksemplet er {}.",
            "",
        ),
    }

    # The task shown on start-up; the text boxes take their initial values from
    # its configuration so the defaults cannot drift from `task_configs`.
    default_task = "Sentiment classification"
    (
        da_template_default,
        da_labels_default,
        sv_template_default,
        sv_labels_default,
        no_template_default,
        no_labels_default,
    ) = task_configs[default_task]

    def set_task_setup(task: str) -> Tuple[str, str, str, str, str, str]:
        """Return the templates and labels that belong to the chosen task."""
        return task_configs[task]

    with gr.Blocks() as demo:

        # Create title and description
        gr.Markdown("# Scandinavian Zero-shot Text Classification")
        gr.Markdown("""
        Classify text in Danish, Swedish or Norwegian into categories, without
        finetuning on any training data!

        Note that the models will most likely not work as well as a finetuned model
        on your specific data, but they can be used as a starting point for your
        own classification task ✨

        Also, be patient, as this demo is running on a CPU!
        """)

        with gr.Row():

            # Input column
            with gr.Column():

                # Create a dropdown menu for the task; the choices are the keys
                # of `task_configs`, in their declared order
                dropdown = gr.Dropdown(
                    label="Task",
                    choices=list(task_configs),
                    value=default_task,
                )

                with gr.Row(variant="compact"):
                    da_hypothesis_template = gr.Textbox(
                        label="Danish hypothesis template",
                        value=da_template_default,
                    )
                    da_candidate_labels = gr.Textbox(
                        label="Danish candidate labels (comma separated)",
                        value=da_labels_default,
                    )

                with gr.Row(variant="compact"):
                    sv_hypothesis_template = gr.Textbox(
                        label="Swedish hypothesis template",
                        value=sv_template_default,
                    )
                    sv_candidate_labels = gr.Textbox(
                        label="Swedish candidate labels (comma separated)",
                        value=sv_labels_default,
                    )

                with gr.Row(variant="compact"):
                    no_hypothesis_template = gr.Textbox(
                        label="Norwegian hypothesis template",
                        value=no_template_default,
                    )
                    no_candidate_labels = gr.Textbox(
                        label="Norwegian candidate labels (comma separated)",
                        value=no_labels_default,
                    )

                # When a new task is chosen, update the templates and labels
                dropdown.change(
                    fn=set_task_setup,
                    inputs=dropdown,
                    outputs=[
                        da_hypothesis_template,
                        da_candidate_labels,
                        sv_hypothesis_template,
                        sv_candidate_labels,
                        no_hypothesis_template,
                        no_candidate_labels,
                    ],
                )

            # Output column
            with gr.Column():

                # Create a text box for the input text
                input_textbox = gr.Textbox(
                    label="Input text", value="Jeg er helt vild med fodbolden 😊"
                )

                with gr.Row():
                    clear_btn = gr.Button(value="Clear", width=0.5)
                    submit_btn = gr.Button(
                        value="Submit", width=0.5, variant="primary"
                    )

                # When the clear button is clicked, clear the input text box
                clear_btn.click(
                    fn=lambda _: "", inputs=input_textbox, outputs=input_textbox
                )

                # NOTE(review): the result column is placed below the input text
                # inside the output column; confirm this is the intended layout.
                with gr.Column():

                    # Create output text box
                    output_textbox = gr.Label(label="Result")

                # When the submit button is clicked, run the classifier on the
                # input text and display the result in the output text box
                submit_btn.click(
                    fn=classification,
                    inputs=[
                        input_textbox,
                        da_hypothesis_template,
                        da_candidate_labels,
                        sv_hypothesis_template,
                        sv_candidate_labels,
                        no_hypothesis_template,
                        no_candidate_labels,
                    ],
                    outputs=output_textbox,
                )

    # Run the app
    demo.launch()


if __name__ == "__main__":
    main()