kaushikbar committed on
Commit
83f2778
1 Parent(s): 034a568

Loaded classifiers a priori

Browse files
Files changed (1) hide show
  1. app.py +82 -66
app.py CHANGED
@@ -19,6 +19,82 @@ hypothesis_templates = {'en': 'This example is {}.', # English
19
  'tr': 'Bu örnek {}.', # Turkish
20
  'no': 'Dette eksempelet er {}.'} # Norsk
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  def detect_lang(sequence, labels):
23
  DetectorFactory.seed = 0
24
  seq_lang = 'en'
@@ -57,14 +133,10 @@ def detect_lang(sequence, labels):
57
 
58
  return seq_lang
59
 
60
-
61
  def sequence_to_classify(sequence, labels):
62
- label_clean = str(labels).split(",")
63
 
64
- lang = detect_lang(sequence, labels)
65
- classifier = pipeline("zero-shot-classification",
66
- hypothesis_template=hypothesis_templates[lang],
67
- model=models[lang])
68
  response = classifier(sequence, label_clean, multi_label=True)
69
 
70
  predicted_labels = response['labels']
@@ -77,75 +149,19 @@ def sequence_to_classify(sequence, labels):
77
 
78
  return clean_output
79
 
80
- example_text1 = "Coronavirus disease (COVID-19) is an infectious disease caused by the SARS-CoV-2 virus. Most \
81
- people who fall sick with COVID-19 will experience mild to moderate symptoms and recover without special treatment. \
82
- However, some will become seriously ill and require medical attention."
83
- example_labels1 = "business,health related,politics,climate change"
84
-
85
- example_text2 = "Elephants are"
86
- example_labels2 = "big,small,strong,fast,carnivorous"
87
-
88
- example_text3 = "Elephants"
89
- example_labels3 = "are big,can be very small,generally not strong enough,are faster than you think"
90
-
91
- example_text4 = "Dogs are man's best friend"
92
- example_labels4 = "positive,negative,neutral"
93
-
94
- example_text5 = "Amar sonar bangla ami tomay bhalobasi"
95
- example_labels5 = "bhalo,kharap"
96
-
97
- example_text6 = "Letzte Woche gab es einen Selbstmord in einer nahe gelegenen kolonie"
98
- example_labels6 = "verbrechen,tragödie,stehlen"
99
-
100
- example_text7 = "El autor se perfila, a los 50 años de su muerte, como uno de los grandes de su siglo"
101
- example_labels7 = "cultura,sociedad,economia,salud,deportes"
102
-
103
- example_text8 = "Россия в среду заявила, что военные учения в аннексированном Москвой Крыму закончились \
104
- и что солдаты возвращаются в свои гарнизоны, на следующий день после того, как она объявила о первом выводе \
105
- войск от границ Украины."
106
- example_labels8 = "новости,комедия"
107
-
108
- example_text9 = "I quattro registi - Federico Fellini, Pier Paolo Pasolini, Bernardo Bertolucci e Vittorio De Sica - \
109
- hanno utilizzato stili di ripresa diversi, ma hanno fortemente influenzato le giovani generazioni di registi."
110
- example_labels9 = "cinema,politica,cibo"
111
-
112
- example_text10 = "Ja, vi elsker dette landet,\
113
- som det stiger frem,\
114
- furet, værbitt over vannet,\
115
- med de tusen hjem.\
116
- Og som fedres kamp har hevet\
117
- det av nød til seir"
118
- example_labels10 = "helse,sport,religion,mat,patriotisme og nasjonalisme"
119
-
120
- example_text11 = "Şampiyonlar Ligi’nde 5. hafta oynanan karşılaşmaların ardından sona erdi. Real Madrid, \
121
- Inter ve Sporting oynadıkları mücadeleler sonrasında Son 16 turuna yükselmeyi başardı. \
122
- Gecenin dev mücadelesinde ise Manchester City, PSG’yi yenerek liderliği garantiledi."
123
- example_labels11 = "dünya,ekonomi,kültür,siyaset,spor,teknoloji"
124
-
125
  iface = gr.Interface(
126
  title="Multilingual Multi-label Zero-shot Classification",
127
  description="Currently supported languages are English, German, Spanish, Italian, Russian, Turkish, Norsk.",
128
  fn=sequence_to_classify,
129
- inputs=[gr.inputs.Textbox(lines=20,
130
  label="Please enter the text you would like to classify...",
131
  placeholder="Text here..."),
132
- gr.inputs.Textbox(lines=5,
133
  label="Possible candidate labels (separated by comma)...",
134
  placeholder="Labels here separated by comma...")],
135
  outputs=gr.outputs.Label(num_top_classes=5),
136
  capture_session=True,
137
  #interpretation="default",
138
- examples=[
139
- [example_text1, example_labels1],
140
- [example_text2, example_labels2],
141
- [example_text3, example_labels3],
142
- [example_text4, example_labels4],
143
- [example_text5, example_labels5],
144
- [example_text6, example_labels6],
145
- [example_text7, example_labels7],
146
- [example_text8, example_labels8],
147
- [example_text9, example_labels9],
148
- [example_text10, example_labels10],
149
- [example_text11, example_labels11]]
150
- )
151
  iface.launch()
 
19
  'tr': 'Bu örnek {}.', # Turkish
20
  'no': 'Dette eksempelet er {}.'} # Norsk
21
 
22
# One zero-shot classification pipeline per supported language, built once
# when the module is loaded and looked up by language code afterwards.
# The language codes are listed explicitly (rather than taken from
# `models` or `hypothesis_templates`) so exactly these seven pipelines are
# created, in this order, regardless of what else those dicts may contain.
classifiers = {
    lang: pipeline(
        "zero-shot-classification",
        hypothesis_template=hypothesis_templates[lang],
        model=models[lang],
    )
    for lang in ('en', 'de', 'es', 'it', 'ru', 'tr', 'no')
}
36
+
37
def prep_examples():
    """Build the example rows offered in the Gradio interface.

    Returns:
        list[list[str]]: eleven ``[text, labels]`` pairs, where ``text`` is
        the sequence to classify and ``labels`` is a comma-separated string
        of candidate labels.
    """
    # Long texts are assembled with implicit string-literal concatenation
    # instead of backslash continuations inside the literal. A backslash
    # continuation glues the next source line (including any indentation)
    # straight onto the string, which either fuses words ("hjem.Og") or
    # injects runs of spaces. Each fragment here carries its own explicit
    # separating space.
    return [
        [
            "Coronavirus disease (COVID-19) is an infectious disease caused by the SARS-CoV-2 virus. Most "
            "people who fall sick with COVID-19 will experience mild to moderate symptoms and recover without "
            "special treatment. "
            "However, some will become seriously ill and require medical attention.",
            "business,health related,politics,climate change",
        ],
        [
            "Elephants are",
            "big,small,strong,fast,carnivorous",
        ],
        [
            "Elephants",
            "are big,can be very small,generally not strong enough,are faster than you think",
        ],
        [
            "Dogs are man's best friend",
            "positive,negative,neutral",
        ],
        [
            "Amar sonar bangla ami tomay bhalobasi",
            "bhalo,kharap",
        ],
        [
            "Letzte Woche gab es einen Selbstmord in einer nahe gelegenen kolonie",
            "verbrechen,tragödie,stehlen",
        ],
        [
            "El autor se perfila, a los 50 años de su muerte, como uno de los grandes de su siglo",
            "cultura,sociedad,economia,salud,deportes",
        ],
        [
            "Россия в среду заявила, что военные учения в аннексированном Москвой Крыму закончились "
            "и что солдаты возвращаются в свои гарнизоны, на следующий день после того, как она объявила "
            "о первом выводе "
            "войск от границ Украины.",
            "новости,комедия",
        ],
        [
            "I quattro registi - Federico Fellini, Pier Paolo Pasolini, Bernardo Bertolucci e Vittorio De Sica - "
            "hanno utilizzato stili di ripresa diversi, ma hanno fortemente influenzato le giovani generazioni "
            "di registi.",
            "cinema,politica,cibo",
        ],
        [
            "Ja, vi elsker dette landet, "
            "som det stiger frem, "
            "furet, værbitt over vannet, "
            "med de tusen hjem. "
            "Og som fedres kamp har hevet "
            "det av nød til seir",
            "helse,sport,religion,mat,patriotisme og nasjonalisme",
        ],
        [
            "Şampiyonlar Ligi’nde 5. hafta oynanan karşılaşmaların ardından sona erdi. Real Madrid, "
            "Inter ve Sporting oynadıkları mücadeleler sonrasında Son 16 turuna yükselmeyi başardı. "
            "Gecenin dev mücadelesinde ise Manchester City, PSG’yi yenerek liderliği garantiledi.",
            "dünya,ekonomi,kültür,siyaset,spor,teknoloji",
        ],
    ]
97
+
98
  def detect_lang(sequence, labels):
99
  DetectorFactory.seed = 0
100
  seq_lang = 'en'
 
133
 
134
  return seq_lang
135
 
 
136
  def sequence_to_classify(sequence, labels):
137
+ classifier = classifiers[detect_lang(sequence, labels)]
138
 
139
+ label_clean = str(labels).split(",")
 
 
 
140
  response = classifier(sequence, label_clean, multi_label=True)
141
 
142
  predicted_labels = response['labels']
 
149
 
150
  return clean_output
151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
# Gradio front end: one text box for the sequence and one for the
# comma-separated candidate labels are passed to sequence_to_classify, and
# its return value is rendered by a Label output component. The example rows
# come from prep_examples().
iface = gr.Interface(
    title="Multilingual Multi-label Zero-shot Classification",
    description="Currently supported languages are English, German, Spanish, Italian, Russian, Turkish, Norsk.",
    fn=sequence_to_classify,
    inputs=[
        gr.inputs.Textbox(
            lines=10,
            label="Please enter the text you would like to classify...",
            placeholder="Text here...",
        ),
        gr.inputs.Textbox(
            lines=2,
            label="Possible candidate labels (separated by comma)...",
            placeholder="Labels here separated by comma...",
        ),
    ],
    outputs=gr.outputs.Label(num_top_classes=5),
    capture_session=True,
    # interpretation="default",
    examples=prep_examples(),
)

# Start serving the interface.
iface.launch()