benjaminBeuster committed on
Commit
5475ac5
·
verified ·
1 Parent(s): fc9c42a

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +152 -19
app.py CHANGED
@@ -1,6 +1,7 @@
1
  """
2
  HuggingFace Space - ESS Variable Classification Demo
3
  Interactive Gradio interface for the XLM-RoBERTa ESS classifier.
 
4
  """
5
  import gradio as gr
6
  from transformers import pipeline
@@ -9,16 +10,33 @@ from transformers import pipeline
9
  MODEL_NAME = "benjaminBeuster/xlm-roberta-base-ess-classification"
10
  classifier = pipeline("text-classification", model=MODEL_NAME)
11
 
 
 
 
 
 
 
 
12
  # Category descriptions
13
  CATEGORY_INFO = {
14
  "DEMOGRAPHY (POPULATION, VITAL STATISTICS, AND CENSUSES)": "Demographics, population statistics, age, gender",
15
- "ECONOMICS": "Economic issues, finance, income",
16
  "EDUCATION": "Education, schooling, qualifications",
17
  "HEALTH": "Healthcare, medical services, health satisfaction",
18
  "POLITICS": "Political systems, trust in government, parliament",
19
  "SOCIETY AND CULTURE": "Social issues, cultural topics, religion",
20
  "LABOUR AND EMPLOYMENT": "Work, occupation, employment status",
21
  "PSYCHOLOGY": "Mental health, psychological wellbeing",
 
 
 
 
 
 
 
 
 
 
22
  "OTHER": "General or uncategorized topics"
23
  }
24
 
@@ -40,63 +58,178 @@ def classify_text(text):
40
 
41
  return output
42
 
43
- # Example questions from actual ESS training data
44
  examples = [
45
  # EDUCATION (most common - 146 samples)
46
  ["What is the highest level of education you have successfully completed?"],
47
  ["What is the highest level of education your mother successfully completed?"],
 
48
 
49
  # POLITICS (100 samples)
50
  ["Which party did you vote for in the last national election?"],
51
- ["How likely are governments in enough countries to take action to reduce climate change?"],
52
  ["Trust in country's parliament"],
 
 
53
 
54
  # HEALTH (90 samples)
55
  ["How satisfied are you with the healthcare system?"],
56
  ["Which health problems that you had in the last 12 months hampered you in your daily activities?"],
 
57
 
58
  # LABOUR AND EMPLOYMENT (82 samples)
59
  ["What best describes what you have been doing for the last 7 days - in paid work?"],
60
  ["Which description best describes the sort of work your mother did when you were 14?"],
 
 
61
 
62
  # SOCIETY AND CULTURE (73 samples)
63
  ["How often do you pray apart from at religious services?"],
64
  ["How important is it to always behave properly and avoid doing anything people would say is wrong?"],
 
65
 
66
- # Additional examples
67
  ["What is your age?"],
68
- ["Do you feel safe walking alone at night in your local area?"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  ]
70
 
71
- # Create Gradio interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  demo = gr.Interface(
73
  fn=classify_text,
74
  inputs=gr.Textbox(
75
  lines=3,
76
  placeholder="Enter a survey question or variable description...",
77
- label="Input Text"
78
  ),
79
  outputs=gr.Markdown(label="Classification Result"),
80
- title="ESS Variable Classification",
81
  description="""
82
- Classify European Social Survey (ESS) variables into 19 subject categories.
83
-
84
- This model is fine-tuned from XLM-RoBERTa-Base and achieves 83.8% accuracy on the test set.
 
 
 
 
 
 
 
 
 
 
85
  """,
86
  examples=examples,
87
  article="""
88
- ### About
 
 
 
 
 
 
 
 
 
 
89
 
90
- This classifier helps organize survey variables by automatically categorizing them into subject areas.
91
- Built on [XLM-RoBERTa-Base](https://huggingface.co/FacebookAI/xlm-roberta-base),
92
- trained on European Social Survey metadata.
93
 
94
- **Model:** [benjaminBeuster/xlm-roberta-base-ess-classification](https://huggingface.co/benjaminBeuster/xlm-roberta-base-ess-classification)
 
 
 
95
 
96
- **Performance:** 83.8% accuracy | F1: 0.796 (weighted)
 
 
 
 
 
 
 
 
 
 
 
97
  """,
98
- theme=gr.themes.Soft(),
99
- allow_flagging="never"
 
 
 
 
100
  )
101
 
102
  if __name__ == "__main__":
 
1
  """
2
  HuggingFace Space - ESS Variable Classification Demo
3
  Interactive Gradio interface for the XLM-RoBERTa ESS classifier.
4
+ Developed by Sikt - Norwegian Agency for Shared Services in Education and Research
5
  """
6
  import gradio as gr
7
  from transformers import pipeline
 
10
  MODEL_NAME = "benjaminBeuster/xlm-roberta-base-ess-classification"
11
  classifier = pipeline("text-classification", model=MODEL_NAME)
12
 
13
+ # Sikt brand colors
14
+ SIKT_COLORS = {
15
+ "amaranth": "#ee3243", # Primary accent
16
+ "meteorite": "#331c6c", # Dark
17
+ "selago": "#f3f1fe" # Light
18
+ }
19
+
20
  # Category descriptions
21
  CATEGORY_INFO = {
22
  "DEMOGRAPHY (POPULATION, VITAL STATISTICS, AND CENSUSES)": "Demographics, population statistics, age, gender",
23
+ "ECONOMICS": "Economic issues, finance, income, wealth",
24
  "EDUCATION": "Education, schooling, qualifications",
25
  "HEALTH": "Healthcare, medical services, health satisfaction",
26
  "POLITICS": "Political systems, trust in government, parliament",
27
  "SOCIETY AND CULTURE": "Social issues, cultural topics, religion",
28
  "LABOUR AND EMPLOYMENT": "Work, occupation, employment status",
29
  "PSYCHOLOGY": "Mental health, psychological wellbeing",
30
+ "HOUSING AND LAND USE": "Housing conditions, residential environment",
31
+ "NATURAL ENVIRONMENT": "Environmental concerns, climate change",
32
+ "LAW, CRIME AND LEGAL SYSTEMS": "Justice, crime, legal matters",
33
+ "MEDIA, COMMUNICATION AND LANGUAGE": "Media use, communication patterns",
34
+ "SOCIAL STRATIFICATION AND GROUPINGS": "Social class, inequality, social groups",
35
+ "SOCIAL WELFARE POLICY AND SYSTEMS": "Social benefits, welfare services",
36
+ "TRANSPORT AND TRAVEL": "Transportation, mobility, travel patterns",
37
+ "TRADE, INDUSTRY AND MARKETS": "Business, commerce, markets",
38
+ "SCIENCE AND TECHNOLOGY": "Scientific advancement, technology use",
39
+ "HISTORY": "Historical events, memory, heritage",
40
  "OTHER": "General or uncategorized topics"
41
  }
42
 
 
58
 
59
  return output
60
 
61
+ # Example questions - mix of actual ESS data and generated diverse questions
62
  examples = [
63
  # EDUCATION (most common - 146 samples)
64
  ["What is the highest level of education you have successfully completed?"],
65
  ["What is the highest level of education your mother successfully completed?"],
66
+ ["How many years of full-time education have you completed?"],
67
 
68
  # POLITICS (100 samples)
69
  ["Which party did you vote for in the last national election?"],
 
70
  ["Trust in country's parliament"],
71
+ ["How satisfied are you with the way democracy works in your country?"],
72
+ ["How much do you trust the legal system?"],
73
 
74
  # HEALTH (90 samples)
75
  ["How satisfied are you with the healthcare system?"],
76
  ["Which health problems that you had in the last 12 months hampered you in your daily activities?"],
77
+ ["How is your health in general - very good, good, fair, bad, or very bad?"],
78
 
79
  # LABOUR AND EMPLOYMENT (82 samples)
80
  ["What best describes what you have been doing for the last 7 days - in paid work?"],
81
  ["Which description best describes the sort of work your mother did when you were 14?"],
82
+ ["How many hours do you normally work per week in your main job?"],
83
+ ["Are you a member of a trade union or similar organization?"],
84
 
85
  # SOCIETY AND CULTURE (73 samples)
86
  ["How often do you pray apart from at religious services?"],
87
  ["How important is it to always behave properly and avoid doing anything people would say is wrong?"],
88
+ ["Do you consider yourself as belonging to any particular religion or denomination?"],
89
 
90
+ # DEMOGRAPHY
91
  ["What is your age?"],
92
+ ["What is your gender?"],
93
+ ["What is your current legal marital status?"],
94
+ ["In which country were you born?"],
95
+
96
+ # ECONOMICS
97
+ ["Which of the descriptions on this card comes closest to how you feel about your household's income nowadays?"],
98
+ ["What is your household's total net income from all sources?"],
99
+
100
+ # PSYCHOLOGY
101
+ ["Taking all things together, how happy would you say you are?"],
102
+ ["Have you felt depressed or sad in the last two weeks?"],
103
+ ["How often do you feel stressed?"],
104
+
105
+ # NATURAL ENVIRONMENT
106
+ ["How worried are you about climate change?"],
107
+ ["To what extent do you think climate change is caused by human activity?"],
108
+
109
+ # LAW, CRIME AND LEGAL SYSTEMS
110
+ ["How safe do you feel walking alone at night in your local area?"],
111
+ ["Have you or a member of your household been a victim of burglary or assault in the last 5 years?"],
112
+
113
+ # MEDIA, COMMUNICATION AND LANGUAGE
114
+ ["How much time do you spend watching television on an average weekday?"],
115
+ ["How often do you use the internet for news?"],
116
+
117
+ # SOCIAL STRATIFICATION AND GROUPINGS
118
+ ["In society there are groups which tend to be towards the top and groups which tend to be towards the bottom. Where would you place yourself?"],
119
+ ["Do you belong to any discriminated group in this country?"],
120
+
121
+ # HOUSING AND LAND USE
122
+ ["Do you rent or own your accommodation?"],
123
+ ["How many rooms do you have for your household's use only?"],
124
+
125
+ # SOCIAL WELFARE POLICY AND SYSTEMS
126
+ ["Should the government reduce income differences?"],
127
+ ["How satisfied are you with the state of social benefits in your country?"],
128
+
129
+ # TRANSPORT AND TRAVEL
130
+ ["How long does your daily commute to work take?"],
131
+ ["What is your main mode of transportation?"],
132
+
133
+ # SCIENCE AND TECHNOLOGY
134
+ ["To what extent do you think scientific advances benefit society?"],
135
+ ["How often do you use a smartphone or tablet?"],
136
  ]
137
 
138
+ # Custom CSS for Sikt branding
139
+ custom_css = """
140
+ .gradio-container {
141
+ font-family: "Source Sans Pro", -apple-system, BlinkMacSystemFont, sans-serif;
142
+ }
143
+ h1 {
144
+ color: #331c6c !important;
145
+ }
146
+ .header-logo {
147
+ display: flex;
148
+ align-items: center;
149
+ gap: 1rem;
150
+ margin-bottom: 1rem;
151
+ }
152
+ button.primary {
153
+ background-color: #ee3243 !important;
154
+ border-color: #ee3243 !important;
155
+ }
156
+ button.primary:hover {
157
+ background-color: #d62839 !important;
158
+ border-color: #d62839 !important;
159
+ }
160
+ .tabs {
161
+ border-color: #331c6c !important;
162
+ }
163
+ footer {
164
+ background-color: #f3f1fe !important;
165
+ }
166
+ """
167
+
168
+ # Create Gradio interface with Sikt branding
169
  demo = gr.Interface(
170
  fn=classify_text,
171
  inputs=gr.Textbox(
172
  lines=3,
173
  placeholder="Enter a survey question or variable description...",
174
+ label="Survey Question"
175
  ),
176
  outputs=gr.Markdown(label="Classification Result"),
177
+ title="🔍 ESS Variable Classification",
178
  description="""
179
+ <div style="display: flex; align-items: center; gap: 1rem; margin-bottom: 1rem;">
180
+ <img src="https://cdn.brandfetch.io/id9VCyV64w/theme/dark/logo.svg?c=1bxid64Mup7aczewSAYMX"
181
+ alt="Sikt Logo" style="height: 40px;">
182
+ <div>
183
+ <p style="margin: 0; color: #331c6c; font-size: 1.1em; font-weight: 500;">
184
+ Developed by <strong>Sikt</strong> – Norwegian Agency for Shared Services in Education and Research
185
+ </p>
186
+ </div>
187
+ </div>
188
+
189
+ Automatically classify European Social Survey (ESS) questions into **19 subject categories**.
190
+
191
+ This AI model is fine-tuned from XLM-RoBERTa-Base and achieves **83.8% accuracy** on the test set.
192
  """,
193
  examples=examples,
194
  article="""
195
+ ---
196
+
197
+ ### About This Tool
198
+
199
+ This classifier helps researchers and data managers organize survey variables by automatically
200
+ categorizing them into subject areas. The model was trained on European Social Survey metadata
201
+ and can classify questions into categories including:
202
+
203
+ - **Education** • **Politics** • **Health** • **Labour & Employment**
204
+ - **Society & Culture** • **Economics** • **Psychology** • **Demographics**
205
+ - And 11 more categories
206
 
207
+ ### Technical Details
 
 
208
 
209
+ - **Base Model:** [XLM-RoBERTa-Base](https://huggingface.co/FacebookAI/xlm-roberta-base) (125M parameters)
210
+ - **Fine-tuned Model:** [benjaminBeuster/xlm-roberta-base-ess-classification](https://huggingface.co/benjaminBeuster/xlm-roberta-base-ess-classification)
211
+ - **Performance:** 83.8% accuracy | F1: 0.796 (weighted) | 105 test samples
212
+ - **Training Data:** [ESS Classification Dataset](https://huggingface.co/datasets/benjaminBeuster/ess_classification)
213
 
214
+ ### About Sikt
215
+
216
+ [Sikt](https://sikt.no) – Norwegian Agency for Shared Services in Education and Research
217
+ provides digital infrastructure and services for research and education in Norway.
218
+
219
+ ---
220
+
221
+ <div style="text-align: center; padding: 1rem; background-color: #f3f1fe; border-radius: 8px; margin-top: 1rem;">
222
+ <p style="color: #331c6c; margin: 0;">
223
+ Questions or feedback? Visit <a href="https://sikt.no" style="color: #ee3243; text-decoration: none; font-weight: 600;">sikt.no</a>
224
+ </p>
225
+ </div>
226
  """,
227
+ theme=gr.themes.Soft(
228
+ primary_hue="red",
229
+ secondary_hue="purple",
230
+ ),
231
+ css=custom_css,
232
+ flagging_mode="never"
233
  )
234
 
235
  if __name__ == "__main__":