"""
HuggingFace Space - ESS Variable Classification Demo
Interactive Gradio interface for the XLM-RoBERTa ESS classifier.
Developed by Sikt - Norwegian Agency for Shared Services in Education and Research
"""
|
|
import gradio as gr |
|
|
from transformers import pipeline |
|
|
|
|
|
|
|
|
# Hugging Face Hub identifier of the fine-tuned checkpoint used by this demo.
MODEL_NAME = "benjaminBeuster/xlm-roberta-base-ess-classification"

# Text-classification pipeline created once at import time; classify_text()
# calls this module-level object for every request.
classifier = pipeline("text-classification", model=MODEL_NAME)
|
|
|
|
|
|
|
|
# Named Sikt brand colours as hex strings. The same hex values also appear
# literally inside custom_css, so keep the two in sync when changing either.
SIKT_COLORS = {
    "amaranth": "#ee3243",
    "meteorite": "#331c6c",
    "selago": "#f3f1fe",
}
|
|
|
|
|
|
|
|
# One-line description per topic label. classify_text() looks the predicted
# label up here verbatim, so keys must match the model's label strings exactly.
CATEGORY_INFO = {
    "DEMOGRAPHY (POPULATION, VITAL STATISTICS, AND CENSUSES)": "Demographics, population statistics, age, gender",
    "ECONOMICS": "Economic issues, finance, income, wealth",
    "EDUCATION": "Education, schooling, qualifications",
    "HEALTH": "Healthcare, medical services, health satisfaction",
    "POLITICS": "Political systems, trust in government, parliament",
    "SOCIETY AND CULTURE": "Social issues, cultural topics, religion",
    "LABOUR AND EMPLOYMENT": "Work, occupation, employment status",
    "PSYCHOLOGY": "Mental health, psychological wellbeing",
    "HOUSING AND LAND USE": "Housing conditions, residential environment",
    "NATURAL ENVIRONMENT": "Environmental concerns, climate change",
    "LAW, CRIME AND LEGAL SYSTEMS": "Justice, crime, legal matters",
    "MEDIA, COMMUNICATION AND LANGUAGE": "Media use, communication patterns",
    "SOCIAL STRATIFICATION AND GROUPINGS": "Social class, inequality, social groups",
    "SOCIAL WELFARE POLICY AND SYSTEMS": "Social benefits, welfare services",
    "TRANSPORT AND TRAVEL": "Transportation, mobility, travel patterns",
    "TRADE, INDUSTRY AND MARKETS": "Business, commerce, markets",
    "SCIENCE AND TECHNOLOGY": "Scientific advancement, technology use",
    "HISTORY": "Historical events, memory, heritage",
    "OTHER": "General or uncategorized topics",
}
|
|
|
|
|
def classify_text(text) -> str:
    """Classify a survey question or variable description.

    Runs the module-level ``classifier`` pipeline on *text* and formats the
    first prediction it returns as Markdown.

    Args:
        text: Free-text survey question or variable label. ``None``, an empty
            string, or whitespace-only input is treated as "nothing entered".

    Returns:
        A Markdown string containing the predicted category and its score
        formatted as a percentage, followed by a one-line description when
        the label has an entry in ``CATEGORY_INFO``. When nothing was
        entered, a short prompt asking for input is returned instead and the
        model is not called.
    """
    # Check for None explicitly: calling .strip() on None raises
    # AttributeError before the friendly prompt can be returned.
    if text is None or not text.strip():
        return "Please enter some text to classify."

    # truncation=True is forwarded to the tokenizer so that over-long input
    # is cut to the model's maximum length instead of failing inside the model.
    prediction = classifier(text, truncation=True)[0]
    label = prediction["label"]
    score = prediction["score"]

    # Blank lines between fields make each one its own Markdown paragraph.
    output = f"**Category:** {label}\n\n**Confidence:** {score:.2%}\n\n"

    # The description is optional: labels without an entry are shown as-is.
    description = CATEGORY_INFO.get(label)
    if description is not None:
        output += f"**Description:** {description}"

    return output
|
|
|
|
|
|
|
|
|
|
|
# Clickable sample inputs for the Gradio interface. Each row is a
# one-element list because the interface has a single input component.
examples = [
    ["How likely, governments in enough countries take action to reduce climate change"],
    ["Country"],
    ["Age of respondent, calculated"],
    ["Partner, control paid work last 7 days"],
    ["Ninth person in household: relationship to respondent"],
    ["Year of birth of eighth person in household"],
    ["Partner's age when completed full time education"],
    ["Religion or denomination belonging to in the past"],
    ["Which party feel closer to"],
    ["Highest level of education"],
    ["Ever unemployed and seeking work for a period more than three months"],
    ["Religion or denomination belonging to at present"],
    ["Partner doing last 7 days: housework, looking after children, others"],
    ["Year of birth of sixth person in household"],
    ["I like to be a leader, to what extent"],
    ["Main activity, last 7 days"],
    ["Mother's highest level of education"],
    ["Main activity last 7 days"],
    ["Doing last 7 days: unemployed, not actively looking for job"],
    ["How feminine respondent feels"],
    ["Father's highest level of education"],
    ["Trust in country's parliament"],
    ["How satisfied are you with the state of education"],
    ["How important to get respect from others"],
    ["Important to show abilities and be admired"],
    ["How often socially meet with friends, relatives or colleagues"],
    ["Placement on left right scale"],
    ["How often pray apart from at religious services"],
    ["Important to help people and care for others well-being"],
    ["Subjective general health"],
]
|
|
|
|
|
|
|
|
# Stylesheet handed to gr.Interface(css=...). The :root block declares the
# --sds-* custom properties; the rules below (and the inline styles in the
# interface's HTML snippets) refer to them via var(...).
custom_css = """
:root {
    /* Sikt Design Tokens */
    --sds-color-text-primary: #1a1a1a;
    --sds-color-text-secondary: #331c6c;
    --sds-color-interaction-primary: #7d5da6;
    --sds-color-interaction-primary-hover: #6b4d94;
    --sds-color-layout-background-default: #ffffff;
    --sds-color-layout-background-subtle: #f3f1fe;
    --sds-color-accent-primary: #ee3243;
    --sds-space-gap-small: 0.5rem;
    --sds-space-gap-medium: 1rem;
    --sds-space-gap-large: 1.5rem;
    --sds-space-padding-small: 0.75rem;
    --sds-space-padding-medium: 1rem;
    --sds-space-padding-large: 1.5rem;
    --sds-space-border-radius-small: 4px;
    --sds-space-border-radius-medium: 8px;
    --sds-space-border-radius-large: 12px;
}

.gradio-container {
    font-family: "Source Sans Pro", -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif !important;
}

h1, .gr-title {
    color: var(--sds-color-text-secondary) !important;
    font-weight: 600 !important;
}

.gr-box {
    border-radius: var(--sds-space-border-radius-medium) !important;
}

.gr-button {
    background-color: var(--sds-color-interaction-primary) !important;
    border-color: var(--sds-color-interaction-primary) !important;
    border-radius: var(--sds-space-border-radius-small) !important;
    font-weight: 500 !important;
    transition: all 0.2s ease !important;
}

.gr-button:hover {
    background-color: var(--sds-color-interaction-primary-hover) !important;
    border-color: var(--sds-color-interaction-primary-hover) !important;
    transform: translateY(-1px) !important;
    box-shadow: 0 2px 8px rgba(125, 93, 166, 0.3) !important;
}

.gr-button-primary {
    background: linear-gradient(135deg, var(--sds-color-interaction-primary) 0%, #6b4d94 100%) !important;
}

.gr-input, .gr-textbox {
    border-color: #e0e0e0 !important;
    border-radius: var(--sds-space-border-radius-small) !important;
}

.gr-input:focus, .gr-textbox:focus {
    border-color: var(--sds-color-interaction-primary) !important;
    box-shadow: 0 0 0 2px rgba(125, 93, 166, 0.1) !important;
}

.gr-panel {
    background-color: var(--sds-color-layout-background-subtle) !important;
    border-radius: var(--sds-space-border-radius-medium) !important;
    padding: var(--sds-space-padding-large) !important;
}

.gr-form {
    gap: var(--sds-space-gap-medium) !important;
}

footer {
    background-color: var(--sds-color-layout-background-subtle) !important;
    border-top: 1px solid #e0e0e0 !important;
}

.sikt-logo {
    max-width: 120px;
    height: auto;
}

.sikt-header {
    background: linear-gradient(135deg, #f3f1fe 0%, #ffffff 100%);
    padding: var(--sds-space-padding-medium);
    border-radius: var(--sds-space-border-radius-medium);
    margin-bottom: var(--sds-space-gap-large);
    border-left: 4px solid var(--sds-color-interaction-primary);
}
"""
|
|
|
|
|
|
|
|
# Banner and intro text passed to gr.Interface(description=...).
# The lines are left unindented, matching how they are laid out in the markup.
_DESCRIPTION = """
<div class="sikt-header">
<div style="display: flex; align-items: center; gap: 1.5rem; flex-wrap: wrap;">
<img src="https://modansa.blob.core.windows.net/testcontainer/Sikt-Prim%C3%A6rlogo-M%C3%B8rk_0.png" alt="Sikt Logo" class="sikt-logo">
<div style="flex: 1; min-width: 300px;">
<h3 style="margin: 0 0 0.5rem 0; color: #331c6c; font-size: 1.25rem; font-weight: 600;">
ESS Variable Classifier Prototype
</h3>
<p style="margin: 0; color: #1a1a1a; font-size: 0.95rem; line-height: 1.5;">
Developed by <strong>Sikt</strong> – Norwegian Agency for Shared Services in Education and Research
</p>
</div>
</div>
</div>

Automatically classify European Social Survey (ESS) questions into **19 subject categories**. This AI model is fine-tuned from XLM-RoBERTa-Base and achieves **83.8% accuracy**.

**⚠️ Prototype Notice:** This model is trained on 582 samples. Only **8 categories** have reliable training data (≥20 samples): **Education, Politics, Society and Culture, Demography, Labour and Employment, Health, Psychology, and Other**. Results for other categories should be interpreted with caution.
"""

# Inline style shared by every category chip in the "About This Tool" grid.
_CHIP_STYLE = (
    "padding: 0.5rem; background: white; border-radius: 4px; "
    "font-size: 0.8rem; border-left: 3px solid #7d5da6;"
)

# (icon, label) pairs in display order.
# NOTE(review): the icons were reconstructed from mis-decoded text. Those for
# EDUCATION, SOCIETY AND CULTURE, HOUSING AND LAND USE, SOCIAL STRATIFICATION
# AND GROUPINGS, TRANSPORT AND TRAVEL, HISTORY and OTHER (and the icon in the
# "About This Tool" heading below) left no identifying bytes and are best
# guesses - confirm against the intended design.
_CATEGORY_CHIPS = (
    ("🎓", "EDUCATION"),
    ("🏛️", "POLITICS"),
    ("🏥", "HEALTH"),
    ("💼", "LABOUR AND EMPLOYMENT"),
    ("🌍", "SOCIETY AND CULTURE"),
    ("💰", "ECONOMICS"),
    ("🧠", "PSYCHOLOGY"),
    ("👥", "DEMOGRAPHY (POPULATION, VITAL STATISTICS, AND CENSUSES)"),
    ("🏠", "HOUSING AND LAND USE"),
    ("🌱", "NATURAL ENVIRONMENT"),
    ("⚖️", "LAW, CRIME AND LEGAL SYSTEMS"),
    ("📺", "MEDIA, COMMUNICATION AND LANGUAGE"),
    ("📊", "SOCIAL STRATIFICATION AND GROUPINGS"),
    ("🤝", "SOCIAL WELFARE POLICY AND SYSTEMS"),
    ("🚗", "TRANSPORT AND TRAVEL"),
    ("🏪", "TRADE, INDUSTRY AND MARKETS"),
    ("🔬", "SCIENCE AND TECHNOLOGY"),
    ("📜", "HISTORY"),
    ("📋", "OTHER"),
)

# One <span> per category, one per line, all sharing _CHIP_STYLE.
_CHIPS_HTML = "\n".join(
    f'<span style="{_CHIP_STYLE}">{icon} {name}</span>'
    for icon, name in _CATEGORY_CHIPS
)

# Footer content passed to gr.Interface(article=...). This is an f-string
# only so the chip grid can be spliced in; any literal braces added to the
# markup later must be doubled.
# NOTE(review): XLM-RoBERTa-Base is usually cited at roughly 270M parameters,
# not 125M - verify the figure in the "Technical Details" list.
_ARTICLE = f"""
<div style="margin-top: 2rem; padding-top: 2rem; border-top: 2px solid var(--sds-color-layout-background-subtle);">

<div style="background: linear-gradient(135deg, #f3f1fe 0%, #ffffff 100%); padding: 1.5rem; border-radius: var(--sds-space-border-radius-medium); margin-bottom: 2rem; border-left: 4px solid var(--sds-color-interaction-primary);">
<h3 style="color: var(--sds-color-text-secondary); margin-top: 0; font-weight: 600;">📖 About This Tool</h3>
<p style="color: var(--sds-color-text-primary); line-height: 1.6;">
This classifier helps researchers and data managers organize survey variables by automatically
categorizing them into subject areas. The model was trained on European Social Survey metadata
and can classify questions into <strong>19 major categories</strong> from the
<a href="https://vocabularies.cessda.eu/vocabulary/TopicClassification" target="_blank" style="color: var(--sds-color-interaction-primary); text-decoration: none; font-weight: 600;">CESSDA Topic Classification</a>:
</p>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap: 0.5rem; margin-top: 1rem;">
{_CHIPS_HTML}
</div>
</div>

<div style="background: white; padding: 1.5rem; border-radius: var(--sds-space-border-radius-medium); margin-bottom: 2rem; border: 1px solid #e0e0e0;">
<h3 style="color: var(--sds-color-text-secondary); margin-top: 0; font-weight: 600;">🔬 Technical Details</h3>
<ul style="color: var(--sds-color-text-primary); line-height: 1.8; padding-left: 1.5rem;">
<li><strong>Base Model:</strong> <a href="https://huggingface.co/FacebookAI/xlm-roberta-base" style="color: var(--sds-color-interaction-primary);">XLM-RoBERTa-Base</a> (125M parameters)</li>
<li><strong>Fine-tuned Model:</strong> <a href="https://huggingface.co/benjaminBeuster/xlm-roberta-base-ess-classification" style="color: var(--sds-color-interaction-primary);">benjaminBeuster/xlm-roberta-base-ess-classification</a></li>
<li><strong>Performance:</strong> 83.8% accuracy | F1: 0.796 (weighted) | 105 test samples</li>
<li><strong>Training Data:</strong> <a href="https://huggingface.co/datasets/benjaminBeuster/ess_classification" style="color: var(--sds-color-interaction-primary);">ESS Classification Dataset</a></li>
</ul>
</div>

<div style="background: linear-gradient(135deg, var(--sds-color-layout-background-subtle) 0%, white 100%); padding: 1.5rem; border-radius: var(--sds-space-border-radius-medium); text-align: center;">
<h3 style="color: var(--sds-color-text-secondary); margin-top: 0; font-weight: 600;">About Sikt</h3>
<p style="color: var(--sds-color-text-primary); line-height: 1.6; max-width: 600px; margin: 0 auto 1rem auto;">
<a href="https://sikt.no" style="color: var(--sds-color-interaction-primary); text-decoration: none; font-weight: 600;">Sikt</a>
– Norwegian Agency for Shared Services in Education and Research provides digital infrastructure
and services for research and education in Norway.
</p>
<p style="margin-top: 1.5rem;">
<a href="https://sikt.no" style="display: inline-block; padding: 0.75rem 1.5rem; background-color: var(--sds-color-interaction-primary); color: white; text-decoration: none; border-radius: var(--sds-space-border-radius-small); font-weight: 600; transition: all 0.2s;">
Visit sikt.no →
</a>
</p>
</div>

</div>
"""

# The Gradio app object: one text box in, one Markdown panel out, wired to
# classify_text and decorated with the description/article markup above.
demo = gr.Interface(
    fn=classify_text,
    inputs=gr.Textbox(
        lines=3,
        placeholder="Enter a survey question or variable description...",
        label="Survey Question",
    ),
    outputs=gr.Markdown(label="Classification Result"),
    title="ESS Variable Classifier Prototype",
    description=_DESCRIPTION,
    examples=examples,
    article=_ARTICLE,
    theme=gr.themes.Soft(
        primary_hue="red",
        secondary_hue="purple",
    ),
    css=custom_css,
)
|
|
|
|
|
# Start the Gradio server only when this file is executed as a script, so
# importing the module does not launch it.
if __name__ == "__main__":
    demo.launch()
|
|
|