"""Gradio demo for the Work-from-Home Algorithmic Measurement (WHAM) model.

Loads a fine-tuned DistilBERT sequence classifier from the Hugging Face Hub
and serves a small web page where a user pastes text, picks a classification
threshold, and sees both the thresholded label and the raw predicted
probability. Misclassified examples can be flagged manually.
"""

import os

import numpy as np
import torch
import gradio as gr
from transformers import (
    TextClassificationPipeline,
    DistilBertTokenizer,
    DistilBertForSequenceClassification,
)

# Hugging Face dataset saver for flagged examples. It is only used when the
# `flagging_callback` argument of the interface below is re-enabled.
HF_TOKEN = os.getenv('HF_TOKEN')
hf_saver = gr.HuggingFaceDatasetSaver(HF_TOKEN, "wfh-problematic")

# model path on the Hugging Face Hub
model_path = "yabramuvdi/distilbert-wfh"
tokenizer = DistilBertTokenizer.from_pretrained(model_path, use_auth_token=HF_TOKEN)
model = DistilBertForSequenceClassification.from_pretrained(model_path, use_auth_token=HF_TOKEN)

# create a pipeline for predictions
# NOTE(review): `return_all_scores` is reported as deprecated in newer
# transformers releases in favour of `top_k=None`, but that appears to change
# the nesting/order of the output that `predict_wfh` indexes into, so it is
# kept here -- confirm before migrating.
classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)

# basic elements of page
title = "Remote Work Detection Application"
description = (
    "This page allows users to interact with the __Work-from-Home Algorithmic "
    "Measurement (WHAM)__ model developed in the paper [_“Remote Work across "
    "Jobs, Companies, and Space” (Hansen, Lambert, Bloom, Davis, Sadun & Taska, "
    "2023)_](https://wfhmap.com/). It is maintained by Yabra Muvdi, who works "
    "as a pre-doctoral researcher for Professor Hansen.\n\n"
    "The application allows users to input any arbitrary text and computes the "
    "predicted probability of the text exhibiting the possibility of remote "
    "work. Users can also flag any examples that are incorrectly classified by "
    "the model. This is simply done by clicking on the _“Flag”_ button and then "
    "selecting _“mistake”_."
)
article = ""  # text at the end of the app

# Each example row is [input text, classification threshold].
examples = [
    [
        "We are looking for a Deputy Home Manager with domiciliary care experience to join our company. You will work from home care facilities with a strong track record of quality service.",
        0.5,
    ],
    [
        # The source had a stray line break inside this literal (a syntax
        # error as written); the two sentences are joined with one space.
        "We are open to discussing flexible working arrangements and encourage our people to explore new ways of working - including part-time, job-share or working from different locations. Everyone can ask about it.",
        0.5,
    ],
    [
        "We see the value in work-life balance, many of us take full advantage of our flexible working arrangements, so whether you like to get a surf in before work, like to head home in time to pick up the kids or you just like working from the comfort of your own home now and then, we want to support you.",
        0.5,
    ],
    [
        "The ideal candidate is comfortable in working in remote northern Alberta in the town Peace River, High Level and Fairview.",
        0.5,
    ],
    [
        "With a hybrid mix of time at home as well as our corporate office, this role will suit an analytical, process orientated and people focused payroll professional who thrives in a fast-paced environment.",
        0.5,
    ],
]


#%%
def predict_wfh(input_text, input_slider):
    """Classify a text as work-from-home or not at a chosen threshold.

    Args:
        input_text: Text passed to the classification pipeline.
        input_slider: Threshold; the text is labelled work-from-home only
            when the predicted probability is strictly greater than it.

    Returns:
        A 2-tuple: a dict mapping "Not work from home" and "Work from home"
        to 0/1 (exactly one of them is 1), and a string reporting the
        predicted probability rounded to three decimals.
    """
    # get scores from model (first element = scores for the single input)
    predictions = classifier(input_text)[0]

    # Entry 1 is read as the WFH score -- presumably the model's positive
    # class; verify against the model's label mapping.
    prob_wfh = predictions[1]["score"]

    # use selected threshold to classify as WFH
    wfh = int(prob_wfh > input_slider)
    confidences = {"Not work from home": 1 - wfh, "Work from home": wfh}
    return confidences, f"Probability of WFH: {np.round(prob_wfh, 3)}"


label = gr.Label(num_top_classes=1, type="confidences", label="Binary classification")
text_output = gr.Textbox(label="Predicted probability")

app = gr.Interface(
    fn=predict_wfh,
    inputs=[
        gr.Textbox(lines=10, label="Input text"),
        # Explicit keywords: positionally the third argument of gr.Slider is
        # the initial value, not the step, and `default` is not a parameter
        # of the component API, so the original call started the slider at
        # 0.001 instead of 0.5.
        gr.Slider(
            minimum=0,
            maximum=1,
            value=0.5,
            step=0.001,
            label="Classification threshold",
        ),
    ],
    outputs=[label, text_output],
    theme="huggingface",
    title=title,
    description=description,
    article=article,
    examples=examples,
    allow_flagging="manual",
    flagging_options=["mistake"],
    #flagging_callback=hf_saver
)

# To protect the page, pass `auth=(user, password)` and `auth_message=...` to
# launch(); read the credentials from environment variables / secrets rather
# than hard-coding them in this file.
app.launch()