"""Gradio demo that predicts a salary (USD) from a pasted job description."""

import os

import gradio as gr
import torch

from model import create_model

# Build the network and restore the trained weights on CPU.
model = create_model()
model.load_state_dict(torch.load(f="model.pt", map_location=torch.device("cpu")))
# Switch to inference mode so layers such as dropout / batch-norm (if the
# model has any) behave deterministically when serving predictions.
model.eval()

# NOTE(review): this unpickles a full Python object rather than a state dict.
# On PyTorch releases where ``torch.load`` defaults to ``weights_only=True``
# this call may need ``weights_only=False`` -- confirm against the pinned
# torch version. Only load files you trust.
vocab = torch.load("vocab.pt", map_location=torch.device("cpu"))


def text_pipeline(x):
    """Tokenize ``x`` and map the tokens through ``vocab``.

    NOTE(review): ``tokenizer`` is not defined or imported anywhere in the
    visible file, so this call raises ``NameError`` unless the name is
    provided elsewhere. It must be the same tokenizer that was used when the
    vocabulary was built -- confirm and add the definition.
    """
    return vocab(tokenizer(x))


def predict(text):
    """Return the model's salary estimate for a job description.

    Args:
        text: Raw job-description text entered in the UI.

    Returns:
        The model output rounded to two decimals and scaled by 10,000
        (presented to the user as USD).

    Raises:
        gr.Error: If the text produces no tokens, so there is nothing to
            feed to the model.
    """
    token_ids = text_pipeline(text)
    if len(token_ids) == 0:
        # A zero-length sequence gives the model nothing to work with;
        # surface a readable message in the UI instead of a raw traceback.
        raise gr.Error("Please paste a job description before submitting.")

    with torch.no_grad():
        # Add a leading dimension of size 1 before calling the model.
        batch = torch.tensor(token_ids, dtype=torch.int64).unsqueeze(0)
        score = model(batch).squeeze().item()

    # The second round() strips binary floating-point residue that the
    # multiplication can reintroduce (e.g. a trailing ...0000000001).
    return round(round(score, 2) * 10000, 2)


title = "Salary Predictor"
description = (
    "This is a test project to see if I could build a Machine Learning model "
    "to predict salary offered based on a posted job description. "
    "To test, copy a whole job description from a linkedIn post. "
    "Results are in USD."
)
article = ''' This project was built by Marie Pelletier
LinkedIn
Github

This is a work in progress and is not meant to be used as an accurate predictor of salary. It is limited by the data that was used, which is US-based, from 2023. It also does not take into account regions or fluctuations in the market over time. The dataset included over 33,000 job postings, but only 13,000 had salary information.

For all you statistic nerds, the r-squared score of the model was around 0.6. For the rest of you, that level of accuracy can probably be described as "better than nothing". It might have been better if I had more data to work with. Or maybe there is just so much we can infer from a description alone.

For this first test, only the description of the job is taken into account. Adding other information, including the date of the posting and the location, could improve the prediction.

The google colab notebook used to generate this model can be found here'''

# Single text box in, single number out.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=20, placeholder="copy whole job description here"),
    outputs="number",
    title=title,
    description=description,
    article=article,
)

demo.launch(debug=False, share=True)