Spaces:
Sleeping
Sleeping
import gradio as gr | |
import os | |
import torch | |
import torchtext | |
from torchtext.data.utils import get_tokenizer | |
from model import create_model | |
# Tokenizer applied to the raw text before the vocabulary lookup.
tokenizer = get_tokenizer("basic_english")

# Build the network and restore its trained weights onto the CPU.
model = create_model()
model.load_state_dict(torch.load(f="model.pt", map_location=torch.device("cpu")))
# Put the model in inference mode. A freshly built module starts in training
# mode, so any dropout / batch-norm layers it may contain (the architecture
# lives in model.py, not shown here) would otherwise behave as in training.
model.eval()

# Vocabulary object saved with the model; it is called on a token list below.
vocab = torch.load("vocab.pt", map_location=torch.device("cpu"))


def text_pipeline(x):
    """Tokenize ``x`` and pass the resulting tokens through ``vocab``."""
    return vocab(tokenizer(x))
def predict(text: str) -> float:
    """Run the model on a job description and return the scaled prediction.

    Args:
        text: Raw job-description text as entered by the user.

    Returns:
        The model's scalar output multiplied by 10,000 and rounded to the
        nearest 100, as a float.

    Raises:
        ValueError: If ``text`` is empty, whitespace-only, or yields no
            tokens, since there is nothing to feed the model.
    """
    # Skip tokenization entirely for blank input; otherwise a zero-length
    # sequence would be sent to the model.
    token_ids = text_pipeline(text) if text and text.strip() else []
    if not token_ids:
        raise ValueError("Please paste a job description before submitting.")

    with torch.no_grad():
        # Add a leading batch dimension of size 1 before calling the model.
        batch = torch.tensor(token_ids, dtype=torch.int64).unsqueeze(0)
        raw = model(batch).squeeze().item()

    # Scale first, then round: rounding to 2 decimals and multiplying
    # afterwards can leave float noise (0.57 * 10000 == 5699.999999999999).
    return round(raw * 10000, -2)
# Heading shown at the top of the Gradio page.
title = "Salary Predictor"

# Short blurb shown under the title.
description = (
    "This is a test project to see if I could build a Machine Learning model "
    "to predict salary offered based on a posted job description. "
    "To test, copy a whole job description from a linkedIn post. "
    "Results are in USD."
)

# HTML shown below the interface. Every <p> is explicitly opened and closed so
# the markup is well-formed; lines stay unindented inside the literal.
article = """<p>This project was built by Marie Pelletier <br/>
<a href="https://www.linkedin.com/in/marie-pelletier-14837441/">LinkedIn</a><br/>
<a href="https://github.com/Marie000">Github</a>
</p>
<p>This is a work in progress and is not meant to be used as an accurate predictor of
salary. It is limited by the
<a href="https://www.kaggle.com/datasets/arshkon/linkedin-job-postings/data">data that was used</a>,
which is US-based, from 2023. It also does not take into account regions or
fluctuations in the market over time. The dataset included over 33,000 job postings,
but only 13,000 had salary information.</p>
<p>For all you statistic nerds, the r-squared score of the model was around 0.6.
For the rest of you, that level of accuracy can probably be described as "better than
nothing". It might have been better if I had more data to work with. Or maybe there is
just so much we can infer from a description alone.</p>
<p>For this first test, only the description of the job is taken into account.
Adding other information, including the date of the posting and the location,
could improve the prediction.</p>
<p>The google colab notebook used to generate this model can be found
<a href="https://github.com/Marie000/Linkedin-predictor-model">here</a></p>"""
# Multi-line text area the user pastes the job posting into.
_job_description_box = gr.Textbox(
    lines=20,
    placeholder="copy whole job description here",
)

# Single text input -> single numeric output, with the page copy defined above.
demo = gr.Interface(
    fn=predict,
    inputs=_job_description_box,
    outputs="number",
    title=title,
    description=description,
    article=article,
)

# Start the web app as soon as the script runs.
demo.launch(debug=False, share=True, show_error=True)