import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# The imports below are only needed by the commented-out local-model code paths:
# import pickle
# import joblib
# import numpy as np
# import tensorflow as tf
# from keras.utils import pad_sequences
# from keras.preprocessing.text import Tokenizer

# Load the model from the pickle file
# filename = 'F:/CVFilter/models/model_pk.pkl'
# with open(filename, 'rb') as file:
#     model = pickle.load(file)

# Load the saved joblib model (raw string avoids backslash-escape issues)
# model = joblib.load(r'F:\CVFilter\models\model.joblib')


# Load local Keras model and tokenizer (forward slash avoids the invalid '\m' escape)
# model = tf.keras.models.load_model('models/model.h5')

# tokenfile = 'tokenized_words/tokenized_words.pkl'
# # Load the tokenized words from the pickle file
# with open(tokenfile, 'rb') as file:
#     loaded_tokenized_words = pickle.load(file)

# max_review_length = 200
# tokenizer = Tokenizer(num_words=10000,  # max number of unique words to keep
#                       filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~',  # punctuation to strip
#                       lower=True)  # convert to lower case
# tokenizer.fit_on_texts(loaded_tokenized_words)


# Load the Hugging Face model and tokenizer
model_name = "fazni/distilbert-base-uncased-career-path-prediction"

# Load the model
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

outcome_labels = [
    'Business Analyst',
    'Cyber Security',
    'Data Engineer',
    'Data Science',
    'DevOps',
    'Machine Learning Engineer',
    'Mobile App Developer',
    'Network Engineer',
    'Quality Assurance',
    'Software Engineer',
]
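
# Optional sanity check: the label list should match the model's output size;
# num_labels is the standard field on transformers model configs.
assert model.config.num_labels == len(outcome_labels), \
    "outcome_labels does not match the model's number of output classes"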

def model_prediction(text, model=model, tokenizer=tokenizer, labels=outcome_labels):
    # Local Keras model (kept for reference):
    # seq = tokenizer.texts_to_sequences([text])
    # padded = pad_sequences(seq, maxlen=max_review_length)
    # pred = model.predict(padded)
    # return labels[np.argmax(pred)]

    # Hugging Face model: tokenize the text, truncating to the model's
    # 512-token input limit
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # Inference only, so skip gradient tracking
    with torch.no_grad():
        outputs = model(**inputs)

    # Convert logits to class probabilities and return the top label
    probs = outputs.logits.softmax(dim=-1)
    return labels[torch.argmax(probs).item()]
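

# Minimal usage sketch (the sample text below is illustrative, not from the repo):
if __name__ == "__main__":
    sample_resume = (
        "Built CI/CD pipelines with Jenkins and Docker, automated AWS "
        "infrastructure with Terraform, and managed Kubernetes clusters."
    )
    # Prints one of the outcome_labels, e.g. something like 'DevOps'
    print(model_prediction(sample_resume))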