Spaces:

celise88
/

Pathfinder

Runtime error

App Files Files Community

celise88 committed on Jan 29, 2024

Commit

54919c4

1 Parent(s): 3588e6d

migrate to mistral LLM

Browse files

Files changed (26) hide show

.env +0 -1
.gitattributes +1 -2
main.py +18 -18
match_utils.py +52 -54
requirements.txt +2 -1
static/model_shards/config.json +0 -25
static/model_shards/pytorch_model-00001-of-00006.bin +0 -3
static/model_shards/pytorch_model-00002-of-00006.bin +0 -3
static/model_shards/pytorch_model-00003-of-00006.bin +0 -3
static/model_shards/pytorch_model-00004-of-00006.bin +0 -3
static/model_shards/pytorch_model-00005-of-00006.bin +0 -3
static/model_shards/pytorch_model-00006-of-00006.bin +0 -3
static/model_shards/pytorch_model.bin.index.json +0 -111
static/styles.css +15 -0
static/tokenizer_shards/special_tokens_map.json +0 -7
static/tokenizer_shards/tokenizer.json +0 -0
static/tokenizer_shards/tokenizer_config.json +0 -14
static/tokenizer_shards/vocab.txt +0 -0
templates/candidate_matcher.html +5 -10
templates/find_hire.html +1 -1
templates/find_match.html +1 -4
templates/find_my_match.html +6 -11
templates/job_list.html +1 -1
templates/login.html +1 -1
templates/logout.html +1 -1
templates/register.html +1 -1

.env DELETED Viewed

	@@ -1 +0,0 @@
1	- COHERE_TOKEN=''

.gitattributes CHANGED Viewed

	@@ -1,2 +1 @@
1	- static/embeddings/*.csv filter=lfs diff=lfs merge=lfs -text
2	- static/model_shards/*.bin filter=lfs diff=lfs merge=lfs -text


1	+ static/embeddings/*.csv filter=lfs diff=lfs merge=lfs -text

main.py CHANGED Viewed

@@ -7,7 +7,7 @@
 # License: MIT License
 # IMPORTS
-from fastapi import FastAPI, Request, Form, File, UploadFile, BackgroundTasks, Depends
 from fastapi.templating import Jinja2Templates
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import HTMLResponse
@@ -19,7 +19,7 @@ from localStoragePy import localStoragePy
 localStorage = localStoragePy('pathfinder', 'text')
 from scrape_onet import get_onet_code, get_onet_description, get_onet_tasks, get_onet_activities, get_onet_context, get_onet_skills, get_onet_knowledge, get_onet_abilities, get_onet_interests, get_onet_styles, get_onet_values, get_job_postings
-from match_utils import neighborhoods, get_resume, skillNER, sim_result_loop, get_links, coSkillEmbed, sim_result_loop_jobFinder, sim_result_loop_candFinder
 from user_utils import Hash
 # APP SETUP
@@ -171,24 +171,24 @@ async def post_matches(request: Request, bt: BackgroundTasks, resume: UploadFile
     username = localStorage.getItem('username')
-    def add_data_to_db(resume):
         db = pd.read_csv('static/res_embeddings.csv')
-        embeds = format(coSkillEmbed(resume)).replace('[[','').replace(']]','').split(',')
         db.iloc[db['username']== username,5:] = embeds
         db.to_csv('static/res_embeddings.csv', index=False)
-    def get_jobs_from_db(resume):
-        job_matches = sim_result_loop_jobFinder(resume)
         print(job_matches)
     resume = get_resume(resume)
-    skills = await skillNER(resume)
-    simResults = await sim_result_loop(resume)
     links = get_links(simResults[0])
     if username is not None:
-        bt.add_task(add_data_to_db, resume)
-        bt.add_task(get_jobs_from_db, resume)
     return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults[0], 'links': links, 'statelist': statelist})
@@ -212,24 +212,24 @@ async def post_matches(request: Request, bt: BackgroundTasks, jobdesc: UploadFil
     username = localStorage.getItem('username')
-    def add_data_to_db(jobdesc):
         db = pd.read_csv('static/jd_embeddings.csv')
-        embeds = format(coSkillEmbed(jobdesc)).replace('[[','').replace(']]','').split(',')
         db.iloc[db['username']== username,5:] = embeds
         db.to_csv('static/jd_embeddings.csv', index=False)
-    def get_cand_from_db(jobdesc):
-        cand_matches = sim_result_loop_candFinder(jobdesc)
         print(cand_matches)
     jobdesc = get_resume(jobdesc)
-    skills = await skillNER(jobdesc)
-    simResults = await sim_result_loop(jobdesc)
     links = get_links(simResults[0])
     if username is not None:
-        bt.add_task(add_data_to_db, jobdesc)
-        bt.add_task(get_cand_from_db, jobdesc)
     return templates.TemplateResponse('candidate_matcher.html', context={'request': request, 'jobdesc': jobdesc, 'skills': skills, 'simResults': simResults[0], 'links': links})

 # License: MIT License
 # IMPORTS
+from fastapi import FastAPI, Request, Form, File, UploadFile, BackgroundTasks
 from fastapi.templating import Jinja2Templates
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import HTMLResponse
 localStorage = localStoragePy('pathfinder', 'text')
 from scrape_onet import get_onet_code, get_onet_description, get_onet_tasks, get_onet_activities, get_onet_context, get_onet_skills, get_onet_knowledge, get_onet_abilities, get_onet_interests, get_onet_styles, get_onet_values, get_job_postings
+from match_utils import neighborhoods, get_resume, skill_extractor, sim_result_loop, get_links, skillEmbed, sim_result_loop_jobFinder, sim_result_loop_candFinder
 from user_utils import Hash
 # APP SETUP
     username = localStorage.getItem('username')
+    def add_data_to_db(skills):
         db = pd.read_csv('static/res_embeddings.csv')
+        embeds = format(skillEmbed(skills)).replace('[[','').replace(']]','').replace('[','').replace(']','').split(',')
         db.iloc[db['username']== username,5:] = embeds
         db.to_csv('static/res_embeddings.csv', index=False)
+    def get_jobs_from_db(skills):
+        job_matches = sim_result_loop_jobFinder(skills)
         print(job_matches)
     resume = get_resume(resume)
+    skills = skill_extractor(resume)
+    simResults = await sim_result_loop(skills)
     links = get_links(simResults[0])
     if username is not None:
+        bt.add_task(add_data_to_db, skills)
+        bt.add_task(get_jobs_from_db, skills)
     return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults[0], 'links': links, 'statelist': statelist})
     username = localStorage.getItem('username')
+    def add_data_to_db(skills):
         db = pd.read_csv('static/jd_embeddings.csv')
+        embeds = format(skillEmbed(skills)).replace('[[','').replace(']]','').split(',')
         db.iloc[db['username']== username,5:] = embeds
         db.to_csv('static/jd_embeddings.csv', index=False)
+    def get_cand_from_db(skills):
+        cand_matches = sim_result_loop_candFinder(skills)
         print(cand_matches)
     jobdesc = get_resume(jobdesc)
+    skills = skill_extractor(jobdesc)
+    simResults = await sim_result_loop(skills)
     links = get_links(simResults[0])
     if username is not None:
+        bt.add_task(add_data_to_db, skills)
+        bt.add_task(get_cand_from_db, skills)
     return templates.TemplateResponse('candidate_matcher.html', context={'request': request, 'jobdesc': jobdesc, 'skills': skills, 'simResults': simResults[0], 'links': links})

match_utils.py CHANGED Viewed

@@ -1,19 +1,15 @@
-from cleantext import clean
-import string
-from nltk.tokenize import SpaceTokenizer
-import nltk
-import cohere
-from cohere import CohereError
-import os
-from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 from docx import Document
 import pandas as pd
 import numpy as np
 from numpy.linalg import norm
 import ssl
-from dotenv import load_dotenv
 import plotly_express as px
 from scrape_onet import get_onet_code
 # SSL CERTIFICATE FIX
 try:
@@ -23,24 +19,20 @@ except AttributeError:
 else:
     ssl._create_default_https_context = _create_unverified_https_context
-# DOWNLOAD NLTK DATA IF NOT ALREADY DOWNLOADED
-if os.path.isdir('nltk_data')==False:
-    nltk.download('stopwords', quiet=True)
-# LOAD ENVIRONMENT VARIABLES
-load_dotenv()
 # LOAD COHERE EMBEDDINGS:
 simdat = pd.read_csv('static/embeddings/cohere_embeddings.csv')
 coheredat = pd.read_csv('static/cohere_tSNE_dat.csv')
-# LOAD FINE-TUNED MODEL
-# (see https://huggingface.co/celise88/distilbert-base-uncased-finetuned-binary-classifier)
-model = AutoModelForSequenceClassification.from_pretrained('static/model_shards', low_cpu_mem_usage=True)
-tokenizer = AutoTokenizer.from_pretrained('static/tokenizer_shards', low_cpu_mem_usage=True)
-classifier = pipeline('text-classification', model = model, tokenizer = tokenizer)
 # UTILITY FUNCTIONS
 async def neighborhoods(jobtitle=None):
     def format_title(logo, title, subtitle, title_font_size = 28, subtitle_font_size=14):
         logo = f'<a href="/" target="_self">{logo}</a>'
@@ -52,6 +44,7 @@ async def neighborhoods(jobtitle=None):
     fig['layout'].update(height=1000, width=1500, font=dict(family='Courier New, monospace', color='black'))
     fig.write_html('templates/job_neighborhoods.html')
 def get_resume(resume):
     path = f"static/{resume.filename}"
     with open(path, 'wb') as buffer:
@@ -63,15 +56,35 @@ def get_resume(resume):
     resume = "\n".join(text)
     return resume
-def coSkillEmbed(text):
-    try:
-        co = cohere.Client(os.getenv("COHERE_TOKEN"))
-        response = co.embed(
-            model='large',
-            texts=[text])
-        return response.embeddings
-    except CohereError as e:
-        return e
 async def sim_result_loop(skilltext):
     if type(skilltext) == str:
@@ -79,7 +92,9 @@ async def sim_result_loop(skilltext):
     if type(skilltext) == dict:
         skills = [key for key, value in skilltext.items() if value == "Skill"]
         skills = str(skills).replace("'", "").replace(",", "")
-    embeds = coSkillEmbed(skills)
     def cosine(A, B):
         return np.dot(A,B)/(norm(A)*norm(B))
     def format_sim(sim):
@@ -102,25 +117,6 @@ async def sim_result_loop(skilltext):
         simResults.iloc[x,1] = format_sim(simResults.iloc[x,1])
     return simResults, embeds
-async def skillNER(resume):
-    def clean_my_text(text):
-        clean_text = ' '.join(text.splitlines())
-        clean_text = clean_text.replace('-', " ").replace("/"," ")
-        clean_text = clean(clean_text.translate(str.maketrans('', '', string.punctuation)))
-        return clean_text
-    resume = clean_my_text(resume)
-    stops = set(nltk.corpus.stopwords.words('english'))
-    stops = stops.union({'eg', 'ie', 'etc', 'experience', 'experiences', 'experienced', 'experiencing', 'knowledge',
-    'ability', 'abilities', 'skill', 'skills', 'skilled', 'including', 'includes', 'included', 'include'
-    'education', 'follow', 'following', 'follows', 'followed', 'make', 'made', 'makes', 'making', 'maker',
-    'available', 'large', 'larger', 'largescale', 'client', 'clients', 'responsible', 'x', 'many', 'team', 'teams',
-    'concern', 'concerned', 'concerning', 'concerns', 'space', 'spaces', 'spaced'})
-    resume = [word for word in SpaceTokenizer().tokenize(resume) if word not in stops]
-    resume = [word for word in resume if ")" not in word]
-    resume = [word for word in resume if "(" not in word]
-    skills = {}
-    [skills.update({word : "Skill"}) if classifier(word)[0]['label'] == 'LABEL_1' else skills.update({word: "Not Skill"}) for word in resume]
-    return skills
 def get_links(simResults):
     links = []
@@ -128,8 +124,9 @@ def get_links(simResults):
     [links.append("https://www.onetonline.org/link/summary/" + get_onet_code(title)) for title in titles]
     return links
-def sim_result_loop_jobFinder(resume):
-    embeds = coSkillEmbed(resume)
     def cosine(A, B):
         return np.dot(A,B)/(norm(A)*norm(B))
     def format_sim(sim):
@@ -149,8 +146,9 @@ def sim_result_loop_jobFinder(resume):
         simResults.iloc[x,2] = format_sim(simResults.iloc[x,2])
     return simResults
-def sim_result_loop_candFinder(jobdesc):
-    embeds = coSkillEmbed(jobdesc)
     def cosine(A, B):
         return np.dot(A,B)/(norm(A)*norm(B))
     def format_sim(sim):

 from docx import Document
 import pandas as pd
 import numpy as np
 from numpy.linalg import norm
 import ssl
 import plotly_express as px
 from scrape_onet import get_onet_code
+from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
+from langchain_community.llms.ollama import Ollama
+from langchain_community.embeddings import OllamaEmbeddings
+from langchain.chains import LLMChain
+from langchain.output_parsers import CommaSeparatedListOutputParser
 # SSL CERTIFICATE FIX
 try:
 else:
     ssl._create_default_https_context = _create_unverified_https_context
 # LOAD COHERE EMBEDDINGS:
 simdat = pd.read_csv('static/embeddings/cohere_embeddings.csv')
 coheredat = pd.read_csv('static/cohere_tSNE_dat.csv')
+# LOAD LLM MODELS:
+model = Ollama(model="mistral")
+embedding_model = OllamaEmbeddings(model="mistral")
+parser = CommaSeparatedListOutputParser()
 # UTILITY FUNCTIONS
+def remove_new_line(value):
+        return ''.join(value.splitlines())
 async def neighborhoods(jobtitle=None):
     def format_title(logo, title, subtitle, title_font_size = 28, subtitle_font_size=14):
         logo = f'<a href="/" target="_self">{logo}</a>'
     fig['layout'].update(height=1000, width=1500, font=dict(family='Courier New, monospace', color='black'))
     fig.write_html('templates/job_neighborhoods.html')
 def get_resume(resume):
     path = f"static/{resume.filename}"
     with open(path, 'wb') as buffer:
     resume = "\n".join(text)
     return resume
+def skill_extractor(resume):
+     system_prompt_template = SystemMessagePromptTemplate.from_template("""
+     ### [INST]
+     Instruction: You are an expert job analyst tasked with identifying both technical and soft skills in resumes.
+     You always respond in the following format: 'skill1, skill2, skill3, ...' and never provide an explanation or justification for your response.
+     For example, given the following statement in a resume: 'significant experience in python and familiarity with machine learning packages, such as sklearn, torch, and tensorflow'
+     you respond: 'python, sklearn, torch, tensorflow'.
+     [/INST]
+     """)
+     human_prompt_template = HumanMessagePromptTemplate.from_template("""
+     ### QUESTION:
+     What skills are in the following resume?:
+     {resume}
+     """)
+     prompt = ChatPromptTemplate.from_messages([system_prompt_template, human_prompt_template])
+     llm_chain = LLMChain(llm=model, prompt=prompt)
+     result = llm_chain.invoke({"resume": resume})
+     result = remove_new_line(result['text'])
+     return parser.parse(result)
+def skillEmbed(skills):
+    embeddings = embedding_model.embed_query(skills)
+    return embeddings
 async def sim_result_loop(skilltext):
     if type(skilltext) == str:
     if type(skilltext) == dict:
         skills = [key for key, value in skilltext.items() if value == "Skill"]
         skills = str(skills).replace("'", "").replace(",", "")
+    if type(skilltext) == list:
+        skills = ', '.join(skilltext)
+    embeds = skillEmbed(skills)
     def cosine(A, B):
         return np.dot(A,B)/(norm(A)*norm(B))
     def format_sim(sim):
         simResults.iloc[x,1] = format_sim(simResults.iloc[x,1])
     return simResults, embeds
 def get_links(simResults):
     links = []
     [links.append("https://www.onetonline.org/link/summary/" + get_onet_code(title)) for title in titles]
     return links
+def sim_result_loop_jobFinder(skills):
+    embeds = skillEmbed(skills)
     def cosine(A, B):
         return np.dot(A,B)/(norm(A)*norm(B))
     def format_sim(sim):
         simResults.iloc[x,2] = format_sim(simResults.iloc[x,2])
     return simResults
+def sim_result_loop_candFinder(skills):
+    embeds = skillEmbed(skills)
     def cosine(A, B):
         return np.dot(A,B)/(norm(A)*norm(B))
     def format_sim(sim):

requirements.txt CHANGED Viewed

@@ -22,4 +22,5 @@ passlib==1.7.4
 localStoragePy==0.2.3
 sentence-transformers==2.2.2
 mangum==0.17.0
-certifi==2023.7.22

 localStoragePy==0.2.3
 sentence-transformers==2.2.2
 mangum==0.17.0
+certifi==2023.7.22
+langchain==0.1.4

static/model_shards/config.json DELETED Viewed

@@ -1,25 +0,0 @@
-{
-  "_name_or_path": "celise88/distilbert-base-uncased-finetuned-binary-classifier",
-  "activation": "gelu",
-  "architectures": [
-    "DistilBertForSequenceClassification"
-  ],
-  "attention_dropout": 0.1,
-  "dim": 768,
-  "dropout": 0.1,
-  "hidden_dim": 3072,
-  "initializer_range": 0.02,
-  "max_position_embeddings": 512,
-  "model_type": "distilbert",
-  "n_heads": 12,
-  "n_layers": 6,
-  "pad_token_id": 0,
-  "problem_type": "single_label_classification",
-  "qa_dropout": 0.1,
-  "seq_classif_dropout": 0.2,
-  "sinusoidal_pos_embds": false,
-  "tie_weights_": true,
-  "torch_dtype": "float32",
-  "transformers_version": "4.25.1",
-  "vocab_size": 30522
-}

static/model_shards/pytorch_model-00001-of-00006.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4b71425a895e228378ca2e132485db2027a2d04fa588241bbe3c91d7557167be
-size 537

static/model_shards/pytorch_model-00002-of-00006.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6912a218252c3bd43d77edb6a94f9baea358e0ef3b0cbb1d7c565dff7317f67c
-size 93764522

static/model_shards/pytorch_model-00003-of-00006.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d000e5c4c8e8d61e178943368d71e1a9d2fc6c3ea9d9f58ade1668599ace06ed
-size 48846141

static/model_shards/pytorch_model-00004-of-00006.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0a3e308faaa42d27babfd6e8b29d65deeb66b109d0477a9e0c9f76a70af3ce3f
-size 47263787

static/model_shards/pytorch_model-00005-of-00006.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1951867aacee99f76ec6374166ff03869c8d5cca4004cfe496cd36e948b8c745
-size 49618047

static/model_shards/pytorch_model-00006-of-00006.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ae443170da3a02b133bfd6f8bc2c8b2c205b1e89e7c38950886eeb920cb9f406
-size 28363923

static/model_shards/pytorch_model.bin.index.json DELETED Viewed

@@ -1,111 +0,0 @@
-{
-  "metadata": {
-    "total_size": 267820040
-  },
-  "weight_map": {
-    "classifier.bias": "pytorch_model-00006-of-00006.bin",
-    "classifier.weight": "pytorch_model-00006-of-00006.bin",
-    "distilbert.embeddings.LayerNorm.bias": "pytorch_model-00003-of-00006.bin",
-    "distilbert.embeddings.LayerNorm.weight": "pytorch_model-00003-of-00006.bin",
-    "distilbert.embeddings.position_embeddings.weight": "pytorch_model-00003-of-00006.bin",
-    "distilbert.embeddings.word_embeddings.weight": "pytorch_model-00002-of-00006.bin",
-    "distilbert.transformer.layer.0.attention.k_lin.bias": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.0.attention.k_lin.weight": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.0.attention.out_lin.bias": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.0.attention.out_lin.weight": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.0.attention.q_lin.bias": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.0.attention.q_lin.weight": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.0.attention.v_lin.bias": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.0.attention.v_lin.weight": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.0.ffn.lin1.bias": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.0.ffn.lin1.weight": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.0.ffn.lin2.bias": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.0.ffn.lin2.weight": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.0.output_layer_norm.bias": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.0.output_layer_norm.weight": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.0.sa_layer_norm.bias": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.0.sa_layer_norm.weight": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.1.attention.k_lin.bias": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.1.attention.k_lin.weight": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.1.attention.out_lin.bias": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.1.attention.out_lin.weight": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.1.attention.q_lin.bias": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.1.attention.q_lin.weight": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.1.attention.v_lin.bias": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.1.attention.v_lin.weight": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.1.ffn.lin1.bias": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.1.ffn.lin1.weight": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.1.ffn.lin2.bias": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.1.ffn.lin2.weight": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.1.output_layer_norm.bias": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.1.output_layer_norm.weight": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.1.sa_layer_norm.bias": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.1.sa_layer_norm.weight": "pytorch_model-00003-of-00006.bin",
-    "distilbert.transformer.layer.2.attention.k_lin.bias": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.2.attention.k_lin.weight": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.2.attention.out_lin.bias": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.2.attention.out_lin.weight": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.2.attention.q_lin.bias": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.2.attention.q_lin.weight": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.2.attention.v_lin.bias": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.2.attention.v_lin.weight": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.2.ffn.lin1.bias": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.2.ffn.lin1.weight": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.2.ffn.lin2.bias": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.2.ffn.lin2.weight": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.2.output_layer_norm.bias": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.2.output_layer_norm.weight": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.2.sa_layer_norm.bias": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.2.sa_layer_norm.weight": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.3.attention.k_lin.bias": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.3.attention.k_lin.weight": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.3.attention.out_lin.bias": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.3.attention.out_lin.weight": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.3.attention.q_lin.bias": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.3.attention.q_lin.weight": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.3.attention.v_lin.bias": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.3.attention.v_lin.weight": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.3.ffn.lin1.bias": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.3.ffn.lin1.weight": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.3.ffn.lin2.bias": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.3.ffn.lin2.weight": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.3.output_layer_norm.bias": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.3.output_layer_norm.weight": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.3.sa_layer_norm.bias": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.3.sa_layer_norm.weight": "pytorch_model-00004-of-00006.bin",
-    "distilbert.transformer.layer.4.attention.k_lin.bias": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.4.attention.k_lin.weight": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.4.attention.out_lin.bias": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.4.attention.out_lin.weight": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.4.attention.q_lin.bias": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.4.attention.q_lin.weight": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.4.attention.v_lin.bias": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.4.attention.v_lin.weight": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.4.ffn.lin1.bias": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.4.ffn.lin1.weight": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.4.ffn.lin2.bias": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.4.ffn.lin2.weight": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.4.output_layer_norm.bias": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.4.output_layer_norm.weight": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.4.sa_layer_norm.bias": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.4.sa_layer_norm.weight": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.5.attention.k_lin.bias": "pytorch_model-00006-of-00006.bin",
-    "distilbert.transformer.layer.5.attention.k_lin.weight": "pytorch_model-00006-of-00006.bin",
-    "distilbert.transformer.layer.5.attention.out_lin.bias": "pytorch_model-00006-of-00006.bin",
-    "distilbert.transformer.layer.5.attention.out_lin.weight": "pytorch_model-00006-of-00006.bin",
-    "distilbert.transformer.layer.5.attention.q_lin.bias": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.5.attention.q_lin.weight": "pytorch_model-00005-of-00006.bin",
-    "distilbert.transformer.layer.5.attention.v_lin.bias": "pytorch_model-00006-of-00006.bin",
-    "distilbert.transformer.layer.5.attention.v_lin.weight": "pytorch_model-00006-of-00006.bin",
-    "distilbert.transformer.layer.5.ffn.lin1.bias": "pytorch_model-00006-of-00006.bin",
-    "distilbert.transformer.layer.5.ffn.lin1.weight": "pytorch_model-00006-of-00006.bin",
-    "distilbert.transformer.layer.5.ffn.lin2.bias": "pytorch_model-00006-of-00006.bin",
-    "distilbert.transformer.layer.5.ffn.lin2.weight": "pytorch_model-00006-of-00006.bin",
-    "distilbert.transformer.layer.5.output_layer_norm.bias": "pytorch_model-00006-of-00006.bin",
-    "distilbert.transformer.layer.5.output_layer_norm.weight": "pytorch_model-00006-of-00006.bin",
-    "distilbert.transformer.layer.5.sa_layer_norm.bias": "pytorch_model-00006-of-00006.bin",
-    "distilbert.transformer.layer.5.sa_layer_norm.weight": "pytorch_model-00006-of-00006.bin",
-    "pre_classifier.bias": "pytorch_model-00006-of-00006.bin",
-    "pre_classifier.weight": "pytorch_model-00006-of-00006.bin"
-  }
-}

static/styles.css CHANGED Viewed

@@ -222,6 +222,21 @@ html {
     font-weight: bold;
 }
 .selection__form {
     display: table-row-group;
     vertical-align: left;

     font-weight: bold;
 }
+table {
+    width: 100%;
+}
+.output__list-item_int {
+    font-size: 12px;
+    color: #2c2161;
+}
+.output__list-coloreditem_int {
+    font-size: 14px;
+    color: #3cd0ff;
+    font-weight: bold;
+}
 .selection__form {
     display: table-row-group;
     vertical-align: left;

static/tokenizer_shards/special_tokens_map.json DELETED Viewed

@@ -1,7 +0,0 @@
-{
-  "cls_token": "[CLS]",
-  "mask_token": "[MASK]",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "unk_token": "[UNK]"
-}

static/tokenizer_shards/tokenizer.json DELETED Viewed

The diff for this file is too large to render. See raw diff

static/tokenizer_shards/tokenizer_config.json DELETED Viewed

@@ -1,14 +0,0 @@
-{
-  "cls_token": "[CLS]",
-  "do_lower_case": true,
-  "mask_token": "[MASK]",
-  "model_max_length": 512,
-  "name_or_path": "celise88/distilbert-base-uncased-finetuned-binary-classifier",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "special_tokens_map_file": null,
-  "strip_accents": null,
-  "tokenize_chinese_chars": true,
-  "tokenizer_class": "DistilBertTokenizer",
-  "unk_token": "[UNK]"
-}

static/tokenizer_shards/vocab.txt DELETED Viewed

The diff for this file is too large to render. See raw diff

templates/candidate_matcher.html CHANGED Viewed

@@ -34,14 +34,10 @@
             <article class="output__section">
                 <h2 class="output__subtitle">Extracted Skills</h2>
                 <ul>
-                {% for word in jobdesc.lower().replace("-"," ").replace(")","").replace("(","").replace(":","").replace(",","").replace("/"," ").split(" ") %}
-                    {% if skills.get(word) == "Skill" %}
-                        <span class="output__list-coloreditem">{{ word }}</span>
-                    {% else %}
-                        <span class="output__list-item">{{ word }}</span>
-                    {% endif %}
-                {% endfor %}
-            </ul>
             </article>
             <article class="output__section">
                 <h2 class="output__subtitle">We Think Your Job Description Most Closely Matches these Roles</h2>
@@ -81,8 +77,7 @@
     </main>
     <footer class="footer">
         <ul class="footer__text">
-            <li class="footer__text-item">© 2023 Pathfinder</li>
-            <li class="footer__text-item">For details on the finetuned distilbert model being used in this step, please see: <a class="footer__text-link" href="https://www.github.com/celise88/Pathfinder">github.com/celise88/Pathfinder</li>
         </ul>
     </footer>
 </body>

             <article class="output__section">
                 <h2 class="output__subtitle">Extracted Skills</h2>
                 <ul>
+                    {% for skill in skills %}
+                    <li class="sectionlist__item">{{ skill.replace('.','') }}</li>
+                    {% endfor %}
+                </ul>
             </article>
             <article class="output__section">
                 <h2 class="output__subtitle">We Think Your Job Description Most Closely Matches these Roles</h2>
     </main>
     <footer class="footer">
         <ul class="footer__text">
+            <li class="footer__text-item">© 2024 Pathfinder</li>
         </ul>
     </footer>
 </body>

templates/find_hire.html CHANGED Viewed

@@ -30,7 +30,7 @@
     </main>
     <footer class="footer">
         <ul class="footer__text">
-            <li class="footer__text-item">© 2023 Pathfinder</li>
         </ul>
     </footer>
 </body>

     </main>
     <footer class="footer">
         <ul class="footer__text">
+            <li class="footer__text-item">© 2024 Pathfinder</li>
         </ul>
     </footer>
 </body>

templates/find_match.html CHANGED Viewed

@@ -33,16 +33,13 @@
             <li class="sectionlist__item"><a style="color: #2c2161" href={{ linklist[n] }}>{{ jobpostings[n] }}</a></li>
             {% endfor %}
         </ul>
-        {% else %}
-        <h2 class="pagesubtitle">We're sorry! This page is currently under construction.</h2>
-        <h2 class="pagesubtitle">Please check back soon to get {{ jobselection }} jobs that are a great match for your skillset and interests!</h2>
         {% endif %}
         <br>
         <br>
     </main>
     <footer class="footer">
         <ul class="footer__text">
-            <li class="footer__text-item">© 2023 Pathfinder</li>
             <li class="footer__text-item">Job postings courtesy of <a class="footer__text-link" href="https://www.onetonline.org">onetonline.org</li>
             </ul>
     </footer>

             <li class="sectionlist__item"><a style="color: #2c2161" href="{{ linklist[n] }}">{{ jobpostings[n] }}</a></li>
             {% endfor %}
         </ul>
         {% endif %}
         <br>
         <br>
     </main>
     <footer class="footer">
         <ul class="footer__text">
+            <li class="footer__text-item">© 2024 Pathfinder</li>
             <li class="footer__text-item">Job postings courtesy of <a class="footer__text-link" href="https://www.onetonline.org">onetonline.org</a></li>
             </ul>
     </footer>

templates/find_my_match.html CHANGED Viewed

@@ -33,15 +33,11 @@
             {% if resume %}
             <article class="output__section">
                 <h2 class="output__subtitle">Extracted Skills</h3>
-                <ul>
-                {% for word in resume.lower().replace("-"," ").replace(")","").replace("(","").replace(":","").replace(",","").replace("/"," ").split(" ") %}
-                    {% if skills.get(word) == "Skill" %}
-                        <span class="output__list-coloreditem">{{ word }}</span>
-                    {% else %}
-                        <span class="output__list-item">{{ word }}</span>
-                    {% endif %}
-                {% endfor %}
-            </ul>
             </article>
             <article class="output__section">
                 <h2 class="output__subtitle">Job Matches</h3>
@@ -87,8 +83,7 @@
     </main>
     <footer class="footer">
         <ul class="footer__text">
-            <li class="footer__text-item">© 2023 Pathfinder</li>
-            <li class="footer__text-item">For details on the finetuned distilbert model being used in this step, please see: <a class="footer__text-link" href="https://www.github.com/celise88/Pathfinder">github.com/celise88/Pathfinder</li>
         </ul>
     </footer>
 </body>

             {% if resume %}
             <article class="output__section">
                 <h2 class="output__subtitle">Extracted Skills</h2>
+                <ul class="output__list-coloreditem">
+                    {% for skill in skills %}
+                    <li class="sectionlist__item">{{ skill.replace('.','') }}</li>
+                    {% endfor %}
+                </ul>
             </article>
             <article class="output__section">
                 <h2 class="output__subtitle">Job Matches</h2>
     </main>
     <footer class="footer">
         <ul class="footer__text">
+            <li class="footer__text-item">© 2024 Pathfinder</li>
         </ul>
     </footer>
 </body>

templates/job_list.html CHANGED Viewed

@@ -283,7 +283,7 @@
     </main>
     <footer class="footer">
         <ul class="footer__text">
-            <li class="footer__text-item">© 2023 Pathfinder</li>
             <li class="footer__text-item">Information on this page is courtesy of <a class="footer__text-link" href="https://www.onetonline.org">onetonline.org</li>
         </ul>
     </footer>

     </main>
     <footer class="footer">
         <ul class="footer__text">
+            <li class="footer__text-item">© 2024 Pathfinder</li>
             <li class="footer__text-item">Information on this page is courtesy of <a class="footer__text-link" href="https://www.onetonline.org">onetonline.org</a></li>
         </ul>
     </footer>

templates/login.html CHANGED Viewed

@@ -50,7 +50,7 @@
     </main>
     <footer class="footer">
         <ul class="footer__text">
-            <li class="footer__text-item">© 2023 Pathfinder</li>
         </ul>
     </footer>
 </body>

     </main>
     <footer class="footer">
         <ul class="footer__text">
+            <li class="footer__text-item">© 2024 Pathfinder</li>
         </ul>
     </footer>
 </body>

templates/logout.html CHANGED Viewed

@@ -40,7 +40,7 @@
     </main>
     <footer class="footer">
         <ul class="footer__text">
-            <li class="footer__text-item">© 2023 Pathfinder</li>
         </ul>
     </footer>
 </body>

     </main>
     <footer class="footer">
         <ul class="footer__text">
+            <li class="footer__text-item">© 2024 Pathfinder</li>
         </ul>
     </footer>
 </body>

templates/register.html CHANGED Viewed

@@ -53,7 +53,7 @@
     </main>
     <footer class="footer">
         <ul class="footer__text">
-            <li class="footer__text-item">© 2023 Pathfinder</li>
         </ul>
     </footer>
 </body>

     </main>
     <footer class="footer">
         <ul class="footer__text">
+            <li class="footer__text-item">© 2024 Pathfinder</li>
         </ul>
     </footer>
 </body>