celise88 committed on
Commit
54919c4
1 Parent(s): 3588e6d

migrate to mistral LLM

Browse files
.env DELETED
@@ -1 +0,0 @@
1
- COHERE_TOKEN=''
 
 
.gitattributes CHANGED
@@ -1,2 +1 @@
1
- static/embeddings/*.csv filter=lfs diff=lfs merge=lfs -text
2
- static/model_shards/*.bin filter=lfs diff=lfs merge=lfs -text
 
1
+ static/embeddings/*.csv filter=lfs diff=lfs merge=lfs -text
 
main.py CHANGED
@@ -7,7 +7,7 @@
7
  # License: MIT License
8
 
9
  # IMPORTS
10
- from fastapi import FastAPI, Request, Form, File, UploadFile, BackgroundTasks, Depends
11
  from fastapi.templating import Jinja2Templates
12
  from fastapi.staticfiles import StaticFiles
13
  from fastapi.responses import HTMLResponse
@@ -19,7 +19,7 @@ from localStoragePy import localStoragePy
19
  localStorage = localStoragePy('pathfinder', 'text')
20
 
21
  from scrape_onet import get_onet_code, get_onet_description, get_onet_tasks, get_onet_activities, get_onet_context, get_onet_skills, get_onet_knowledge, get_onet_abilities, get_onet_interests, get_onet_styles, get_onet_values, get_job_postings
22
- from match_utils import neighborhoods, get_resume, skillNER, sim_result_loop, get_links, coSkillEmbed, sim_result_loop_jobFinder, sim_result_loop_candFinder
23
  from user_utils import Hash
24
 
25
  # APP SETUP
@@ -171,24 +171,24 @@ async def post_matches(request: Request, bt: BackgroundTasks, resume: UploadFile
171
 
172
  username = localStorage.getItem('username')
173
 
174
- def add_data_to_db(resume):
175
  db = pd.read_csv('static/res_embeddings.csv')
176
- embeds = format(coSkillEmbed(resume)).replace('[[','').replace(']]','').split(',')
177
  db.iloc[db['username']== username,5:] = embeds
178
  db.to_csv('static/res_embeddings.csv', index=False)
179
 
180
- def get_jobs_from_db(resume):
181
- job_matches = sim_result_loop_jobFinder(resume)
182
  print(job_matches)
183
 
184
  resume = get_resume(resume)
185
- skills = await skillNER(resume)
186
- simResults = await sim_result_loop(resume)
187
  links = get_links(simResults[0])
188
 
189
  if username is not None:
190
- bt.add_task(add_data_to_db, resume)
191
- bt.add_task(get_jobs_from_db, resume)
192
 
193
  return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults[0], 'links': links, 'statelist': statelist})
194
 
@@ -212,24 +212,24 @@ async def post_matches(request: Request, bt: BackgroundTasks, jobdesc: UploadFil
212
 
213
  username = localStorage.getItem('username')
214
 
215
- def add_data_to_db(jobdesc):
216
  db = pd.read_csv('static/jd_embeddings.csv')
217
- embeds = format(coSkillEmbed(jobdesc)).replace('[[','').replace(']]','').split(',')
218
  db.iloc[db['username']== username,5:] = embeds
219
  db.to_csv('static/jd_embeddings.csv', index=False)
220
 
221
- def get_cand_from_db(jobdesc):
222
- cand_matches = sim_result_loop_candFinder(jobdesc)
223
  print(cand_matches)
224
 
225
  jobdesc = get_resume(jobdesc)
226
- skills = await skillNER(jobdesc)
227
- simResults = await sim_result_loop(jobdesc)
228
  links = get_links(simResults[0])
229
 
230
  if username is not None:
231
- bt.add_task(add_data_to_db, jobdesc)
232
- bt.add_task(get_cand_from_db, jobdesc)
233
 
234
  return templates.TemplateResponse('candidate_matcher.html', context={'request': request, 'jobdesc': jobdesc, 'skills': skills, 'simResults': simResults[0], 'links': links})
235
 
 
7
  # License: MIT License
8
 
9
  # IMPORTS
10
+ from fastapi import FastAPI, Request, Form, File, UploadFile, BackgroundTasks
11
  from fastapi.templating import Jinja2Templates
12
  from fastapi.staticfiles import StaticFiles
13
  from fastapi.responses import HTMLResponse
 
19
  localStorage = localStoragePy('pathfinder', 'text')
20
 
21
  from scrape_onet import get_onet_code, get_onet_description, get_onet_tasks, get_onet_activities, get_onet_context, get_onet_skills, get_onet_knowledge, get_onet_abilities, get_onet_interests, get_onet_styles, get_onet_values, get_job_postings
22
+ from match_utils import neighborhoods, get_resume, skill_extractor, sim_result_loop, get_links, skillEmbed, sim_result_loop_jobFinder, sim_result_loop_candFinder
23
  from user_utils import Hash
24
 
25
  # APP SETUP
 
171
 
172
  username = localStorage.getItem('username')
173
 
174
+ def add_data_to_db(skills):
175
  db = pd.read_csv('static/res_embeddings.csv')
176
+ embeds = format(skillEmbed(skills)).replace('[[','').replace(']]','').replace('[','').replace(']','').split(',')
177
  db.iloc[db['username']== username,5:] = embeds
178
  db.to_csv('static/res_embeddings.csv', index=False)
179
 
180
+ def get_jobs_from_db(skills):
181
+ job_matches = sim_result_loop_jobFinder(skills)
182
  print(job_matches)
183
 
184
  resume = get_resume(resume)
185
+ skills = skill_extractor(resume)
186
+ simResults = await sim_result_loop(skills)
187
  links = get_links(simResults[0])
188
 
189
  if username is not None:
190
+ bt.add_task(add_data_to_db, skills)
191
+ bt.add_task(get_jobs_from_db, skills)
192
 
193
  return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults[0], 'links': links, 'statelist': statelist})
194
 
 
212
 
213
  username = localStorage.getItem('username')
214
 
215
+ def add_data_to_db(skills):
216
  db = pd.read_csv('static/jd_embeddings.csv')
217
+ embeds = format(skillEmbed(skills)).replace('[[','').replace(']]','').split(',')
218
  db.iloc[db['username']== username,5:] = embeds
219
  db.to_csv('static/jd_embeddings.csv', index=False)
220
 
221
+ def get_cand_from_db(skills):
222
+ cand_matches = sim_result_loop_candFinder(skills)
223
  print(cand_matches)
224
 
225
  jobdesc = get_resume(jobdesc)
226
+ skills = skill_extractor(jobdesc)
227
+ simResults = await sim_result_loop(skills)
228
  links = get_links(simResults[0])
229
 
230
  if username is not None:
231
+ bt.add_task(add_data_to_db, skills)
232
+ bt.add_task(get_cand_from_db, skills)
233
 
234
  return templates.TemplateResponse('candidate_matcher.html', context={'request': request, 'jobdesc': jobdesc, 'skills': skills, 'simResults': simResults[0], 'links': links})
235
 
match_utils.py CHANGED
@@ -1,19 +1,15 @@
1
- from cleantext import clean
2
- import string
3
- from nltk.tokenize import SpaceTokenizer
4
- import nltk
5
- import cohere
6
- from cohere import CohereError
7
- import os
8
- from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
9
  from docx import Document
10
  import pandas as pd
11
  import numpy as np
12
  from numpy.linalg import norm
13
  import ssl
14
- from dotenv import load_dotenv
15
  import plotly_express as px
16
  from scrape_onet import get_onet_code
 
 
 
 
 
17
 
18
  # SSL CERTIFICATE FIX
19
  try:
@@ -23,24 +19,20 @@ except AttributeError:
23
  else:
24
  ssl._create_default_https_context = _create_unverified_https_context
25
 
26
- # DOWNLOAD NLTK DATA IF NOT ALREADY DOWNLOADED
27
- if os.path.isdir('nltk_data')==False:
28
- nltk.download('stopwords', quiet=True)
29
-
30
- # LOAD ENVIRONMENT VARIABLES
31
- load_dotenv()
32
-
33
  # LOAD COHERE EMBEDDINGS:
34
  simdat = pd.read_csv('static/embeddings/cohere_embeddings.csv')
35
  coheredat = pd.read_csv('static/cohere_tSNE_dat.csv')
36
 
37
- # LOAD FINE-TUNED MODEL
38
- # (see https://huggingface.co/celise88/distilbert-base-uncased-finetuned-binary-classifier)
39
- model = AutoModelForSequenceClassification.from_pretrained('static/model_shards', low_cpu_mem_usage=True)
40
- tokenizer = AutoTokenizer.from_pretrained('static/tokenizer_shards', low_cpu_mem_usage=True)
41
- classifier = pipeline('text-classification', model = model, tokenizer = tokenizer)
42
 
43
  # UTILITY FUNCTIONS
 
 
 
 
44
  async def neighborhoods(jobtitle=None):
45
  def format_title(logo, title, subtitle, title_font_size = 28, subtitle_font_size=14):
46
  logo = f'<a href="/" target="_self">{logo}</a>'
@@ -52,6 +44,7 @@ async def neighborhoods(jobtitle=None):
52
  fig['layout'].update(height=1000, width=1500, font=dict(family='Courier New, monospace', color='black'))
53
  fig.write_html('templates/job_neighborhoods.html')
54
 
 
55
  def get_resume(resume):
56
  path = f"static/{resume.filename}"
57
  with open(path, 'wb') as buffer:
@@ -63,15 +56,35 @@ def get_resume(resume):
63
  resume = "\n".join(text)
64
  return resume
65
 
66
- def coSkillEmbed(text):
67
- try:
68
- co = cohere.Client(os.getenv("COHERE_TOKEN"))
69
- response = co.embed(
70
- model='large',
71
- texts=[text])
72
- return response.embeddings
73
- except CohereError as e:
74
- return e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  async def sim_result_loop(skilltext):
77
  if type(skilltext) == str:
@@ -79,7 +92,9 @@ async def sim_result_loop(skilltext):
79
  if type(skilltext) == dict:
80
  skills = [key for key, value in skilltext.items() if value == "Skill"]
81
  skills = str(skills).replace("'", "").replace(",", "")
82
- embeds = coSkillEmbed(skills)
 
 
83
  def cosine(A, B):
84
  return np.dot(A,B)/(norm(A)*norm(B))
85
  def format_sim(sim):
@@ -102,25 +117,6 @@ async def sim_result_loop(skilltext):
102
  simResults.iloc[x,1] = format_sim(simResults.iloc[x,1])
103
  return simResults, embeds
104
 
105
- async def skillNER(resume):
106
- def clean_my_text(text):
107
- clean_text = ' '.join(text.splitlines())
108
- clean_text = clean_text.replace('-', " ").replace("/"," ")
109
- clean_text = clean(clean_text.translate(str.maketrans('', '', string.punctuation)))
110
- return clean_text
111
- resume = clean_my_text(resume)
112
- stops = set(nltk.corpus.stopwords.words('english'))
113
- stops = stops.union({'eg', 'ie', 'etc', 'experience', 'experiences', 'experienced', 'experiencing', 'knowledge',
114
- 'ability', 'abilities', 'skill', 'skills', 'skilled', 'including', 'includes', 'included', 'include'
115
- 'education', 'follow', 'following', 'follows', 'followed', 'make', 'made', 'makes', 'making', 'maker',
116
- 'available', 'large', 'larger', 'largescale', 'client', 'clients', 'responsible', 'x', 'many', 'team', 'teams',
117
- 'concern', 'concerned', 'concerning', 'concerns', 'space', 'spaces', 'spaced'})
118
- resume = [word for word in SpaceTokenizer().tokenize(resume) if word not in stops]
119
- resume = [word for word in resume if ")" not in word]
120
- resume = [word for word in resume if "(" not in word]
121
- skills = {}
122
- [skills.update({word : "Skill"}) if classifier(word)[0]['label'] == 'LABEL_1' else skills.update({word: "Not Skill"}) for word in resume]
123
- return skills
124
 
125
  def get_links(simResults):
126
  links = []
@@ -128,8 +124,9 @@ def get_links(simResults):
128
  [links.append("https://www.onetonline.org/link/summary/" + get_onet_code(title)) for title in titles]
129
  return links
130
 
131
- def sim_result_loop_jobFinder(resume):
132
- embeds = coSkillEmbed(resume)
 
133
  def cosine(A, B):
134
  return np.dot(A,B)/(norm(A)*norm(B))
135
  def format_sim(sim):
@@ -149,8 +146,9 @@ def sim_result_loop_jobFinder(resume):
149
  simResults.iloc[x,2] = format_sim(simResults.iloc[x,2])
150
  return simResults
151
 
152
- def sim_result_loop_candFinder(jobdesc):
153
- embeds = coSkillEmbed(jobdesc)
 
154
  def cosine(A, B):
155
  return np.dot(A,B)/(norm(A)*norm(B))
156
  def format_sim(sim):
 
 
 
 
 
 
 
 
 
1
  from docx import Document
2
  import pandas as pd
3
  import numpy as np
4
  from numpy.linalg import norm
5
  import ssl
 
6
  import plotly_express as px
7
  from scrape_onet import get_onet_code
8
+ from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
9
+ from langchain_community.llms.ollama import Ollama
10
+ from langchain_community.embeddings import OllamaEmbeddings
11
+ from langchain.chains import LLMChain
12
+ from langchain.output_parsers import CommaSeparatedListOutputParser
13
 
14
  # SSL CERTIFICATE FIX
15
  try:
 
19
  else:
20
  ssl._create_default_https_context = _create_unverified_https_context
21
 
 
 
 
 
 
 
 
22
  # LOAD COHERE EMBEDDINGS:
23
  simdat = pd.read_csv('static/embeddings/cohere_embeddings.csv')
24
  coheredat = pd.read_csv('static/cohere_tSNE_dat.csv')
25
 
26
+ # LOAD LLM MODELS:
27
+ model = Ollama(model="mistral")
28
+ embedding_model = OllamaEmbeddings(model="mistral")
29
+ parser = CommaSeparatedListOutputParser()
 
30
 
31
  # UTILITY FUNCTIONS
32
+ def remove_new_line(value):
33
+ return ''.join(value.splitlines())
34
+
35
+
36
  async def neighborhoods(jobtitle=None):
37
  def format_title(logo, title, subtitle, title_font_size = 28, subtitle_font_size=14):
38
  logo = f'<a href="/" target="_self">{logo}</a>'
 
44
  fig['layout'].update(height=1000, width=1500, font=dict(family='Courier New, monospace', color='black'))
45
  fig.write_html('templates/job_neighborhoods.html')
46
 
47
+
48
  def get_resume(resume):
49
  path = f"static/{resume.filename}"
50
  with open(path, 'wb') as buffer:
 
56
  resume = "\n".join(text)
57
  return resume
58
 
59
+
60
+ def skill_extractor(resume):
61
+ system_prompt_template = SystemMessagePromptTemplate.from_template("""
62
+ ### [INST]
63
+ Instruction: You are an expert job analyst tasked with identifying both technical and soft skills in resumes.
64
+ You always respond in the following format: 'skill1, skill2, skill3, ...' and never provide an explanation or justification for your response.
65
+ For example, given the following statement in a resume: 'significant experience in python and familiarity with machine learning packages, such as sklearn, torch, and tensorflow'
66
+ you respond: 'python, sklearn, torch, tensorflow'.
67
+ [/INST]
68
+ """)
69
+
70
+ human_prompt_template = HumanMessagePromptTemplate.from_template("""
71
+ ### QUESTION:
72
+ What skills are in the following resume?:
73
+ {resume}
74
+ """)
75
+
76
+ prompt = ChatPromptTemplate.from_messages([system_prompt_template, human_prompt_template])
77
+ llm_chain = LLMChain(llm=model, prompt=prompt)
78
+
79
+ result = llm_chain.invoke({"resume": resume})
80
+ result = remove_new_line(result['text'])
81
+ return parser.parse(result)
82
+
83
+
84
+ def skillEmbed(skills):
85
+ embeddings = embedding_model.embed_query(skills)
86
+ return embeddings
87
+
88
 
89
  async def sim_result_loop(skilltext):
90
  if type(skilltext) == str:
 
92
  if type(skilltext) == dict:
93
  skills = [key for key, value in skilltext.items() if value == "Skill"]
94
  skills = str(skills).replace("'", "").replace(",", "")
95
+ if type(skilltext) == list:
96
+ skills = ', '.join(skilltext)
97
+ embeds = skillEmbed(skills)
98
  def cosine(A, B):
99
  return np.dot(A,B)/(norm(A)*norm(B))
100
  def format_sim(sim):
 
117
  simResults.iloc[x,1] = format_sim(simResults.iloc[x,1])
118
  return simResults, embeds
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
  def get_links(simResults):
122
  links = []
 
124
  [links.append("https://www.onetonline.org/link/summary/" + get_onet_code(title)) for title in titles]
125
  return links
126
 
127
+
128
+ def sim_result_loop_jobFinder(skills):
129
+ embeds = skillEmbed(skills)
130
  def cosine(A, B):
131
  return np.dot(A,B)/(norm(A)*norm(B))
132
  def format_sim(sim):
 
146
  simResults.iloc[x,2] = format_sim(simResults.iloc[x,2])
147
  return simResults
148
 
149
+
150
+ def sim_result_loop_candFinder(skills):
151
+ embeds = skillEmbed(skills)
152
  def cosine(A, B):
153
  return np.dot(A,B)/(norm(A)*norm(B))
154
  def format_sim(sim):
requirements.txt CHANGED
@@ -22,4 +22,5 @@ passlib==1.7.4
22
  localStoragePy==0.2.3
23
  sentence-transformers==2.2.2
24
  mangum==0.17.0
25
- certifi==2023.7.22
 
 
22
  localStoragePy==0.2.3
23
  sentence-transformers==2.2.2
24
  mangum==0.17.0
25
+ certifi==2023.7.22
26
+ langchain==0.1.4
static/model_shards/config.json DELETED
@@ -1,25 +0,0 @@
1
- {
2
- "_name_or_path": "celise88/distilbert-base-uncased-finetuned-binary-classifier",
3
- "activation": "gelu",
4
- "architectures": [
5
- "DistilBertForSequenceClassification"
6
- ],
7
- "attention_dropout": 0.1,
8
- "dim": 768,
9
- "dropout": 0.1,
10
- "hidden_dim": 3072,
11
- "initializer_range": 0.02,
12
- "max_position_embeddings": 512,
13
- "model_type": "distilbert",
14
- "n_heads": 12,
15
- "n_layers": 6,
16
- "pad_token_id": 0,
17
- "problem_type": "single_label_classification",
18
- "qa_dropout": 0.1,
19
- "seq_classif_dropout": 0.2,
20
- "sinusoidal_pos_embds": false,
21
- "tie_weights_": true,
22
- "torch_dtype": "float32",
23
- "transformers_version": "4.25.1",
24
- "vocab_size": 30522
25
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static/model_shards/pytorch_model-00001-of-00006.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b71425a895e228378ca2e132485db2027a2d04fa588241bbe3c91d7557167be
3
- size 537
 
 
 
 
static/model_shards/pytorch_model-00002-of-00006.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6912a218252c3bd43d77edb6a94f9baea358e0ef3b0cbb1d7c565dff7317f67c
3
- size 93764522
 
 
 
 
static/model_shards/pytorch_model-00003-of-00006.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d000e5c4c8e8d61e178943368d71e1a9d2fc6c3ea9d9f58ade1668599ace06ed
3
- size 48846141
 
 
 
 
static/model_shards/pytorch_model-00004-of-00006.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a3e308faaa42d27babfd6e8b29d65deeb66b109d0477a9e0c9f76a70af3ce3f
3
- size 47263787
 
 
 
 
static/model_shards/pytorch_model-00005-of-00006.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1951867aacee99f76ec6374166ff03869c8d5cca4004cfe496cd36e948b8c745
3
- size 49618047
 
 
 
 
static/model_shards/pytorch_model-00006-of-00006.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae443170da3a02b133bfd6f8bc2c8b2c205b1e89e7c38950886eeb920cb9f406
3
- size 28363923
 
 
 
 
static/model_shards/pytorch_model.bin.index.json DELETED
@@ -1,111 +0,0 @@
1
- {
2
- "metadata": {
3
- "total_size": 267820040
4
- },
5
- "weight_map": {
6
- "classifier.bias": "pytorch_model-00006-of-00006.bin",
7
- "classifier.weight": "pytorch_model-00006-of-00006.bin",
8
- "distilbert.embeddings.LayerNorm.bias": "pytorch_model-00003-of-00006.bin",
9
- "distilbert.embeddings.LayerNorm.weight": "pytorch_model-00003-of-00006.bin",
10
- "distilbert.embeddings.position_embeddings.weight": "pytorch_model-00003-of-00006.bin",
11
- "distilbert.embeddings.word_embeddings.weight": "pytorch_model-00002-of-00006.bin",
12
- "distilbert.transformer.layer.0.attention.k_lin.bias": "pytorch_model-00003-of-00006.bin",
13
- "distilbert.transformer.layer.0.attention.k_lin.weight": "pytorch_model-00003-of-00006.bin",
14
- "distilbert.transformer.layer.0.attention.out_lin.bias": "pytorch_model-00003-of-00006.bin",
15
- "distilbert.transformer.layer.0.attention.out_lin.weight": "pytorch_model-00003-of-00006.bin",
16
- "distilbert.transformer.layer.0.attention.q_lin.bias": "pytorch_model-00003-of-00006.bin",
17
- "distilbert.transformer.layer.0.attention.q_lin.weight": "pytorch_model-00003-of-00006.bin",
18
- "distilbert.transformer.layer.0.attention.v_lin.bias": "pytorch_model-00003-of-00006.bin",
19
- "distilbert.transformer.layer.0.attention.v_lin.weight": "pytorch_model-00003-of-00006.bin",
20
- "distilbert.transformer.layer.0.ffn.lin1.bias": "pytorch_model-00003-of-00006.bin",
21
- "distilbert.transformer.layer.0.ffn.lin1.weight": "pytorch_model-00003-of-00006.bin",
22
- "distilbert.transformer.layer.0.ffn.lin2.bias": "pytorch_model-00003-of-00006.bin",
23
- "distilbert.transformer.layer.0.ffn.lin2.weight": "pytorch_model-00003-of-00006.bin",
24
- "distilbert.transformer.layer.0.output_layer_norm.bias": "pytorch_model-00003-of-00006.bin",
25
- "distilbert.transformer.layer.0.output_layer_norm.weight": "pytorch_model-00003-of-00006.bin",
26
- "distilbert.transformer.layer.0.sa_layer_norm.bias": "pytorch_model-00003-of-00006.bin",
27
- "distilbert.transformer.layer.0.sa_layer_norm.weight": "pytorch_model-00003-of-00006.bin",
28
- "distilbert.transformer.layer.1.attention.k_lin.bias": "pytorch_model-00003-of-00006.bin",
29
- "distilbert.transformer.layer.1.attention.k_lin.weight": "pytorch_model-00003-of-00006.bin",
30
- "distilbert.transformer.layer.1.attention.out_lin.bias": "pytorch_model-00003-of-00006.bin",
31
- "distilbert.transformer.layer.1.attention.out_lin.weight": "pytorch_model-00003-of-00006.bin",
32
- "distilbert.transformer.layer.1.attention.q_lin.bias": "pytorch_model-00003-of-00006.bin",
33
- "distilbert.transformer.layer.1.attention.q_lin.weight": "pytorch_model-00003-of-00006.bin",
34
- "distilbert.transformer.layer.1.attention.v_lin.bias": "pytorch_model-00003-of-00006.bin",
35
- "distilbert.transformer.layer.1.attention.v_lin.weight": "pytorch_model-00003-of-00006.bin",
36
- "distilbert.transformer.layer.1.ffn.lin1.bias": "pytorch_model-00003-of-00006.bin",
37
- "distilbert.transformer.layer.1.ffn.lin1.weight": "pytorch_model-00003-of-00006.bin",
38
- "distilbert.transformer.layer.1.ffn.lin2.bias": "pytorch_model-00004-of-00006.bin",
39
- "distilbert.transformer.layer.1.ffn.lin2.weight": "pytorch_model-00004-of-00006.bin",
40
- "distilbert.transformer.layer.1.output_layer_norm.bias": "pytorch_model-00004-of-00006.bin",
41
- "distilbert.transformer.layer.1.output_layer_norm.weight": "pytorch_model-00004-of-00006.bin",
42
- "distilbert.transformer.layer.1.sa_layer_norm.bias": "pytorch_model-00003-of-00006.bin",
43
- "distilbert.transformer.layer.1.sa_layer_norm.weight": "pytorch_model-00003-of-00006.bin",
44
- "distilbert.transformer.layer.2.attention.k_lin.bias": "pytorch_model-00004-of-00006.bin",
45
- "distilbert.transformer.layer.2.attention.k_lin.weight": "pytorch_model-00004-of-00006.bin",
46
- "distilbert.transformer.layer.2.attention.out_lin.bias": "pytorch_model-00004-of-00006.bin",
47
- "distilbert.transformer.layer.2.attention.out_lin.weight": "pytorch_model-00004-of-00006.bin",
48
- "distilbert.transformer.layer.2.attention.q_lin.bias": "pytorch_model-00004-of-00006.bin",
49
- "distilbert.transformer.layer.2.attention.q_lin.weight": "pytorch_model-00004-of-00006.bin",
50
- "distilbert.transformer.layer.2.attention.v_lin.bias": "pytorch_model-00004-of-00006.bin",
51
- "distilbert.transformer.layer.2.attention.v_lin.weight": "pytorch_model-00004-of-00006.bin",
52
- "distilbert.transformer.layer.2.ffn.lin1.bias": "pytorch_model-00004-of-00006.bin",
53
- "distilbert.transformer.layer.2.ffn.lin1.weight": "pytorch_model-00004-of-00006.bin",
54
- "distilbert.transformer.layer.2.ffn.lin2.bias": "pytorch_model-00004-of-00006.bin",
55
- "distilbert.transformer.layer.2.ffn.lin2.weight": "pytorch_model-00004-of-00006.bin",
56
- "distilbert.transformer.layer.2.output_layer_norm.bias": "pytorch_model-00004-of-00006.bin",
57
- "distilbert.transformer.layer.2.output_layer_norm.weight": "pytorch_model-00004-of-00006.bin",
58
- "distilbert.transformer.layer.2.sa_layer_norm.bias": "pytorch_model-00004-of-00006.bin",
59
- "distilbert.transformer.layer.2.sa_layer_norm.weight": "pytorch_model-00004-of-00006.bin",
60
- "distilbert.transformer.layer.3.attention.k_lin.bias": "pytorch_model-00004-of-00006.bin",
61
- "distilbert.transformer.layer.3.attention.k_lin.weight": "pytorch_model-00004-of-00006.bin",
62
- "distilbert.transformer.layer.3.attention.out_lin.bias": "pytorch_model-00004-of-00006.bin",
63
- "distilbert.transformer.layer.3.attention.out_lin.weight": "pytorch_model-00004-of-00006.bin",
64
- "distilbert.transformer.layer.3.attention.q_lin.bias": "pytorch_model-00004-of-00006.bin",
65
- "distilbert.transformer.layer.3.attention.q_lin.weight": "pytorch_model-00004-of-00006.bin",
66
- "distilbert.transformer.layer.3.attention.v_lin.bias": "pytorch_model-00004-of-00006.bin",
67
- "distilbert.transformer.layer.3.attention.v_lin.weight": "pytorch_model-00004-of-00006.bin",
68
- "distilbert.transformer.layer.3.ffn.lin1.bias": "pytorch_model-00005-of-00006.bin",
69
- "distilbert.transformer.layer.3.ffn.lin1.weight": "pytorch_model-00005-of-00006.bin",
70
- "distilbert.transformer.layer.3.ffn.lin2.bias": "pytorch_model-00005-of-00006.bin",
71
- "distilbert.transformer.layer.3.ffn.lin2.weight": "pytorch_model-00005-of-00006.bin",
72
- "distilbert.transformer.layer.3.output_layer_norm.bias": "pytorch_model-00005-of-00006.bin",
73
- "distilbert.transformer.layer.3.output_layer_norm.weight": "pytorch_model-00005-of-00006.bin",
74
- "distilbert.transformer.layer.3.sa_layer_norm.bias": "pytorch_model-00004-of-00006.bin",
75
- "distilbert.transformer.layer.3.sa_layer_norm.weight": "pytorch_model-00004-of-00006.bin",
76
- "distilbert.transformer.layer.4.attention.k_lin.bias": "pytorch_model-00005-of-00006.bin",
77
- "distilbert.transformer.layer.4.attention.k_lin.weight": "pytorch_model-00005-of-00006.bin",
78
- "distilbert.transformer.layer.4.attention.out_lin.bias": "pytorch_model-00005-of-00006.bin",
79
- "distilbert.transformer.layer.4.attention.out_lin.weight": "pytorch_model-00005-of-00006.bin",
80
- "distilbert.transformer.layer.4.attention.q_lin.bias": "pytorch_model-00005-of-00006.bin",
81
- "distilbert.transformer.layer.4.attention.q_lin.weight": "pytorch_model-00005-of-00006.bin",
82
- "distilbert.transformer.layer.4.attention.v_lin.bias": "pytorch_model-00005-of-00006.bin",
83
- "distilbert.transformer.layer.4.attention.v_lin.weight": "pytorch_model-00005-of-00006.bin",
84
- "distilbert.transformer.layer.4.ffn.lin1.bias": "pytorch_model-00005-of-00006.bin",
85
- "distilbert.transformer.layer.4.ffn.lin1.weight": "pytorch_model-00005-of-00006.bin",
86
- "distilbert.transformer.layer.4.ffn.lin2.bias": "pytorch_model-00005-of-00006.bin",
87
- "distilbert.transformer.layer.4.ffn.lin2.weight": "pytorch_model-00005-of-00006.bin",
88
- "distilbert.transformer.layer.4.output_layer_norm.bias": "pytorch_model-00005-of-00006.bin",
89
- "distilbert.transformer.layer.4.output_layer_norm.weight": "pytorch_model-00005-of-00006.bin",
90
- "distilbert.transformer.layer.4.sa_layer_norm.bias": "pytorch_model-00005-of-00006.bin",
91
- "distilbert.transformer.layer.4.sa_layer_norm.weight": "pytorch_model-00005-of-00006.bin",
92
- "distilbert.transformer.layer.5.attention.k_lin.bias": "pytorch_model-00006-of-00006.bin",
93
- "distilbert.transformer.layer.5.attention.k_lin.weight": "pytorch_model-00006-of-00006.bin",
94
- "distilbert.transformer.layer.5.attention.out_lin.bias": "pytorch_model-00006-of-00006.bin",
95
- "distilbert.transformer.layer.5.attention.out_lin.weight": "pytorch_model-00006-of-00006.bin",
96
- "distilbert.transformer.layer.5.attention.q_lin.bias": "pytorch_model-00005-of-00006.bin",
97
- "distilbert.transformer.layer.5.attention.q_lin.weight": "pytorch_model-00005-of-00006.bin",
98
- "distilbert.transformer.layer.5.attention.v_lin.bias": "pytorch_model-00006-of-00006.bin",
99
- "distilbert.transformer.layer.5.attention.v_lin.weight": "pytorch_model-00006-of-00006.bin",
100
- "distilbert.transformer.layer.5.ffn.lin1.bias": "pytorch_model-00006-of-00006.bin",
101
- "distilbert.transformer.layer.5.ffn.lin1.weight": "pytorch_model-00006-of-00006.bin",
102
- "distilbert.transformer.layer.5.ffn.lin2.bias": "pytorch_model-00006-of-00006.bin",
103
- "distilbert.transformer.layer.5.ffn.lin2.weight": "pytorch_model-00006-of-00006.bin",
104
- "distilbert.transformer.layer.5.output_layer_norm.bias": "pytorch_model-00006-of-00006.bin",
105
- "distilbert.transformer.layer.5.output_layer_norm.weight": "pytorch_model-00006-of-00006.bin",
106
- "distilbert.transformer.layer.5.sa_layer_norm.bias": "pytorch_model-00006-of-00006.bin",
107
- "distilbert.transformer.layer.5.sa_layer_norm.weight": "pytorch_model-00006-of-00006.bin",
108
- "pre_classifier.bias": "pytorch_model-00006-of-00006.bin",
109
- "pre_classifier.weight": "pytorch_model-00006-of-00006.bin"
110
- }
111
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static/styles.css CHANGED
@@ -222,6 +222,21 @@ html {
222
  font-weight: bold;
223
  }
224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  .selection__form {
226
  display: table-row-group;
227
  vertical-align: left;
 
222
  font-weight: bold;
223
  }
224
 
225
+ table {
226
+ width: 100%;
227
+ }
228
+
229
+ .output__list-item_int {
230
+ font-size: 12px;
231
+ color: #2c2161;
232
+ }
233
+
234
+ .output__list-coloreditem_int {
235
+ font-size: 14px;
236
+ color: #3cd0ff;
237
+ font-weight: bold;
238
+ }
239
+
240
  .selection__form {
241
  display: table-row-group;
242
  vertical-align: left;
static/tokenizer_shards/special_tokens_map.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "cls_token": "[CLS]",
3
- "mask_token": "[MASK]",
4
- "pad_token": "[PAD]",
5
- "sep_token": "[SEP]",
6
- "unk_token": "[UNK]"
7
- }
 
 
 
 
 
 
 
 
static/tokenizer_shards/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
static/tokenizer_shards/tokenizer_config.json DELETED
@@ -1,14 +0,0 @@
1
- {
2
- "cls_token": "[CLS]",
3
- "do_lower_case": true,
4
- "mask_token": "[MASK]",
5
- "model_max_length": 512,
6
- "name_or_path": "celise88/distilbert-base-uncased-finetuned-binary-classifier",
7
- "pad_token": "[PAD]",
8
- "sep_token": "[SEP]",
9
- "special_tokens_map_file": null,
10
- "strip_accents": null,
11
- "tokenize_chinese_chars": true,
12
- "tokenizer_class": "DistilBertTokenizer",
13
- "unk_token": "[UNK]"
14
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static/tokenizer_shards/vocab.txt DELETED
The diff for this file is too large to render. See raw diff
 
templates/candidate_matcher.html CHANGED
@@ -34,14 +34,10 @@
34
  <article class="output__section">
35
  <h2 class="output__subtitle">Extracted Skills</h2>
36
  <ul>
37
- {% for word in jobdesc.lower().replace("-"," ").replace(")","").replace("(","").replace(":","").replace(",","").replace("/"," ").split(" ") %}
38
- {% if skills.get(word) == "Skill" %}
39
- <span class="output__list-coloreditem">{{ word }}</span>
40
- {% else %}
41
- <span class="output__list-item">{{ word }}</span>
42
- {% endif %}
43
- {% endfor %}
44
- </ul>
45
  </article>
46
  <article class="output__section">
47
  <h2 class="output__subtitle">We Think Your Job Description Most Closely Matches these Roles</h2>
@@ -81,8 +77,7 @@
81
  </main>
82
  <footer class="footer">
83
  <ul class="footer__text">
84
- <li class="footer__text-item">© 2023 Pathfinder</li>
85
- <li class="footer__text-item">For details on the finetuned distilbert model being used in this step, please see: <a class="footer__text-link" href="https://www.github.com/celise88/Pathfinder">github.com/celise88/Pathfinder</li>
86
  </ul>
87
  </footer>
88
  </body>
 
34
  <article class="output__section">
35
  <h2 class="output__subtitle">Extracted Skills</h2>
36
  <ul>
37
+ {% for skill in skills %}
38
+ <li class="sectionlist__item">{{ skill.replace('.','') }}</li>
39
+ {% endfor %}
40
+ </ul>
 
 
 
 
41
  </article>
42
  <article class="output__section">
43
  <h2 class="output__subtitle">We Think Your Job Description Most Closely Matches these Roles</h2>
 
77
  </main>
78
  <footer class="footer">
79
  <ul class="footer__text">
80
+ <li class="footer__text-item">© 2024 Pathfinder</li>
 
81
  </ul>
82
  </footer>
83
  </body>
templates/find_hire.html CHANGED
@@ -30,7 +30,7 @@
30
  </main>
31
  <footer class="footer">
32
  <ul class="footer__text">
33
- <li class="footer__text-item">© 2023 Pathfinder</li>
34
  </ul>
35
  </footer>
36
  </body>
 
30
  </main>
31
  <footer class="footer">
32
  <ul class="footer__text">
33
+ <li class="footer__text-item">© 2024 Pathfinder</li>
34
  </ul>
35
  </footer>
36
  </body>
templates/find_match.html CHANGED
@@ -33,16 +33,13 @@
33
  <li class="sectionlist__item"><a style="color: #2c2161" href="{{ linklist[n] }}">{{ jobpostings[n] }}</a></li>
34
  {% endfor %}
35
  </ul>
36
- {% else %}
37
- <h2 class="pagesubtitle">We're sorry! This page is currently under construction.</h2>
38
- <h2 class="pagesubtitle">Please check back soon to get {{ jobselection }} jobs that are a great match for your skillset and interests!</h2>
39
  {% endif %}
40
  <br>
41
  <br>
42
  </main>
43
  <footer class="footer">
44
  <ul class="footer__text">
45
- <li class="footer__text-item">© 2023 Pathfinder</li>
46
  <li class="footer__text-item">Job postings courtesy of <a class="footer__text-link" href="https://www.onetonline.org">onetonline.org</a></li>
47
  </ul>
48
  </footer>
 
33
  <li class="sectionlist__item"><a style="color: #2c2161" href="{{ linklist[n] }}">{{ jobpostings[n] }}</a></li>
34
  {% endfor %}
35
  </ul>
 
 
 
36
  {% endif %}
37
  <br>
38
  <br>
39
  </main>
40
  <footer class="footer">
41
  <ul class="footer__text">
42
+ <li class="footer__text-item">© 2024 Pathfinder</li>
43
  <li class="footer__text-item">Job postings courtesy of <a class="footer__text-link" href="https://www.onetonline.org">onetonline.org</a></li>
44
  </ul>
45
  </footer>
templates/find_my_match.html CHANGED
@@ -33,15 +33,11 @@
33
  {% if resume %}
34
  <article class="output__section">
35
  <h2 class="output__subtitle">Extracted Skills</h2>
36
- <ul>
37
- {% for word in resume.lower().replace("-"," ").replace(")","").replace("(","").replace(":","").replace(",","").replace("/"," ").split(" ") %}
38
- {% if skills.get(word) == "Skill" %}
39
- <span class="output__list-coloreditem">{{ word }}</span>
40
- {% else %}
41
- <span class="output__list-item">{{ word }}</span>
42
- {% endif %}
43
- {% endfor %}
44
- </ul>
45
  </article>
46
  <article class="output__section">
47
  <h2 class="output__subtitle">Job Matches</h2>
@@ -87,8 +83,7 @@
87
  </main>
88
  <footer class="footer">
89
  <ul class="footer__text">
90
- <li class="footer__text-item">© 2023 Pathfinder</li>
91
- <li class="footer__text-item">For details on the finetuned distilbert model being used in this step, please see: <a class="footer__text-link" href="https://www.github.com/celise88/Pathfinder">github.com/celise88/Pathfinder</li>
92
  </ul>
93
  </footer>
94
  </body>
 
33
  {% if resume %}
34
  <article class="output__section">
35
  <h2 class="output__subtitle">Extracted Skills</h2>
36
+ <ul class="output__list-coloreditem">
37
+ {% for skill in skills %}
38
+ <li class="sectionlist__item">{{ skill.replace('.','') }}</li>
39
+ {% endfor %}
40
+ </ul>
 
 
 
 
41
  </article>
42
  <article class="output__section">
43
  <h2 class="output__subtitle">Job Matches</h2>
 
83
  </main>
84
  <footer class="footer">
85
  <ul class="footer__text">
86
+ <li class="footer__text-item">© 2024 Pathfinder</li>
 
87
  </ul>
88
  </footer>
89
  </body>
templates/job_list.html CHANGED
@@ -283,7 +283,7 @@
283
  </main>
284
  <footer class="footer">
285
  <ul class="footer__text">
286
- <li class="footer__text-item">© 2023 Pathfinder</li>
287
  <li class="footer__text-item">Information on this page is courtesy of <a class="footer__text-link" href="https://www.onetonline.org">onetonline.org</a></li>
288
  </ul>
289
  </footer>
 
283
  </main>
284
  <footer class="footer">
285
  <ul class="footer__text">
286
+ <li class="footer__text-item">© 2024 Pathfinder</li>
287
  <li class="footer__text-item">Information on this page is courtesy of <a class="footer__text-link" href="https://www.onetonline.org">onetonline.org</a></li>
288
  </ul>
289
  </footer>
templates/login.html CHANGED
@@ -50,7 +50,7 @@
50
  </main>
51
  <footer class="footer">
52
  <ul class="footer__text">
53
- <li class="footer__text-item">© 2023 Pathfinder</li>
54
  </ul>
55
  </footer>
56
  </body>
 
50
  </main>
51
  <footer class="footer">
52
  <ul class="footer__text">
53
+ <li class="footer__text-item">© 2024 Pathfinder</li>
54
  </ul>
55
  </footer>
56
  </body>
templates/logout.html CHANGED
@@ -40,7 +40,7 @@
40
  </main>
41
  <footer class="footer">
42
  <ul class="footer__text">
43
- <li class="footer__text-item">© 2023 Pathfinder</li>
44
  </ul>
45
  </footer>
46
  </body>
 
40
  </main>
41
  <footer class="footer">
42
  <ul class="footer__text">
43
+ <li class="footer__text-item">© 2024 Pathfinder</li>
44
  </ul>
45
  </footer>
46
  </body>
templates/register.html CHANGED
@@ -53,7 +53,7 @@
53
  </main>
54
  <footer class="footer">
55
  <ul class="footer__text">
56
- <li class="footer__text-item">© 2023 Pathfinder</li>
57
  </ul>
58
  </footer>
59
  </body>
 
53
  </main>
54
  <footer class="footer">
55
  <ul class="footer__text">
56
+ <li class="footer__text-item">© 2024 Pathfinder</li>
57
  </ul>
58
  </footer>
59
  </body>