abhibisht89 committed on
Commit
e5e34f2
1 Parent(s): 1f96c50

create app.py

Files changed (1)
  1. app.py +67 -0
app.py ADDED
@@ -0,0 +1,67 @@
+ import spacy
+ import gradio as gr
+ from spacy.pipeline import EntityRuler
+ from spacy import displacy
+ import jsonlines
+ nlp = spacy.load('en_core_web_sm')
+
+ # Create list with entity labels from jsonl file
+ with jsonlines.open("skill_patterns.jsonl") as f:
+     created_entities = [line['label'].upper() for line in f.iter()]
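+ # each line of skill_patterns.jsonl is assumed to look like:
+ # {"label": "SKILL|python", "pattern": [{"LOWER": "python"}]}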
+
+ def extract_text_from_word(txt):
+     '''Normalize text extracted from a .doc or .docx file: collapse newlines and tabs to spaces, lowercase'''
+     return txt.replace('\n', ' ').replace('\t', ' ').lower()
+
+ def add_newruler_to_pipeline(skill_pattern_path):
+     '''Reads in all created patterns from a JSONL file and adds them to the pipeline after PARSER and before NER'''
+     # new_ruler = EntityRuler(nlp).from_disk(skill_pattern_path)  # older spaCy v2 style
+     ruler = nlp.add_pipe("entity_ruler", after='parser')
+     ruler.from_disk(skill_pattern_path)  # loads patterns only
+
+ def create_skill_set(doc):
+     '''Create a set of the extracted skill entities of a doc'''
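+     # labels are assumed to look like 'SKILL|python'; [6:] strips the 'SKILL|' prefix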
+     return set([ent.label_.upper()[6:] for ent in doc.ents if 'skill' in ent.label_.lower()])
+
+ def create_skillset_dict(resume_names, resume_texts):
+     '''Create a dictionary containing a set of the extracted skills. Name is key, matching skillset is value'''
+     skillsets = [create_skill_set(resume_text) for resume_text in resume_texts]
+     return dict(zip(resume_names, skillsets))
+
+ def match_skills(vacature_set, cv_set, resume_name):
+     '''Get intersection of resume skills and job offer skills and return match percentage'''
+     if len(vacature_set) < 1:
+         print('could not extract skills from job offer text')
+         return (resume_name, 0.0)
+     pct_match = round(len(vacature_set.intersection(cv_set[resume_name])) / len(vacature_set) * 100, 0)
+     print(resume_name + " has a {}% skill match on this job offer".format(pct_match))
+     print('Required skills: {} '.format(vacature_set))
+     print('Matched skills: {} \n'.format(vacature_set.intersection(cv_set[resume_name])))
+     return (resume_name, pct_match)
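+ # pct_match is the share of required JD skills found in the resume,
+ # e.g. 3 of 4 required skills matched -> 75.0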
+
+ add_newruler_to_pipeline("skill_patterns.jsonl")
+
+ def match(CV, JD):
+     resume_texts = []
+     resume_texts.append(nlp(CV))
+     resume_names = ['ABHI']
+     skillset_dict = create_skillset_dict(resume_names, resume_texts)
+     jd_skillset = create_skill_set(nlp(JD))
+     match_pairs = [match_skills(jd_skillset, skillset_dict, name) for name in skillset_dict.keys()]
+     return match_pairs
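+ # e.g., if both texts trigger only the PYTHON skill pattern:
+ # match("python developer", "looking for a python developer") -> [('ABHI', 100.0)]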
+
+ # illustrative CV/JD example pair for the demo inputs
+ exp = [["Experienced Python developer with a background in machine learning, SQL and data analysis.",
+         "We are looking for a data scientist skilled in Python, SQL and machine learning."]]
+
+ desc = "This demo extracts skill entities from a CV and a job description (JD) with a spaCy EntityRuler and returns the skill-match percentage per resume."
+
+ inp1 = gr.inputs.Textbox(lines=10, placeholder=None, default="", label="CV")
+ inp2 = gr.inputs.Textbox(lines=10, placeholder=None, default="", label="JD")
+
+ out = gr.outputs.Textbox(type="auto", label="match results")
+
+ iface = gr.Interface(fn=match, inputs=[inp1, inp2], outputs=[out], examples=exp, article=desc, title="CV-JD Skill Matcher", theme="huggingface", layout='vertical')
+ iface.launch()