CV_Analyze / utils /parser.py
Danial7's picture
Create utils/parser.py
8251764 verified
raw
history blame contribute delete
519 Bytes
from pdfminer.high_level import extract_text
import spacy
# Load the spaCy "en_core_web_sm" pipeline once at import time; parse_cv
# reuses this shared instance for every document it processes.
nlp = spacy.load("en_core_web_sm")
def parse_cv(file) -> tuple[str, str]:
    """Extract the text of a CV and detect an education keyword in it.

    The document is converted to plain text with pdfminer's ``extract_text``
    and split into sentences by the module-level spaCy pipeline. Sentences
    are scanned in document order and the first education keyword found
    (case-insensitive substring match) is reported.

    Args:
        file: The CV document, passed unchanged to ``extract_text``
            (presumably a PDF path or binary file object -- confirm
            against the caller).

    Returns:
        A ``(text, education_level)`` tuple. ``text`` is the full extracted
        text; ``education_level`` is the matched keyword exactly as spelled
        in the keyword list, or ``"Not Found"`` when no sentence contains
        any keyword.
    """
    text = extract_text(file)
    doc = nlp(text)
    education_keywords = ["Bachelor", "Master", "PhD", "High School", "Diploma"]

    for sent in doc.sents:
        # Lower-case each sentence once instead of once per keyword.
        sentence = sent.text.lower()
        for keyword in education_keywords:
            if keyword.lower() in sentence:
                # Return immediately: a bare ``break`` would only leave the
                # keyword loop, letting later sentences overwrite this match.
                return text, keyword

    return text, "Not Found"