File size: 3,231 Bytes
c2b649f
 
 
 
 
 
 
70bd4f2
 
 
 
 
c2b649f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7202869
c2b649f
 
 
 
7202869
c2b649f
 
 
 
e7617a8
c2b649f
 
 
 
 
 
7202869
c2b649f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a6af66
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from fastapi import FastAPI, HTTPException
from models import CVExtracted, InsertedText, JobAndCV, ClassificationResult, InsertedLink
import os
from io import BytesIO
from datetime import datetime
from PyPDF2 import PdfReader
import requests
import logging

# Configure root logging before the project modules below are imported.
# NOTE(review): presumably this ordering exists so that anything `extractor`
# and `classificator` log at import time already uses this format — confirm.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.info("Starting...")
import extractor
import classificator

# NOTE(review): HF_HOME is assigned only after `extractor` and `classificator`
# have been imported. If those modules import Hugging Face libraries at import
# time, the cache location may already have been resolved and this assignment
# would have no effect — confirm, and consider moving it above the imports.
os.environ['HF_HOME'] = '/transformers_cache'

app = FastAPI()


@app.get("/", response_model=dict[str, str])
def getall():
    """Return a fixed hello-world payload for the root path."""
    greeting = {"hello": "world"}
    return greeting


@app.post("/ext", response_model=CVExtracted)
async def extract(text: InsertedText):
    """Run the CV extractor on the submitted text.

    The dict returned by ``extractor.predict`` is unpacked as keyword
    arguments into ``CVExtracted``.
    """
    return CVExtracted(**extractor.predict(text.text))


_DATE_FORMAT = "%Y-%m-%d"


def _parse_date_or_default(value, default):
    """Parse a ``YYYY-MM-DD`` string into a date; return *default* if *value* is None.

    Raises:
        HTTPException: 422 when *value* is not a string in the expected format,
            so malformed client input is reported as such instead of a 500.
    """
    if value is None:
        return default
    try:
        return datetime.strptime(value, _DATE_FORMAT).date()
    except (TypeError, ValueError) as err:
        raise HTTPException(
            status_code=422,
            detail=f"Invalid date {value!r}; expected YYYY-MM-DD",
        ) from err


def _years_of_experience(experiences, today):
    """Return whole years from the earliest start to the latest end.

    This is the overall span, so gaps between experiences are counted.
    A missing or None ``start``/``end`` is treated as *today*. The result is
    never negative, and is 0 when *experiences* is empty.
    """
    if not experiences:
        return 0
    earliest_start = min(
        _parse_date_or_default(exp.get('start'), today) for exp in experiences
    )
    latest_end = max(
        _parse_date_or_default(exp.get('end'), today) for exp in experiences
    )
    # Clamp: inconsistent data (latest end before earliest start) must not
    # yield negative years.
    return max(0, (latest_end - earliest_start).days // 365)


@app.post("/classify", response_model=ClassificationResult)
async def classify(body: JobAndCV):
    """Classify a CV against a job posting.

    Builds the two feature dicts passed to ``classificator.predict``: one from
    the CV (stringified experiences, positions, majors, skills, plus computed
    years of experience) and one from the job (description, role, stringified
    majors and skills, minimum years of experience).

    Args:
        body: Request payload holding the ``cv`` and the ``job``.

    Returns:
        ``ClassificationResult`` built from the dict returned by
        ``classificator.predict``.

    Raises:
        HTTPException: 422 when an experience date is not ``YYYY-MM-DD``.
    """
    experiences = body.cv.experiences
    today = datetime.today().date()

    # Open-ended experiences get today's date written back into the payload,
    # so the stringified experiences handed to the classifier carry a concrete
    # end date.
    for exp in experiences:
        if exp.get('end') is None:
            exp['end'] = today.strftime(_DATE_FORMAT)

    positions = [exp['position'] for exp in experiences]
    user_majors = [edu['major'] for edu in body.cv.educations]
    yoe = _years_of_experience(experiences, today)

    cv = {
        "experiences": str(experiences),
        "positions": str(positions),
        "userMajors": str(user_majors),
        "skills": str(body.cv.skills),
        "yoe": yoe,
    }
    job = {
        "jobDesc": body.job.jobDesc,
        "role": body.job.role,
        "majors": str(body.job.majors),
        "skills": str(body.job.skills),
        "minYoE": body.job.minYoE,
    }
    results = classificator.predict(cv, job)
    return ClassificationResult(**results)

# Upper bound, in seconds, on how long the CV download may stall; without a
# timeout `requests` can wait indefinitely.
_CV_DOWNLOAD_TIMEOUT_SECONDS = 30


@app.post("/cv", response_model=CVExtracted)
async def extract(link: InsertedLink):
    """Download a PDF CV from ``link.link`` and run the extractor on its text.

    The text of every page is joined with newlines and passed to
    ``extractor.predict``; the resulting dict is unpacked into ``CVExtracted``.

    Note: this function shares its name with the ``/ext`` handler defined
    earlier in this module. Both routes are registered when their decorators
    run, but the module-level name ``extract`` ends up bound to this function.

    Args:
        link: Request payload whose ``link`` attribute is the URL of the PDF.

    Raises:
        HTTPException: 502 when the file server cannot be reached or times
            out; the upstream status code when it answers with anything other
            than 200; 422 when the PDF has no pages.
    """
    # NOTE(review): `link.link` is caller-supplied and fetched server-side, so
    # this endpoint can be pointed at internal addresses (SSRF). Consider
    # restricting allowed hosts/schemes.
    # NOTE(review): `requests.get` is a blocking call inside an `async def`
    # handler, so the event loop is held for the duration of the download.
    try:
        response = requests.get(link.link, timeout=_CV_DOWNLOAD_TIMEOUT_SECONDS)
    except requests.RequestException as err:
        logging.exception("Failed to download CV PDF")
        raise HTTPException(status_code=502, detail="File server error") from err

    if response.status_code != 200:
        # The file server's own status code is forwarded to the caller.
        raise HTTPException(status_code=response.status_code, detail="File server error")

    # Open the PDF from bytes in memory.
    pdf_reader = PdfReader(BytesIO(response.content))
    if len(pdf_reader.pages) == 0:
        raise HTTPException(status_code=422, detail="PDF has no pages")

    # `or ""` guards against a page whose text extraction yields nothing.
    text = '\n'.join(page.extract_text() or "" for page in pdf_reader.pages)

    dictresult = extractor.predict(text)
    return CVExtracted(**dictresult)