from functions import extract_text_from_pdf, get_most_similar_job
from fastapi import UploadFile, HTTPException, FastAPI
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import pipeline
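
# CV-to-job matching service: summarize an uploaded CV with BART
# (facebook/bart-large-cnn), vectorize it with TF-IDF, and return the most
# similar job postings from all.csv.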




# Load the summarization model once at startup.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")



# Load the job postings and build one text field per posting. Joining the
# columns with spaces keeps the last word of one field from fusing with the
# first word of the next when the vectorizer tokenizes.
df = pd.read_csv("all.csv")
x = (
    df[["job_title", "job_description", "job_requirements", "city_name"]]
    .astype(str)
    .agg(" ".join, axis=1)
)
y = df["label"]

# Fit a TF-IDF vectorizer on the postings and vectorize them once at startup.
vectorizer = TfidfVectorizer(stop_words="english")
df_vect = vectorizer.fit_transform(x)
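
# Note: get_most_similar_job is imported from functions and its body is not
# shown in this file. As a hedged reference only, rankings of this kind are
# typically TF-IDF cosine similarity; the hypothetical helper below sketches
# that idea and is NOT wired into the endpoint.
from sklearn.metrics.pairwise import cosine_similarity

def _rank_jobs_by_similarity(cv_vect, df_vect):
    # Cosine similarity between the CV vector and every posting vector,
    # returned as row indices sorted from most to least similar.
    scores = cosine_similarity(cv_vect, df_vect).ravel()
    return scores.argsort()[::-1]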



app = FastAPI(title="CV Description")

@app.get("/")
async def read_root():
    return {"Hello": "World, Project name is : CV Description"}

@app.post("/prediction")
async def detect(cv: UploadFile, number_of_jobs: int):
    print("pf")
    
    if (type(number_of_jobs) != int) or (number_of_jobs < 1) or (number_of_jobs > df.shape[0]):
        raise HTTPException(
            status_code=415, detail = f"Please enter the number of jobs you want as an ' integer from 1 to {int(df.shape[0]) - 1} '."
        )
    
    if cv.filename.split(".")[-1] not in ("pdf") :
        raise HTTPException(
            status_code=415, detail="Please inter PDF file "
        )

    print("pf2")
    
    summ_data =[]
    cv_data = extract_text_from_pdf(await cv.read())
    index = len(cv_data)//3
    text = [cv_data[:index], cv_data[index:2*index], cv_data[2*index:]]
    for i in text:
        part = summarizer(i, max_length=150, min_length=30, do_sample=False)
        summ_data.append(part[0]["summary_text"].replace("\xa0", ""))
    print("pf3")
    data = " .".join(summ_data)
    summ_data.clear()
    cv_vect = vectorizer.transform([data])
    indices = get_most_similar_job(data=data, cv_vect=cv_vect, df_vect=df_vect)
    # Check if all threads have finished
    print("ALL Done \n\n")
    
    prediction_data = df.iloc[indices[:number_of_jobs]].applymap(lambda x: str(x)).to_dict(orient='records')
    


    return {"prediction": prediction_data}