from fastapi import FastAPI, HTTPException, UploadFile
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import pipeline

from functions import extract_text_from_pdf, get_most_similar_job

# Summarization model used to condense CV text before matching.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
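
# `extract_text_from_pdf` and `get_most_similar_job` come from the local
# `functions` module, which is not shown here. As a rough sketch (an
# assumption, not the actual implementation), the matcher presumably ranks
# postings by TF-IDF cosine similarity, along these lines:
#
#     from sklearn.metrics.pairwise import cosine_similarity
#
#     def get_most_similar_job(data, cv_vect, df_vect):
#         # Similarity between the CV vector and every job vector.
#         scores = cosine_similarity(cv_vect, df_vect)[0]
#         # Job indices sorted from most to least similar.
#         return scores.argsort()[::-1]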
print("\n\n definition 2")
df = pd.read_csv("all.csv")
concatenated_column = pd.concat([df['job_title'] + df['job_description'] + df['job_requirements'], df['city_name']], axis=1).astype(str).agg(''.join, axis=1)
x = concatenated_column
y = df["label"]
vectorizer = TfidfVectorizer(stop_words='english')
print("df done")
vectorizer.fit(x)
df_vect = vectorizer.transform(x)
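
# At this point `df_vect` is a sparse matrix with one TF-IDF row per job
# posting; the same fitted vectorizer later projects each uploaded CV into
# this space so the two can be compared directly.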

print("Starting the API...")
app = FastAPI(title="CV Description")
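
# FastAPI serves interactive documentation for the routes below at /docs
# (Swagger UI) once the app is running.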
@app.get("/")
async def read_root():
return {"Hello": "World, Project name is : CV Description"}
@app.post("/prediction")
async def detect(cv: UploadFile, number_of_jobs: int):
print("pf")
if (type(number_of_jobs) != int) or (number_of_jobs < 1) or (number_of_jobs > df.shape[0]):
raise HTTPException(
status_code=415, detail = f"Please enter the number of jobs you want as an ' integer from 1 to {int(df.shape[0]) - 1} '."
)
if cv.filename.split(".")[-1] not in ("pdf") :
raise HTTPException(
status_code=415, detail="Please inter PDF file "
)
print("pf2")
summ_data =[]
cv_data = extract_text_from_pdf(await cv.read())
index = len(cv_data)//3
text = [cv_data[:index], cv_data[index:2*index], cv_data[2*index:]]
for i in text:
part = summarizer(i, max_length=150, min_length=30, do_sample=False)
summ_data.append(part[0]["summary_text"].replace("\xa0", ""))
print("pf3")
data = " .".join(summ_data)
summ_data.clear()
cv_vect = vectorizer.transform([data])
indices = get_most_similar_job(data=data, cv_vect=cv_vect, df_vect=df_vect)
# Check if all threads have finished
print("ALL Done \n\n")
prediction_data = df.iloc[indices[:number_of_jobs]].applymap(lambda x: str(x)).to_dict(orient='records')
return {"prediction": prediction_data}