HuyDN committed on
Commit
7796444
1 Parent(s): 53d57fc

Phase 2/HuyDN: add result_cv_matching function

Browse files
.gitignore CHANGED
@@ -2,3 +2,6 @@ venv/
2
  .env
3
  __pycache__/
4
  credentials/
 
 
 
 
2
  .env
3
  __pycache__/
4
  credentials/
5
+ data/CV
6
+ data/JD
7
+ data/QUESTION
app/modules/crud_question_test/__init__.py CHANGED
@@ -20,7 +20,7 @@ async def add_question_test(description: str, role: str, file_question_tests: An
20
  # check if file is pdf or json
21
  if file_question_tests.content_type == "application/pdf":
22
  # create a new document
23
- if create_question_test({"description": description, "role": role, "question_tests": file_question_tests}):
24
  return {"message": "Question test added successfully"}
25
  else:
26
  return {"message": "Error"}
 
20
  # check if file is pdf or json
21
  if file_question_tests.content_type == "application/pdf":
22
  # create a new document
23
+ if create_question_test({"question_tests_description": description, "question_tests_role": role, "question_tests_url": file_question_tests}):
24
  return {"message": "Question test added successfully"}
25
  else:
26
  return {"message": "Error"}
app/modules/matching_cv/__init__.py CHANGED
@@ -1,8 +1,36 @@
1
- from fastapi import APIRouter, UploadFile, File
2
- from typing import Annotated
 
 
 
 
3
 
4
  cvmatching_router = APIRouter(prefix="/cvmatching", tags=["cvmatching"])
5
 
6
  @cvmatching_router.get("/")
7
  async def index():
8
- return {"message": "Welcome to CV matching page"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import docx
2
+
3
+ from fastapi import APIRouter
4
+ from app.modules.matching_cv.models.match_cv_jd_model import Match_JD_CV_Model
5
+
6
+ from app.modules.matching_cv.models.matching_cv_logic import result_matching_cv_jd, load_jd_from_id
7
 
8
  cvmatching_router = APIRouter(prefix="/cvmatching", tags=["cvmatching"])
9
 
10
  @cvmatching_router.get("/")
11
  async def index():
12
+ return {"message": "Welcome to CV matching page"}
13
+
14
@cvmatching_router.post("/matching")
async def matching_cv_jd(
        jd_upload: Match_JD_CV_Model.jd = Match_JD_CV_Model.jd_default,
        cv_upload: Match_JD_CV_Model.cv = Match_JD_CV_Model.cv_default):
    """Match an uploaded CV against an uploaded JD.

    Only a ``.txt`` JD and a ``.pdf`` or ``.docx`` CV are accepted; the
    extension is compared case-insensitively. PDF CVs are recognised but not
    processed yet.

    Args:
        jd_upload: Uploaded job description file (UTF-8 text).
        cv_upload: Uploaded CV file.

    Returns:
        ``{"result": ...}`` with the output of ``result_matching_cv_jd`` on
        success, ``{"message": ...}`` when the upload is rejected or the
        format is not supported yet, or ``{"Error": ...}`` carrying the text
        of any exception raised while processing.
    """
    try:
        # The filename can be missing on an upload; treat that as "no
        # extension" so the caller gets the format message instead of an
        # AttributeError. Lower-casing lets "CV.DOCX" pass the check.
        jd_upload_type = (jd_upload.filename or "").rsplit(".", 1)[-1].lower()
        cv_upload_type = (cv_upload.filename or "").rsplit(".", 1)[-1].lower()

        if jd_upload_type != "txt" or cv_upload_type not in ("pdf", "docx"):
            return {"message": "Please upload only .txt for JD. And .pdf or .docx file for CV"}

        jd_text = jd_upload.file.read().decode("utf-8")

        if cv_upload_type == "pdf":
            return {"message": "This feature is not available yet"}

        # Only .docx reaches this point: flatten its paragraphs to plain text.
        paragraphs = docx.Document(cv_upload.file).paragraphs
        cv_text = "\n".join(paragraph.text for paragraph in paragraphs)

        # check matching cv and jd
        result = result_matching_cv_jd(cv_text, jd_text)
        return {"result": result}
    except Exception as e:
        # Top-level boundary of the endpoint: report the failure text to the
        # client rather than letting the exception propagate.
        return {"Error": str(e)}
app/modules/matching_cv/models/match_cv_jd_model.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, UploadFile, File
2
+
3
class Match_JD_CV_Model:
    """Parameter types and defaults for the CV/JD matching upload endpoint.

    ``jd``/``cv`` are used as the parameter annotations and
    ``jd_default``/``cv_default`` as the matching default values.

    NOTE: ``File(media_type=...)`` describes the media type of the request
    body as a single string; it is not a filter on the uploaded file's
    content type, and a list is not a valid value for it. The accepted
    formats (.txt for the JD, .pdf/.docx for the CV) therefore have to be
    checked by the endpoint itself.
    """

    # Annotation type for the uploaded job description.
    jd = UploadFile
    # Required file field for the job description.
    jd_default = File(..., description="Upload JD file")
    # Annotation type for the uploaded CV.
    cv = UploadFile
    # Required file field for the CV.
    cv_default = File(..., description="Upload CV file")
app/modules/matching_cv/models/matching_cv_logic.py CHANGED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import docx
3
+ from dotenv import load_dotenv
4
+
5
+ # import prompt template
6
+ from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
7
+ from langchain_core.messages import SystemMessage
8
+ from langchain_google_genai import ChatGoogleGenerativeAI
9
+
10
+ # import the JSON output parser from langchain core
11
+ from langchain_core.output_parsers import JsonOutputParser
12
+
13
+ # define the parser object
14
+ parser = JsonOutputParser()
15
+
16
+ # Import API key
17
+ load_dotenv()
18
+
19
# Read the Google API key from the process environment.
# Re-assigning os.environ['GOOGLE_API_KEY'] from os.getenv() is a no-op when
# the variable is set and raises TypeError (None is not a str) at import time
# when it is not, so the value is only read here; it is None when unset.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
22
+
23
def load_cv_from_docx(file):
    """Return the text of a .docx CV, one paragraph per line.

    Args:
        file: Whatever ``docx.Document`` accepts as its source.

    Returns:
        The text of every paragraph in the document, joined with newlines.
    """
    document = docx.Document(file)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)
30
+
31
def result_matching_cv_jd(cv_text, jd_text):
    """Ask the Gemini chat model how well a CV matches a JD.

    Args:
        cv_text: Plain text of the candidate's CV.
        jd_text: Plain text of the job description.

    Returns:
        The model's reply after it has been run through the module-level
        ``parser`` (a ``JsonOutputParser``), following the JSON layout
        requested in the system instructions.
    """
    # A SystemMessage instance is a finished message, not a template, so
    # "{cv}"/"{jd}" placeholders inside it would never be filled in. The
    # instructions are therefore kept static (which also lets the literal JSON
    # braces stay unescaped) and the documents are supplied by the human
    # message below. The example JSON has no trailing commas so that the model
    # is shown valid JSON; the keys are unchanged.
    system_instructions = SystemMessage(
        content=(
            """
            Given the CV and JD supplied in the next message, calculate the percentage match between the candidate's qualifications and the job requirements.
            To determine the match percentage, analyze the skills and experience in the CV and compare them to the requirements outlined in the JD. Provide the final match percentage as a numeric value between 0-100%, along with a brief explanation of your analysis. Follow this json format: {"Skills Match": {"Required Skills": "","Candidate Skills": "","Match Percentage": ""}, "Experience Match": {"Required Experience": "","Candidate Experience": "","Match Percentage": ""}, "Overall Match Percentage:": ""}
            """
        )
    )

    # One labelled template string, so the model can tell which text is the CV
    # and which is the JD.
    chat_template = ChatPromptTemplate.from_messages(
        [
            system_instructions,
            HumanMessagePromptTemplate.from_template("CV:\n{cv}\n\nJD:\n{jd}"),
        ]
    )

    # create the chat message
    chat_message = chat_template.format_messages(cv=cv_text, jd=jd_text)

    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3, convert_system_message_to_human=True, api_key=GOOGLE_API_KEY)
    chain = llm | parser
    return chain.invoke(chat_message)
58
+
59
def load_jd_from_id():
    """Placeholder for loading a JD by id; not implemented, returns None."""
    return None
app/modules/question_tests_retrieval/models/{text2tector.py → text2vector.py} RENAMED
File without changes