HuyDN committed on
Commit
b151934
1 Parent(s): 826e112

Phase2/HuyDN: update cv_matching and question_test_retrieval

Browse files
app/modules/crud_cvs/models/crud_cvs.py CHANGED
@@ -1,5 +1,7 @@
1
  import uuid
2
  from app.configs.database import firebase_bucket, firebase_db
 
 
3
 
4
  # CRUD operation
5
  def upload_file_cvs(file):
@@ -16,11 +18,22 @@ def remove_file_cvs(file_url):
16
  blob.delete()
17
  return True
18
 
19
- def download_file_cvs(file_url):
20
  # download file from firebase storage using "gs://" link
21
  blob = firebase_bucket.blob(file_url.split(f"gs://{firebase_bucket.name}/")[1])
22
  # download file and return string in file
23
- return blob.download_as_text()
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def get_all_cvs():
26
  # Get all documents from the collection
 
1
  import uuid
2
  from app.configs.database import firebase_bucket, firebase_db
3
+ import io
4
+ from docx import Document
5
 
6
  # CRUD operation
7
  def upload_file_cvs(file):
 
18
  blob.delete()
19
  return True
20
 
21
def file_cv_doc2text(file_url):
    """Download a .docx CV from Firebase Storage and return its text.

    Args:
        file_url: A "gs://<bucket-name>/<path>" link pointing into
            ``firebase_bucket``.

    Returns:
        The text of every paragraph in the document, each one followed by
        a newline character.
    """
    # Everything after the "gs://<bucket-name>/" prefix is the blob path.
    bucket_prefix = f"gs://{firebase_bucket.name}/"
    blob_path = file_url.split(bucket_prefix)[1]
    raw_bytes = firebase_bucket.blob(blob_path).download_as_bytes()
    # Parse the .docx straight from memory via a BytesIO stream.
    docx_file = Document(io.BytesIO(raw_bytes))
    return "".join(para.text + "\n" for para in docx_file.paragraphs)
36
+
37
 
38
  def get_all_cvs():
39
  # Get all documents from the collection
app/modules/crud_jds/models/crud_jds.py CHANGED
@@ -17,7 +17,7 @@ def remove_file_jds(file_url):
17
  blob.delete()
18
  return True
19
 
20
- def download_file_jds(file_url):
21
  # download file from firebase storage using "gs://" link
22
  blob = firebase_bucket.blob(file_url.split(f"gs://{firebase_bucket.name}/")[1])
23
  # download file and return string in file
 
17
  blob.delete()
18
  return True
19
 
20
+ def file_jd_txt2text(file_url):
21
  # download file from firebase storage using "gs://" link
22
  blob = firebase_bucket.blob(file_url.split(f"gs://{firebase_bucket.name}/")[1])
23
  # download file and return string in file
app/modules/matching_cv/__init__.py CHANGED
@@ -1,9 +1,11 @@
1
  import docx
2
 
3
  from fastapi import APIRouter
4
- from app.modules.matching_cv.models.match_cv_jd_model import Match_JD_CV_Model
5
 
6
  from app.modules.matching_cv.models.matching_cv_logic import result_matching_cv_jd
 
 
7
 
8
  cvmatching_router = APIRouter(prefix="/cvmatching", tags=["cvmatching"])
9
 
@@ -15,7 +17,28 @@ async def index():
15
  # only upload .pdf or .docx file
16
  async def matching_cv_jd(id_jd: str, id_cv:str):
17
  try:
18
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  # # take jd_upload and cv_upload type file
20
  # jd_upload_type = jd_upload.filename.split(".")[-1]
21
  # cv_upload_type = cv_upload.filename.split(".")[-1]
 
1
  import docx
2
 
3
  from fastapi import APIRouter
4
+ # from app.modules.matching_cv.models.match_cv_jd_model import Match_JD_CV_Model
5
 
6
  from app.modules.matching_cv.models.matching_cv_logic import result_matching_cv_jd
7
+ from app.modules.crud_jds.models.crud_jds import get_jd_by_id, file_jd_txt2text
8
+ from app.modules.crud_cvs.models.crud_cvs import get_cv_by_id, file_cv_doc2text
9
 
10
  cvmatching_router = APIRouter(prefix="/cvmatching", tags=["cvmatching"])
11
 
 
17
  # only upload .pdf or .docx file
18
  async def matching_cv_jd(id_jd: str, id_cv:str):
19
  try:
20
+ # get jd and cv by id
21
+ jd_document = get_jd_by_id(id_jd)
22
+ cv_document = get_cv_by_id(id_cv)
23
+
24
+ # download file from firebase storage
25
+ jd_url = jd_document["jd_url"]
26
+ cv_url = cv_document["cv_url"]
27
+
28
+ # get type file cv from cv_url "gs://bucket_name/file_name"
29
+ cv_type = cv_url.split(".")[-1]
30
+ if cv_type == "pdf":
31
+ return {"message": "This feature is not available yet"}
32
+ elif cv_type == "docx":
33
+ cv_text = file_cv_doc2text(cv_url)
34
+ else:
35
+ return {"message": "Please upload only .pdf or .docx file for CV"}
36
+
37
+ # get jd_text from jd_url "gs://bucket_name/file_name"
38
+ jd_text = file_jd_txt2text(jd_url)
39
+
40
+ result = result_matching_cv_jd(cv_text, jd_text)
41
+ return {"result": result}
42
  # # take jd_upload and cv_upload type file
43
  # jd_upload_type = jd_upload.filename.split(".")[-1]
44
  # cv_upload_type = cv_upload.filename.split(".")[-1]
app/modules/question_tests_retrieval/__init__.py CHANGED
@@ -3,7 +3,7 @@ from typing import Annotated
3
 
4
  from app.modules.question_tests_retrieval.models.jd2text import jobdes2text
5
  from app.modules.question_tests_retrieval.models.question_tests_logic import get_question_tests
6
- from app.modules.crud_jds.models.crud_jds import get_jd_by_id, download_file_jds
7
 
8
  qtretrieval_router = APIRouter(prefix="/qtretrieval", tags=["qtretrieval"])
9
 
@@ -17,7 +17,7 @@ async def send_jd_to_get_question(id_jd: str):
17
  try:
18
  jd_document = get_jd_by_id(id_jd)
19
  # download jd file from firebase storage
20
- jd_file_string = download_file_jds(jd_document["jd_url"])
21
  sumaryjd_text = jobdes2text(jd_file_string)
22
  if get_question_tests(sumaryjd_text):
23
  return {"message": "Send JD successfully and get question test successfully",
 
3
 
4
  from app.modules.question_tests_retrieval.models.jd2text import jobdes2text
5
  from app.modules.question_tests_retrieval.models.question_tests_logic import get_question_tests
6
+ from app.modules.crud_jds.models.crud_jds import get_jd_by_id, file_jd_txt2text
7
 
8
  qtretrieval_router = APIRouter(prefix="/qtretrieval", tags=["qtretrieval"])
9
 
 
17
  try:
18
  jd_document = get_jd_by_id(id_jd)
19
  # download jd file from firebase storage
20
+ jd_file_string = file_jd_txt2text(jd_document["jd_url"])
21
  sumaryjd_text = jobdes2text(jd_file_string)
22
  if get_question_tests(sumaryjd_text):
23
  return {"message": "Send JD successfully and get question test successfully",