MillMin committed on
Commit
6be4dd1
2 Parent(s): c24ff2d ab5e1aa

Merge branch 'main' into NhanHT

Browse files
app/modules/crud_question_test/models/crud_question_tests.py CHANGED
@@ -68,6 +68,8 @@ def create_question_test(data):
68
  def update_question_test(id, data):
69
  # Update a document by id
70
  firebase_db.collection("question_tests").document(id).update(data)
 
 
71
  return True
72
 
73
  def delete_question_test(id):
@@ -76,4 +78,19 @@ def delete_question_test(id):
76
  remove_file_question_tests(file_url)
77
  # Delete a document by id
78
  firebase_db.collection("question_tests").document(id).delete()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  return True
 
68
  def update_question_test(id, data):
69
  # Update a document by id
70
  firebase_db.collection("question_tests").document(id).update(data)
71
+ # Update corresponding vector in Qdrant
72
+
73
  return True
74
 
75
  def delete_question_test(id):
 
78
  remove_file_question_tests(file_url)
79
  # Delete a document by id
80
  firebase_db.collection("question_tests").document(id).delete()
81
+
82
+ # Delete corresponding vector from Qdrant
83
+ qdrant_client.delete(
84
+ collection_name="question_tests",
85
+ points_selector=models.FilterSelector(
86
+ filter=models.Filter(
87
+ must=[
88
+ models.FieldCondition(
89
+ key="id",
90
+ match=models.MatchValue(value=id),
91
+ ),
92
+ ],
93
+ )
94
+ ),
95
+ )
96
  return True
app/modules/matching_cv/models/match_cv_jd_model.py CHANGED
@@ -2,6 +2,6 @@ from fastapi import APIRouter, UploadFile, File
2
 
3
  class Match_JD_CV_Model:
4
  jd = UploadFile
5
- jd_default = File(..., description="Upload JD file", media_type=["text/plain"])
6
  cv = UploadFile
7
- cv_default = File(..., description="Upload CV file", media_type=["application/pdf", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"])
 
2
 
3
  class Match_JD_CV_Model:
4
  jd = UploadFile
5
+ jd_default = File(..., description="Upload JD file (only .txt file)", media_type=["text/plain"])
6
  cv = UploadFile
7
+ cv_default = File(..., description="Upload CV file (only .pdf and .docx)", media_type=["application/pdf", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"])
app/modules/matching_cv/models/matching_cv_logic.py CHANGED
@@ -31,7 +31,7 @@ def result_matching_cv_jd(cv_text, jd_text):
31
  Given the following CV and JD, calculate the percentage match between the candidate's qualifications and the job requirements:
32
  CV: {cv}
33
  JD: {jd}
34
- To determine the match percentage, analyze the skills and experience in the CV and compare them to the requirements outlined in the JD. Provide the final match percentage as a numeric value between 0-100%, along with a brief explanation of your analysis. Follow this json format: {"Skills Match": {"Required Skills": "","Candidate Skills": "","Match Percentage": "",}, "Experience Match": {"Required Experience": "","Candidate Experience": "","Match Percentage": "",}, "Overall Match Percentage:": ""}
35
  """
36
  )
37
  ),
 
31
  Given the following CV and JD, calculate the percentage match between the candidate's qualifications and the job requirements:
32
  CV: {cv}
33
  JD: {jd}
34
+ To determine the match percentage, analyze the skills and experience in the CV and compare them to the requirements outlined in the JD. Provide the final match percentage as a numeric value between 0-100%, along with a brief explanation of your analysis. Follow this json format: {"Skills Match": {"Required Skills": "","Candidate Skills": "","Match Percentage": "",}, "Experience Match": {"Required Experience": "","Candidate Experience": "","Match Percentage": "",}, "Overall Match Percentage:": "", "Explanation": ""}
35
  """
36
  )
37
  ),
app/modules/question_tests_retrieval/__init__.py CHANGED
@@ -2,8 +2,8 @@ from fastapi import APIRouter, UploadFile, File
2
  from typing import Annotated
3
 
4
  from app.modules.question_tests_retrieval.models.jd2text import jobdes2text
5
- # from app.modules.question_tests_retrieval.models.text2tector import text2vector
6
- from app.modules.question_tests_retrieval.models.question_tests_logic import get_question_test
7
 
8
  qtretrieval_router = APIRouter(prefix="/qtretrieval", tags=["qtretrieval"])
9
 
@@ -13,15 +13,16 @@ async def index():
13
 
14
  @qtretrieval_router.post("/send_jd")
15
  # only upload .txt file
16
- async def send_jd(txt_file: Annotated[UploadFile, File(..., description="The JD file", media_type=["text/plain"])]):
17
  try:
18
  # read the txt file with format
19
  jobdes = txt_file.file.read().decode("utf-8")
20
  sumaryjd_text = jobdes2text(jobdes)
21
- if get_question_test(sumaryjd_text):
22
  return {"message": "Send JD successfully and get question test successfully",
23
  "sumaryjd_text": sumaryjd_text}
24
  else:
25
  return {"message": "Please upload only .txt file", "error": str(e)}
26
  except Exception as e:
27
  return {"message": "Please upload only .txt file", "error": str(e)}
 
 
2
  from typing import Annotated
3
 
4
  from app.modules.question_tests_retrieval.models.jd2text import jobdes2text
5
+ # from app.modules.question_tests_retrieval.models.text2vector import text2vector
6
+ from app.modules.question_tests_retrieval.models.question_tests_logic import get_question_tests
7
 
8
  qtretrieval_router = APIRouter(prefix="/qtretrieval", tags=["qtretrieval"])
9
 
 
13
 
14
  @qtretrieval_router.post("/send_jd")
15
  # only upload .txt file
16
+ async def send_jd(txt_file: Annotated[UploadFile, File(..., description="The JD file (only .txt file)", media_type=["text/plain"])]):
17
  try:
18
  # read the txt file with format
19
  jobdes = txt_file.file.read().decode("utf-8")
20
  sumaryjd_text = jobdes2text(jobdes)
21
+ if get_question_tests(sumaryjd_text):
22
  return {"message": "Send JD successfully and get question test successfully",
23
  "sumaryjd_text": sumaryjd_text}
24
  else:
25
  return {"message": "Please upload only .txt file", "error": str(e)}
26
  except Exception as e:
27
  return {"message": "Please upload only .txt file", "error": str(e)}
28
+
app/modules/question_tests_retrieval/models/question_tests_logic.py CHANGED
@@ -5,8 +5,10 @@ from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
  from langchain.evaluation import load_evaluator
6
  from langchain.evaluation import EmbeddingDistance
7
 
8
- from app.modules.crud_question_test.models.crud_question_tests import get_all_question_tests, get_question_test_url_by_description
 
9
  from app.configs.database import firebase_bucket
 
10
 
11
  # Import API key
12
  load_dotenv()
@@ -19,45 +21,84 @@ GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
19
  embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
20
  gemini_evaluator = load_evaluator("embedding_distance", distance_metric=EmbeddingDistance.COSINE, embeddings=embedding_model)
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- def compare_vector(vector_extract, vector_des):
24
- maxnimun_value = 2
25
- for item in vector_des:
26
- two_object = (vector_extract, item)
27
- x = gemini_evaluator.evaluate_strings(prediction=two_object[0], reference=two_object[1])
28
- if x.get('score') < maxnimun_value:
29
- maxnimun_value = x.get('score')
30
- des_item_choose = item
31
- if maxnimun_value == 2:
32
- return False
33
- elif maxnimun_value < 0.3:
34
- return des_item_choose
35
- else:
36
- return False
 
37
 
38
- def download_question_test(question_test_url):
39
  # check folder exist
40
  if not os.path.exists('data/question_tests'):
41
  os.makedirs('data/question_tests')
42
  # download file from firebase storage using "gs://" link
43
- name_bucket = question_test_url.split(f"gs://{firebase_bucket.name}/")[1]
44
- blob = firebase_bucket.blob(name_bucket)
45
- blob.download_to_filename(f'data/question_tests/{name_bucket}')
 
 
46
  return True
47
-
48
 
49
 
50
- def get_question_test(text):
51
- all_question_tests = get_all_question_tests()
52
- value_in_des = []
53
- for item in all_question_tests:
54
- value_in_des.append(item['question_tests_description'])
55
- des_item_choose = compare_vector(text, value_in_des)
56
- if des_item_choose == False:
57
- return "No question test found"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  else:
59
- question_test_url = get_question_test_url_by_description(des_item_choose)
60
- if download_question_test(question_test_url):
61
- return True
62
- else:
63
- return False
 
5
  from langchain.evaluation import load_evaluator
6
  from langchain.evaluation import EmbeddingDistance
7
 
8
+ from app.modules.crud_question_test.models.crud_question_tests import get_question_test_by_id
9
+ from app.modules.question_tests_retrieval.models.text2vector import text2vector
10
  from app.configs.database import firebase_bucket
11
+ from app.configs.qdrant_db import qdrant_client
12
 
13
  # Import API key
14
  load_dotenv()
 
21
  embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
22
  gemini_evaluator = load_evaluator("embedding_distance", distance_metric=EmbeddingDistance.COSINE, embeddings=embedding_model)
23
 
24
+ # def compare_vector(vector_extract, vector_des):
25
+ # maxnimun_value = 2
26
+ # for item in vector_des:
27
+ # two_object = (vector_extract, item)
28
+ # x = gemini_evaluator.evaluate_strings(prediction=two_object[0], reference=two_object[1])
29
+ # if x.get('score') < maxnimun_value:
30
+ # maxnimun_value = x.get('score')
31
+ # des_item_choose = item
32
+ # if maxnimun_value == 2:
33
+ # return False
34
+ # elif maxnimun_value < 0.3:
35
+ # return des_item_choose
36
+ # else:
37
+ # return False
38
+
39
+ def compare_vector(description_vector, max_number_of_points=3):
40
+ similarity_list = qdrant_client.search(
41
+ collection_name="question_tests",
42
+ query_vector=description_vector,
43
+ limit=max_number_of_points,
44
+ with_vectors=False,
45
+ with_payload=True,
46
+ )
47
 
48
+ formatted_similarity_list = []
49
+ for point in similarity_list:
50
+ formatted_similarity_list.append({"id": point.payload.get("id"), "score": point.score})
51
+
52
+ return formatted_similarity_list
53
+
54
+ # def download_question_test(question_test_url):
55
+ # # check folder exist
56
+ # if not os.path.exists('data/question_tests'):
57
+ # os.makedirs('data/question_tests')
58
+ # # download file from firebase storage using "gs://" link
59
+ # name_bucket = question_test_url.split(f"gs://{firebase_bucket.name}/")[1]
60
+ # blob = firebase_bucket.blob(name_bucket)
61
+ # blob.download_to_filename(f'data/question_tests/{name_bucket}')
62
+ # return True
63
 
64
+ def download_question_test(question_test_url_list):
65
  # check folder exist
66
  if not os.path.exists('data/question_tests'):
67
  os.makedirs('data/question_tests')
68
  # download file from firebase storage using "gs://" link
69
+ for url in question_test_url_list:
70
+ name_bucket = url.split(f"gs://{firebase_bucket.name}/")[1]
71
+ blob = firebase_bucket.blob(name_bucket)
72
+ blob.download_to_filename(f'data/question_tests/{name_bucket}')
73
+
74
  return True
 
75
 
76
 
77
+ # def get_question_test(text):
78
+ # all_question_tests = get_all_question_tests()
79
+ # value_in_des = []
80
+ # for item in all_question_tests:
81
+ # value_in_des.append(item['question_tests_description'])
82
+ # des_item_choose = compare_vector(text, value_in_des)
83
+ # if des_item_choose == False:
84
+ # return "No question test found"
85
+ # else:
86
+ # question_test_url = get_question_test_url_by_description(des_item_choose)
87
+ # if download_question_test(question_test_url):
88
+ # return True
89
+ # else:
90
+ # return False
91
+
92
+ def get_question_tests(text):
93
+ # Get formatted similarity list
94
+ formatted_similarity_list = compare_vector(text2vector(text))
95
+ # Get corresponding document url in Firebase and download them
96
+ question_test_url_list = []
97
+ for point in formatted_similarity_list:
98
+ id = point.get("id")
99
+ question_test_url_list.append(get_question_test_by_id(id).get("question_tests_url"))
100
+
101
+ if download_question_test(question_test_url_list):
102
+ return True
103
  else:
104
+ return False
 
 
 
 
requirements.txt CHANGED
@@ -30,13 +30,17 @@ google-generativeai==0.3.2
30
  google-resumable-media==2.7.0
31
  googleapis-common-protos==1.62.0
32
  greenlet==3.0.3
33
- grpcio==1.62.0
34
  grpcio-status==1.62.0
 
35
  h11==0.14.0
 
 
36
  httpcore==1.0.4
37
  httplib2==0.22.0
38
  httptools==0.6.1
39
  httpx==0.27.0
 
40
  idna==3.6
41
  itsdangerous==2.1.2
42
  Jinja2==3.1.3
@@ -57,6 +61,7 @@ mypy-extensions==1.0.0
57
  numpy==1.26.4
58
  orjson==3.9.15
59
  packaging==23.2
 
60
  proto-plus==1.23.0
61
  protobuf==4.25.3
62
  pyasn1==0.5.1
@@ -71,7 +76,9 @@ pyparsing==3.1.2
71
  python-docx==1.1.0
72
  python-dotenv==1.0.1
73
  python-multipart==0.0.9
 
74
  PyYAML==6.0.1
 
75
  requests==2.31.0
76
  rsa==4.9
77
  sniffio==1.3.1
 
30
  google-resumable-media==2.7.0
31
  googleapis-common-protos==1.62.0
32
  greenlet==3.0.3
33
+ grpcio==1.62.1
34
  grpcio-status==1.62.0
35
+ grpcio-tools==1.62.1
36
  h11==0.14.0
37
+ h2==4.1.0
38
+ hpack==4.0.0
39
  httpcore==1.0.4
40
  httplib2==0.22.0
41
  httptools==0.6.1
42
  httpx==0.27.0
43
+ hyperframe==6.0.1
44
  idna==3.6
45
  itsdangerous==2.1.2
46
  Jinja2==3.1.3
 
61
  numpy==1.26.4
62
  orjson==3.9.15
63
  packaging==23.2
64
+ portalocker==2.8.2
65
  proto-plus==1.23.0
66
  protobuf==4.25.3
67
  pyasn1==0.5.1
 
76
  python-docx==1.1.0
77
  python-dotenv==1.0.1
78
  python-multipart==0.0.9
79
+ pywin32==306
80
  PyYAML==6.0.1
81
+ qdrant-client==1.8.0
82
  requests==2.31.0
83
  rsa==4.9
84
  sniffio==1.3.1