HuyDN committed on
Commit
d6ef950
1 Parent(s): 402adf5

Phase2/HuyDN: Optimize speed and fix bugs

Browse files
app/configs/database.py CHANGED
@@ -58,4 +58,10 @@ if "cvs" not in [collection.id for collection in firebase_db.collections()]:
58
  if "jds" not in [collection.id for collection in firebase_db.collections()]:
59
  # Create a collection
60
  firebase_db.collection("jds").add({"position_applied_for": "test","jd_url": "test"})
61
- print("Collection jds created")
 
 
 
 
 
 
 
58
  if "jds" not in [collection.id for collection in firebase_db.collections()]:
59
  # Create a collection
60
  firebase_db.collection("jds").add({"position_applied_for": "test","jd_url": "test"})
61
+ print("Collection jds created")
62
+
63
+ # Check whether the rag_question_tests collection exists
64
+ if "rag_question_tests" not in [collection.id for collection in firebase_db.collections()]:
65
+ # Create a collection
66
+ firebase_db.collection("rag_question_tests").add({"id_jd": "test","question_generator_tests_url": "test"})
67
+ print("Collection rag_question_tests created")
app/configs/qdrant_db.py CHANGED
@@ -10,14 +10,21 @@ qdrant_client = QdrantClient(
10
  url = os.getenv("QDRANT_URL"),
11
  api_key = os.getenv("QDRANT_API_KEY"),
12
  )
 
13
 
14
- try:
15
- collection_info = qdrant_client.get_collection("question_tests")
16
- except Exception as e:
17
  qdrant_client.create_collection(
18
  collection_name="question_tests",
19
  vectors_config=models.VectorParams(size=768, distance=models.Distance.COSINE),
20
- )
21
-
22
- print("Qdrant Database connected")
23
-
 
 
 
 
 
 
 
 
10
  url = os.getenv("QDRANT_URL"),
11
  api_key = os.getenv("QDRANT_API_KEY"),
12
  )
13
+ print("Qdrant Database connected")
14
 
15
+ # 2. Check if the question_tests exists
16
+ if qdrant_client.collection_exists('question_tests') == False:
 
17
  qdrant_client.create_collection(
18
  collection_name="question_tests",
19
  vectors_config=models.VectorParams(size=768, distance=models.Distance.COSINE),
20
+ )
21
+ print("Collection question_tests created")
22
+ # 3. Check if the rag_documents_test exists
23
+ elif qdrant_client.collection_exists('rag_documents_test') == False:
24
+ qdrant_client.create_collection(
25
+ collection_name="rag_documents_test",
26
+ vectors_config=models.VectorParams(size=768, distance=models.Distance.COSINE),
27
+ )
28
+ print("Collection rag_documents_test created")
29
+ else:
30
+ print("Collections already exist")
app/modules/__init__.py CHANGED
@@ -6,6 +6,7 @@ from app.modules.matching_cv import cvmatching_router
6
  from app.modules.crud_question_test import crud_question_tests_router
7
  from app.modules.crud_cvs import crud_cvs_router
8
  from app.modules.crud_jds import crud_jds_router
 
9
 
10
  modules_router = APIRouter(prefix="/modules", tags=["modules"])
11
  modules_router.include_router(qtretrieval_router)
@@ -13,6 +14,7 @@ modules_router.include_router(cvmatching_router)
13
  modules_router.include_router(crud_question_tests_router)
14
  modules_router.include_router(crud_cvs_router)
15
  modules_router.include_router(crud_jds_router)
 
16
 
17
  @modules_router.get("/")
18
  async def index():
 
6
  from app.modules.crud_question_test import crud_question_tests_router
7
  from app.modules.crud_cvs import crud_cvs_router
8
  from app.modules.crud_jds import crud_jds_router
9
+ from app.modules.crud_rag_question_tests import crud_rag_question_tests_router
10
 
11
  modules_router = APIRouter(prefix="/modules", tags=["modules"])
12
  modules_router.include_router(qtretrieval_router)
 
14
  modules_router.include_router(crud_question_tests_router)
15
  modules_router.include_router(crud_cvs_router)
16
  modules_router.include_router(crud_jds_router)
17
+ modules_router.include_router(crud_rag_question_tests_router)
18
 
19
  @modules_router.get("/")
20
  async def index():
app/modules/crud_cvs/__init__.py CHANGED
@@ -21,7 +21,7 @@ async def add_cv(name_candidate: str, apply_position: str, file_cv: Annotated[Up
21
  file_cv_type = file_cv.filename.split(".")[-1]
22
  if file_cv_type in ["pdf", "docx"]:
23
  # create a new document
24
- if create_cv({"name_candidate": name_candidate, "apply_position":apply_position, "cv_url": file_cv}):
25
  return {"message": "CV added successfully"}
26
  else:
27
  return {"message": "Error while adding CV file to database"}
 
21
  file_cv_type = file_cv.filename.split(".")[-1]
22
  if file_cv_type in ["pdf", "docx"]:
23
  # create a new document
24
+ if create_cv({"name_candidate": name_candidate, "apply_position":apply_position, "cv_content": file_cv}):
25
  return {"message": "CV added successfully"}
26
  else:
27
  return {"message": "Error while adding CV file to database"}
app/modules/crud_cvs/models/crud_cvs.py CHANGED
@@ -1,16 +1,22 @@
1
  import uuid
2
- from app.configs.database import firebase_bucket, firebase_db
3
  import io
 
 
 
4
  from docx import Document
 
 
 
5
 
6
  # CRUD operation
7
- def upload_file_cvs(file):
8
- re_name_file = str(uuid.uuid4()).replace("-","_") + "_" + file.filename
9
- # upload file to firebase storage
10
- blob = firebase_bucket.blob(re_name_file)
11
- blob.upload_from_file(file.file)
12
  # return gs link
13
- return f"gs://{firebase_bucket.name}/{re_name_file}"
14
 
15
  def remove_file_cvs(file_url):
16
  # remove file from firebase storage using "gs://" link
@@ -18,22 +24,28 @@ def remove_file_cvs(file_url):
18
  blob.delete()
19
  return True
20
 
21
- def file_cv_doc2text(file_url):
22
- # download file from firebase storage using "gs://" link
23
- blob = firebase_bucket.blob(file_url.split(f"gs://{firebase_bucket.name}/")[1])
24
- # download file and return string in file
25
- file_bytes = blob.download_as_bytes()
26
- # Create a BytesIO object from the file bytes
27
- file_stream = io.BytesIO(file_bytes)
28
- # Read the .docx file from the BytesIO object
29
- doc = Document(file_stream)
30
  # Extract text from the .docx file
31
  text = ""
32
  for paragraph in doc.paragraphs:
33
  text += paragraph.text + "\n"
34
-
35
  return text
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  def get_all_cvs():
39
  # Get all documents from the collection
@@ -51,15 +63,45 @@ def get_cv_by_id(id):
51
  return doc.to_dict()
52
 
53
  def create_cv(data):
54
- # get file_cvs
55
- file_cvs = data["cv_url"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  # upload file to firebase storage
57
- file_url = upload_file_cvs(file_cvs)
 
 
 
 
 
 
 
 
 
 
58
  # add file url to data
59
- data["cv_url"] = file_url
 
 
 
 
60
  # Create a new document
61
- document_ref = firebase_db.collection("cvs").add(data)
62
- # document_id = document_ref[1].id
63
  return True
64
 
65
  def delete_cv(id):
 
1
  import uuid
2
+ import pytz
3
  import io
4
+ import os
5
+
6
+ from app.configs.database import firebase_bucket, firebase_db
7
  from docx import Document
8
+ from datetime import datetime
9
+
10
+ from langchain_community.document_loaders import UnstructuredPDFLoader
11
 
12
  # CRUD operation
13
+ def upload_file_cvs(file_path):
14
+ # upload file to firebase storage from file_path
15
+ name_file = file_path.split("/")[-1]
16
+ blob = firebase_bucket.blob(name_file)
17
+ blob.upload_from_filename(file_path)
18
  # return gs link
19
+ return f"gs://{firebase_bucket.name}/{name_file}"
20
 
21
  def remove_file_cvs(file_url):
22
  # remove file from firebase storage using "gs://" link
 
24
  blob.delete()
25
  return True
26
 
27
+ def file_cv_doc2text(file_path):
28
+ # Read the .docx file from file
29
+ doc = Document(file_path)
 
 
 
 
 
 
30
  # Extract text from the .docx file
31
  text = ""
32
  for paragraph in doc.paragraphs:
33
  text += paragraph.text + "\n"
 
34
  return text
35
 
36
+ # Load CV text from a .pdf file
37
+ def file_cv_pdf2text(file_path):
38
+ # Read the .pdf file from the given file path
39
+ loader = UnstructuredPDFLoader(file_path)
40
+ json_result = loader.load()
41
+ # take page_content from json_result
42
+ page_content = json_result[0].page_content
43
+ return page_content
44
+
45
+ def get_cv_content_by_id(id_cv):
46
+ # Get a document by id
47
+ doc = firebase_db.collection("cvs").document(id_cv).get()
48
+ return doc.to_dict()["cv_content"]
49
 
50
  def get_all_cvs():
51
  # Get all documents from the collection
 
63
  return doc.to_dict()
64
 
65
  def create_cv(data):
66
+ # get file_cv
67
+ file_cv = data["cv_content"]
68
+ # prefix the original file name with a uuid
69
+ re_name_file = str(uuid.uuid4()).replace("-","_") + "_" + file_cv.filename
70
+ # save uploaded file to tmp folder
71
+ cache_path = f"tmp/{re_name_file}"
72
+ with open(cache_path, "wb") as buffer:
73
+ buffer.write(file_cv.file.read())
74
+
75
+ # determine the uploaded CV's file type from its file name extension
76
+ file_cv_type = file_cv.filename.split(".")[-1]
77
+ cv_text = ""
78
+ if file_cv_type == "pdf":
79
+ cv_text = file_cv_pdf2text(cache_path)
80
+ elif file_cv_type == "docx":
81
+ cv_text = file_cv_doc2text(cache_path)
82
+ else:
83
+ return False
84
+
85
  # upload file to firebase storage
86
+ cv_uploaded_url = upload_file_cvs(cache_path)
87
+ # delete file in tmp folder
88
+ os.remove(cache_path)
89
+
90
+ # Get the current time in UTC
91
+ utc_now = datetime.utcnow()
92
+ # Specify the Vietnam time zone
93
+ vietnam_timezone = pytz.timezone('Asia/Ho_Chi_Minh')
94
+ # Convert the current time to Vietnam time zone
95
+ vietnam_now = utc_now.replace(tzinfo=pytz.utc).astimezone(vietnam_timezone).strftime("%Y-%m-%d %H:%M:%S")
96
+
97
  # add file url to data
98
+ data["cv_url"] = cv_uploaded_url
99
+ # add cv_content
100
+ data["cv_content"] = cv_text
101
+ # add created_at
102
+ data["created_at"] = vietnam_now
103
  # Create a new document
104
+ firebase_db.collection("cvs").add(data)
 
105
  return True
106
 
107
  def delete_cv(id):
app/modules/crud_jds/__init__.py CHANGED
@@ -14,13 +14,13 @@ async def index():
14
 
15
  # [POST] add JD
16
  @crud_jds_router.post("/")
17
- # only upload pdf or json file
18
  async def add_jd(position_applied_for: str, file_jd: Annotated[UploadFile, File(..., description="Upload jd file (upload .txt)")]):
19
  try:
20
  file_jd_type = file_jd.filename.split(".")[-1]
21
  if file_jd_type in ["txt"]:
22
  # create a new document
23
- if create_jd({"position_applied_for": position_applied_for,"jd_url": file_jd}):
24
  return {"message": "JD added successfully"}
25
  else:
26
  return {"message": "Error while adding JD file to database"}
 
14
 
15
  # [POST] add JD
16
  @crud_jds_router.post("/")
17
+ # only upload txt file
18
  async def add_jd(position_applied_for: str, file_jd: Annotated[UploadFile, File(..., description="Upload jd file (upload .txt)")]):
19
  try:
20
  file_jd_type = file_jd.filename.split(".")[-1]
21
  if file_jd_type in ["txt"]:
22
  # create a new document
23
+ if create_jd({"position_applied_for": position_applied_for,"jd_text": file_jd}):
24
  return {"message": "JD added successfully"}
25
  else:
26
  return {"message": "Error while adding JD file to database"}
app/modules/crud_jds/models/crud_jds.py CHANGED
@@ -1,6 +1,8 @@
1
  import uuid
2
  from app.configs.database import firebase_bucket, firebase_db
3
-
 
 
4
 
5
  # CRUD operation
6
  def upload_file_jds(file):
@@ -17,11 +19,10 @@ def remove_file_jds(file_url):
17
  blob.delete()
18
  return True
19
 
20
- def file_jd_txt2text(file_url):
21
- # download file from firebase storage using "gs://" link
22
- blob = firebase_bucket.blob(file_url.split(f"gs://{firebase_bucket.name}/")[1])
23
- # download file and return string in file
24
- return blob.download_as_text()
25
 
26
  def get_all_jds():
27
  # Get all documents from the collection
@@ -40,20 +41,40 @@ def get_jd_by_id(id):
40
 
41
  def create_jd(data):
42
  # get file_jds
43
- file_jds = data["jd_url"]
44
- # upload file to firebase storage
45
- file_url = upload_file_jds(file_jds)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  # add file url to data
47
- data["jd_url"] = file_url
 
 
48
  # Create a new document
49
- document_ref = firebase_db.collection("jds").add(data)
50
- # document_id = document_ref[1].id
51
  return True
52
 
53
  def delete_jd(id):
54
- # Delete a file from firebase storage
55
- file_url = get_jd_by_id(id)["jd_url"]
56
- remove_file_jds(file_url)
57
  # Delete a document by id
58
  firebase_db.collection("jds").document(id).delete()
59
  return True
 
1
  import uuid
2
  from app.configs.database import firebase_bucket, firebase_db
3
+ from datetime import datetime
4
+ import pytz
5
+ import os
6
 
7
  # CRUD operation
8
  def upload_file_jds(file):
 
19
  blob.delete()
20
  return True
21
 
22
+ def get_jd_text_by_id(id_jd):
23
+ # Get a document by id
24
+ doc = firebase_db.collection("jds").document(id_jd).get()
25
+ return doc.to_dict()["jd_text"]
 
26
 
27
  def get_all_jds():
28
  # Get all documents from the collection
 
41
 
42
  def create_jd(data):
43
  # get file_jds
44
+ file_jds = data["jd_text"]
45
+ # prefix the original file name with a uuid
46
+ re_name_file = str(uuid.uuid4()).replace("-","_") + "_" + file_jds.filename
47
+ # save uploaded file to tmp folder
48
+ with open(f"tmp/{re_name_file}", "wb") as buffer:
49
+ buffer.write(file_jds.file.read())
50
+ # read file
51
+ with open(f"tmp/{re_name_file}", "r", encoding="utf8") as file:
52
+ jd_text = file.read()
53
+ # delete file in tmp folder
54
+ os.remove(f"tmp/{re_name_file}")
55
+
56
+ # # upload file to firebase storage
57
+ # file_url = upload_file_jds(file_jds)
58
+
59
+ # Get the current time in UTC
60
+ utc_now = datetime.utcnow()
61
+ # Specify the Vietnam time zone
62
+ vietnam_timezone = pytz.timezone('Asia/Ho_Chi_Minh')
63
+ # Convert the current time to Vietnam time zone
64
+ vietnam_now = utc_now.replace(tzinfo=pytz.utc).astimezone(vietnam_timezone).strftime("%Y-%m-%d %H:%M:%S")
65
+
66
  # add file url to data
67
+ data["jd_text"] = jd_text
68
+ # add created_at
69
+ data["created_at"] = vietnam_now
70
  # Create a new document
71
+ firebase_db.collection("jds").add(data)
 
72
  return True
73
 
74
  def delete_jd(id):
75
+ # # Delete a file from firebase storage
76
+ # file_url = get_jd_by_id(id)["jd_url"]
77
+ # remove_file_jds(file_url)
78
  # Delete a document by id
79
  firebase_db.collection("jds").document(id).delete()
80
  return True
app/modules/crud_question_test/__init__.py CHANGED
@@ -1,7 +1,7 @@
1
  from fastapi import APIRouter, UploadFile, File
2
  from typing import Annotated
3
 
4
- from app.modules.crud_question_test.models.crud_question_tests import get_all_question_tests, get_question_test_by_id, create_question_test, update_question_test, delete_question_test
5
 
6
  crud_question_tests_router = APIRouter(prefix="/crud_question_tests_router", tags=["crud_question_tests_router"])
7
 
@@ -15,21 +15,22 @@ async def index():
15
  # [POST] add question test
16
  @crud_question_tests_router.post("/")
17
  # only upload pdf or json file
18
- async def add_question_test(description: str, role: str, file_question_tests: Annotated[UploadFile, File(..., description="The question test file", media_type=["application/pdf", "application/json"])]):
19
  try:
 
20
  # check if file is pdf or json
21
- if file_question_tests.content_type == "application/pdf":
22
  # create a new document
23
  if create_question_test({"question_tests_description": description, "question_tests_role": role, "question_tests_url": file_question_tests}):
24
  return {"message": "Question test added successfully"}
25
  else:
26
- return {"message": "Error"}
27
- elif file_question_tests.content_type == "application/json":
28
  # create a new document
29
  if create_question_test({"question_tests_description": description, "question_tests_role": role, "question_tests_url": file_question_tests}):
30
  return {"message": "Question test added successfully"}
31
  else:
32
- return {"message": "Error"}
33
  else:
34
  return {"message": "File type not supported"}
35
  except Exception as e:
 
1
  from fastapi import APIRouter, UploadFile, File
2
  from typing import Annotated
3
 
4
+ from app.modules.crud_question_test.models.crud_question_tests import get_all_question_tests, create_question_test, delete_question_test
5
 
6
  crud_question_tests_router = APIRouter(prefix="/crud_question_tests_router", tags=["crud_question_tests_router"])
7
 
 
15
  # [POST] add question test
16
  @crud_question_tests_router.post("/")
17
  # only upload pdf or json file
18
+ async def add_question_test(description: str, role: str, file_question_tests: Annotated[UploadFile, File(..., description="The question test file (Upload .pdf or .json)", media_type=["application/pdf", "application/json"])]):
19
  try:
20
+ question_tests_upload_type = file_question_tests.filename.split(".")[-1]
21
  # check if file is pdf or json
22
+ if question_tests_upload_type == "pdf":
23
  # create a new document
24
  if create_question_test({"question_tests_description": description, "question_tests_role": role, "question_tests_url": file_question_tests}):
25
  return {"message": "Question test added successfully"}
26
  else:
27
+ return {"message": "Error", "error": str(e)}
28
+ elif question_tests_upload_type == "json":
29
  # create a new document
30
  if create_question_test({"question_tests_description": description, "question_tests_role": role, "question_tests_url": file_question_tests}):
31
  return {"message": "Question test added successfully"}
32
  else:
33
+ return {"message": "Error", "error": str(e)}
34
  else:
35
  return {"message": "File type not supported"}
36
  except Exception as e:
app/modules/crud_question_test/models/crud_question_tests.py CHANGED
@@ -4,6 +4,8 @@ from app.configs.qdrant_db import qdrant_client
4
  from app.configs.qdrant_db import models
5
  from app.modules.question_tests_retrieval.models.text2vector import text2vector
6
 
 
 
7
 
8
  # CRUD operation
9
  def upload_file_question_tests(file):
@@ -48,6 +50,16 @@ def create_question_test(data):
48
  file_question_tests = data["question_tests_url"]
49
  # upload file to firebase storage
50
  file_url = upload_file_question_tests(file_question_tests)
 
 
 
 
 
 
 
 
 
 
51
  # add file url to data
52
  data["question_tests_url"] = file_url
53
  question_tests_des = data["question_tests_description"]
@@ -65,13 +77,6 @@ def create_question_test(data):
65
 
66
  return True
67
 
68
- def update_question_test(id, data):
69
- # Update a document by id
70
- firebase_db.collection("question_tests").document(id).update(data)
71
- # Update corrensponding vector in Qdrant
72
-
73
- return True
74
-
75
  def delete_question_test(id):
76
  # Delete a file from firebase storage
77
  file_url = get_question_test_by_id(id)["question_tests_url"]
 
4
  from app.configs.qdrant_db import models
5
  from app.modules.question_tests_retrieval.models.text2vector import text2vector
6
 
7
+ from datetime import datetime
8
+ import pytz
9
 
10
  # CRUD operation
11
  def upload_file_question_tests(file):
 
50
  file_question_tests = data["question_tests_url"]
51
  # upload file to firebase storage
52
  file_url = upload_file_question_tests(file_question_tests)
53
+
54
+ # Get the current time in UTC
55
+ utc_now = datetime.utcnow()
56
+ # Specify the Vietnam time zone
57
+ vietnam_timezone = pytz.timezone('Asia/Ho_Chi_Minh')
58
+ # Convert the current time to Vietnam time zone
59
+ vietnam_now = utc_now.replace(tzinfo=pytz.utc).astimezone(vietnam_timezone).strftime("%Y-%m-%d %H:%M:%S")
60
+ # add created_at
61
+ data["created_at"] = vietnam_now
62
+
63
  # add file url to data
64
  data["question_tests_url"] = file_url
65
  question_tests_des = data["question_tests_description"]
 
77
 
78
  return True
79
 
 
 
 
 
 
 
 
80
  def delete_question_test(id):
81
  # Delete a file from firebase storage
82
  file_url = get_question_test_by_id(id)["question_tests_url"]
app/modules/crud_rag_question_tests/__init__.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, UploadFile, File
2
+ from typing import Annotated
3
+
4
+ from app.modules.crud_rag_question_tests.models.crud_rag_question_tests import get_all_rag_question_tests, create_rag_question_test, delete_question_test
5
+
6
+ crud_rag_question_tests_router = APIRouter(prefix="/crud_rag_question_tests_router", tags=["crud_rag_question_tests_router"])
7
+
8
+ # [GET] all question tests
9
+ @crud_rag_question_tests_router.get("/")
10
+ async def index():
11
+ # Get all documents from the collection with id document
12
+ data = get_all_rag_question_tests()
13
+ return data
14
+
15
+ # [POST] add question test
16
+ @crud_rag_question_tests_router.post("/")
17
+ # only upload json file
18
+ async def add_question_generator(id_jd: str, file_question_generator_tests: Annotated[UploadFile, File(..., description="The question generator test file", media_type=["application/json"])]):
19
+ try:
20
+ question_tests_upload_type = file_question_generator_tests.filename.split(".")[-1]
21
+ # check if file is json
22
+ if question_tests_upload_type == "json":
23
+ # create a new document
24
+ if create_rag_question_test({"id_jd": id_jd, "question_generator_tests_url": file_question_generator_tests}):
25
+ return {"message": "Question test added successfully"}
26
+ else:
27
+ return {"error": str(e)}
28
+ else:
29
+ return {"message": "File type not supported"}
30
+ except Exception as e:
31
+ return {"message": "Error", "error": str(e)}
32
+
33
+ # [DELETE] question test by id
34
+ @crud_rag_question_tests_router.delete("/{id}")
35
+ async def delete_question_test_by_id(id: str):
36
+ # Delete a document by id
37
+ if delete_question_test(id):
38
+ return {"message": f"Question test have id {id} deleted successfully"}
39
+ else:
40
+ return {"message": "Error"}
app/modules/crud_rag_question_tests/models/crud_rag_question_tests.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+ from app.configs.database import firebase_bucket, firebase_db
3
+
4
+ from datetime import datetime
5
+ import pytz
6
+
7
+ # CRUD operation
8
+ def upload_file_rag_question_tests(file):
9
+ re_name_file = str(uuid.uuid4()).replace("-","_") + "_" + file.filename
10
+ # upload file to firebase storage
11
+ blob = firebase_bucket.blob(re_name_file)
12
+ blob.upload_from_file(file.file)
13
+ # return gs link
14
+ return f"gs://{firebase_bucket.name}/{re_name_file}"
15
+
16
+ def remove_file_rag_question_tests(file_url):
17
+ # remove file from firebase storage using "gs://" link
18
+ blob = firebase_bucket.blob(file_url.split(f"gs://{firebase_bucket.name}/")[1])
19
+ blob.delete()
20
+ return True
21
+
22
+ def get_all_rag_question_tests():
23
+ # Get all documents from the collection
24
+ docs = firebase_db.collection("rag_question_tests").stream()
25
+ data = []
26
+ for doc in docs:
27
+ doc_data = doc.to_dict()
28
+ doc_data["id"] = doc.id
29
+ data.append(doc_data)
30
+ return data
31
+
32
+ def get_question_test_by_id(id):
33
+ # Get a document by id
34
+ doc = firebase_db.collection("rag_question_tests").document(id).get()
35
+ return doc.to_dict()
36
+
37
+ def create_rag_question_test(data):
38
+ # get file_rag_question_tests
39
+ file_rag_question_tests = data["question_generator_tests_url"]
40
+ # upload file to firebase storage
41
+ file_url = upload_file_rag_question_tests(file_rag_question_tests)
42
+
43
+ # Get the current time in UTC
44
+ utc_now = datetime.utcnow()
45
+ # Specify the Vietnam time zone
46
+ vietnam_timezone = pytz.timezone('Asia/Ho_Chi_Minh')
47
+ # Convert the current time to Vietnam time zone
48
+ vietnam_now = utc_now.replace(tzinfo=pytz.utc).astimezone(vietnam_timezone).strftime("%Y-%m-%d %H:%M:%S")
49
+ # add created_at
50
+ data["created_at"] = vietnam_now
51
+
52
+ # add file url to data
53
+ data["question_generator_tests_url"] = file_url
54
+ # Create a new document
55
+ document_ref = firebase_db.collection("rag_question_tests").add(data)
56
+ return True
57
+
58
+ def delete_question_test(id):
59
+ # Delete a file from firebase storage
60
+ file_url = get_question_test_by_id(id)["question_generator_tests_url"]
61
+ remove_file_rag_question_tests(file_url)
62
+ # Delete a document by id
63
+ firebase_db.collection("rag_question_tests").document(id).delete()
64
+ return True
app/modules/matching_cv/__init__.py CHANGED
@@ -4,8 +4,8 @@ from fastapi import APIRouter
4
  # from app.modules.matching_cv.models.match_cv_jd_model import Match_JD_CV_Model
5
 
6
  from app.modules.matching_cv.models.matching_cv_logic import result_matching_cv_jd
7
- from app.modules.crud_jds.models.crud_jds import get_jd_by_id, file_jd_txt2text
8
- from app.modules.crud_cvs.models.crud_cvs import get_cv_by_id, file_cv_doc2text
9
 
10
  cvmatching_router = APIRouter(prefix="/cvmatching", tags=["cvmatching"])
11
 
@@ -17,42 +17,10 @@ async def index():
17
  # only upload .pdf or .docx file
18
  async def matching_cv_jd(id_jd: str, id_cv:str):
19
  try:
20
- # get jd and cv by id
21
- jd_document = get_jd_by_id(id_jd)
22
- cv_document = get_cv_by_id(id_cv)
23
 
24
- # download file from firebase storage
25
- jd_url = jd_document["jd_url"]
26
- cv_url = cv_document["cv_url"]
27
-
28
- # get type file cv from cv_url "gs://bucket_name/file_name"
29
- cv_type = cv_url.split(".")[-1]
30
- if cv_type == "pdf":
31
- return {"message": "This feature is not available yet"}
32
- elif cv_type == "docx":
33
- cv_text = file_cv_doc2text(cv_url)
34
- else:
35
- return {"message": "Please upload only .pdf or .docx file for CV"}
36
-
37
- # get jd_text from jd_url "gs://bucket_name/file_name"
38
- jd_text = file_jd_txt2text(jd_url)
39
-
40
- result = result_matching_cv_jd(cv_text, jd_text)
41
  return {"result": result}
42
- # # take jd_upload and cv_upload type file
43
- # jd_upload_type = jd_upload.filename.split(".")[-1]
44
- # cv_upload_type = cv_upload.filename.split(".")[-1]
45
- # if jd_upload_type in ["txt"] and cv_upload_type in ["pdf", "docx"]:
46
- # jd_text = jd_upload.file.read().decode("utf-8")
47
- # if cv_upload_type == "docx":
48
- # cv_text = docx.Document(cv_upload.file).paragraphs
49
- # cv_text = "\n".join([i.text for i in cv_text])
50
- # elif cv_upload_type == "pdf":
51
- # return {"message": "This feature is not available yet"}
52
- # # check matching cv and jd
53
- # result = result_matching_cv_jd(cv_text, jd_text)
54
- # return {"result": result}
55
- # else:
56
- # return {"message": "Please upload only .txt for JD. And .pdf or .docx file for CV"}
57
  except Exception as e:
58
  return {"Error": str(e)}
 
4
  # from app.modules.matching_cv.models.match_cv_jd_model import Match_JD_CV_Model
5
 
6
  from app.modules.matching_cv.models.matching_cv_logic import result_matching_cv_jd
7
+ from app.modules.crud_jds.models.crud_jds import get_jd_by_id, get_jd_text_by_id
8
+ from app.modules.crud_cvs.models.crud_cvs import get_cv_by_id, file_cv_doc2text, file_cv_pdf2text
9
 
10
  cvmatching_router = APIRouter(prefix="/cvmatching", tags=["cvmatching"])
11
 
 
17
  # only upload .pdf or .docx file
18
  async def matching_cv_jd(id_jd: str, id_cv:str):
19
  try:
20
+ cv_content = get_cv_by_id(id_cv)
21
+ jd_text = get_jd_text_by_id(id_jd)
 
22
 
23
+ result = result_matching_cv_jd(cv_text=cv_content,jd_text=jd_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  return {"result": result}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  except Exception as e:
26
  return {"Error": str(e)}
app/modules/matching_cv/models/match_cv_jd_model.py DELETED
@@ -1,7 +0,0 @@
1
- from fastapi import APIRouter, UploadFile, File
2
-
3
- class Match_JD_CV_Model:
4
- jd = UploadFile
5
- jd_default = File(..., description="Upload JD file (only .txt file)", media_type=["text/plain"])
6
- cv = UploadFile
7
- cv_default = File(..., description="Upload CV file (only .pdf and .docx)", media_type=["application/pdf", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"])
 
 
 
 
 
 
 
 
app/modules/matching_cv/models/matching_cv_logic.py CHANGED
@@ -6,6 +6,8 @@ from dotenv import load_dotenv
6
  from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
7
  from langchain_core.messages import SystemMessage
8
  from langchain_google_genai import ChatGoogleGenerativeAI
 
 
9
 
10
  # import the json output parser from the langchain core
11
  from langchain_core.output_parsers import JsonOutputParser
@@ -18,32 +20,38 @@ load_dotenv()
18
 
19
  # Define the google api key
20
  os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
 
 
21
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  # def matching cv and jd return percentage of matching using prompt template
24
- def result_matching_cv_jd(cv_text, jd_text):
25
- # create the prompt template
26
- chat_template = ChatPromptTemplate.from_messages(
27
- [
28
- SystemMessage(
29
- content=(
30
- """
31
- Given the following CV and JD, calculate the percentage match between the candidate's qualifications and the job requirements:
32
- CV: {cv}
33
- JD: {jd}
34
- To determine the match percentage, analyze the skills and experience in the CV and compare them to the requirements outlined in the JD. Provide the final match percentage as a numeric value between 0-100%, along with a brief explanation of your analysis. Follow this json format: {"Skills Match": {"Required Skills": "","Candidate Skills": "","Match Percentage": "",}, "Experience Match": {"Required Experience": "","Candidate Experience": "","Match Percentage": "",}, "Overall Match Percentage:": "", "Explanation": ""}
35
- """
36
- )
37
- ),
38
- HumanMessagePromptTemplate.from_template(["{cv}", "{jd}"]),
39
- ]
40
- )
41
-
42
  # create the chat message
43
  chat_message = chat_template.format_messages(cv=cv_text, jd=jd_text)
44
-
45
- llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3, convert_system_message_to_human=True, api_key=GOOGLE_API_KEY, request_timeout=120)
46
- chain = llm | parser
47
  result = chain.invoke(chat_message)
48
 
49
  return result
 
6
  from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
7
  from langchain_core.messages import SystemMessage
8
  from langchain_google_genai import ChatGoogleGenerativeAI
9
+ from langchain_anthropic import ChatAnthropic
10
+ from langchain_openai import OpenAI
11
 
12
  # import the json output parser from the langchain core
13
  from langchain_core.output_parsers import JsonOutputParser
 
20
 
21
  # Define the google api key
22
  os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
23
+ os.environ['CLAUDE_API_KEY'] = os.getenv('CLAUDE_API_KEY')
24
+ os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
25
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
26
+ CLAUDE_API_KEY = os.environ.get("CLAUDE_API_KEY")
27
+ OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
28
+
29
+ llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3, convert_system_message_to_human=True, api_key=GOOGLE_API_KEY, request_timeout=120)
30
+ # llm = ChatAnthropic(temperature=0.3, model_name="claude-3-opus-20240229", anthropic_api_key=CLAUDE_API_KEY, default_request_timeout=120)
31
+ # llm = OpenAI(model_name="gpt-3.5-turbo-0125", openai_api_key=OPENAI_API_KEY)
32
+ chain = llm | parser
33
+
34
+ # create the prompt template
35
+ chat_template = ChatPromptTemplate.from_messages(
36
+ [
37
+ SystemMessage(
38
+ content=(
39
+ """
40
+ Given the following CV and JD, calculate the percentage match between the candidate's qualifications and the job requirements:
41
+ CV: {cv}
42
+ JD: {jd}
43
+ To determine the match percentage, analyze the skills and experience in the CV and compare them to the requirements outlined in the JD. Provide the final match percentage as a numeric value between 0-100%, along with a brief explanation of your analysis. Follow this json format: {"Skills Match": {"Required Skills": "","Candidate Skills": "","Match Percentage": "",}, "Experience Match": {"Required Experience": "","Candidate Experience": "","Match Percentage": "",}, "Overall Match Percentage:": "", "Explanation": ""}
44
+ """
45
+ )
46
+ ),
47
+ HumanMessagePromptTemplate.from_template(["{cv}", "{jd}"]),
48
+ ]
49
+ )
50
 
51
  # def matching cv and jd return percentage of matching using prompt template
52
+ def result_matching_cv_jd(cv_text:str, jd_text:str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  # create the chat message
54
  chat_message = chat_template.format_messages(cv=cv_text, jd=jd_text)
 
 
 
55
  result = chain.invoke(chat_message)
56
 
57
  return result
app/modules/question_tests_retrieval/__init__.py CHANGED
@@ -1,9 +1,8 @@
1
- from fastapi import APIRouter, UploadFile, File
2
- from typing import Annotated
3
 
4
  from app.modules.question_tests_retrieval.models.jd2text import jobdes2text
5
  from app.modules.question_tests_retrieval.models.question_tests_logic import get_question_tests
6
- from app.modules.crud_jds.models.crud_jds import get_jd_by_id, file_jd_txt2text
7
 
8
  qtretrieval_router = APIRouter(prefix="/qtretrieval", tags=["qtretrieval"])
9
 
@@ -15,15 +14,13 @@ async def index():
15
  # only upload .txt file
16
  async def send_jd_to_get_question(id_jd: str):
17
  try:
18
- jd_document = get_jd_by_id(id_jd)
19
- # download jd file from firebase storage
20
- jd_file_string = file_jd_txt2text(jd_document["jd_url"])
21
- sumaryjd_text = jobdes2text(jd_file_string)
22
  if get_question_tests(sumaryjd_text):
23
  return {"message": "Send JD successfully and get question test successfully",
24
- "sumaryjd_text": sumaryjd_text}
25
  else:
26
- return {"message": "Please upload only .txt file", "error": str(e)}
27
  except Exception as e:
28
  return {"message": "Have error when find JD in database", "error": str(e)}
29
-
 
1
+ from fastapi import APIRouter
 
2
 
3
  from app.modules.question_tests_retrieval.models.jd2text import jobdes2text
4
  from app.modules.question_tests_retrieval.models.question_tests_logic import get_question_tests
5
+ from app.modules.crud_jds.models.crud_jds import get_jd_by_id, get_jd_text_by_id
6
 
7
  qtretrieval_router = APIRouter(prefix="/qtretrieval", tags=["qtretrieval"])
8
 
 
14
  # look up the stored JD by its id and retrieve matching question tests
15
  async def send_jd_to_get_question(id_jd: str):
16
  try:
17
+ # get jd_text by id
18
+ jd_text = get_jd_text_by_id(id_jd)
19
+ sumaryjd_text = jobdes2text(jd_text)
 
20
  if get_question_tests(sumaryjd_text):
21
  return {"message": "Send JD successfully and get question test successfully",
22
+ "sumary JD": sumaryjd_text}
23
  else:
24
+ return {"error": str(e)}
25
  except Exception as e:
26
  return {"message": "Have error when find JD in database", "error": str(e)}
 
app/modules/question_tests_retrieval/models/jd2text.py CHANGED
@@ -2,6 +2,8 @@ from langchain_google_genai import ChatGoogleGenerativeAI
2
  from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
3
  from langchain_core.messages import SystemMessage
4
  from langchain_core.output_parsers import JsonOutputParser
 
 
5
  import os
6
  from dotenv import load_dotenv
7
 
@@ -10,33 +12,33 @@ load_dotenv()
10
 
11
  # Define the google api key
12
  os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
 
13
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
 
14
 
15
  # define the parser object
16
  parser = JsonOutputParser()
17
 
18
- def jobdes2text(jobdes):
19
- # setup the gemini pro
20
- llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3, convert_system_message_to_human=True, api_key=GOOGLE_API_KEY, request_timeout=120)
21
-
22
- # create the prompt template
23
- finnal_jd_chat_template = ChatPromptTemplate.from_messages(
24
- [
25
- SystemMessage(
26
- content=(
27
- """Return Job title, level(Fresher, Junior, Senior, ...) and Brief summary of required skills about 20 words from the job description. Use the following format: Job Title is {job title}, Level is {level}, and Brief summary of required skills is {brief summary of required skills}."""
28
- )
29
- ),
30
- HumanMessagePromptTemplate.from_template("{text}"),
31
- ]
32
- )
33
-
34
  # create the chat message
35
  chat_message = finnal_jd_chat_template.format_messages(text=jobdes)
36
-
37
  # create a chain
38
  chain = llm
39
-
40
  result = chain.invoke(chat_message)
41
-
42
  return result.content
 
2
  from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
3
  from langchain_core.messages import SystemMessage
4
  from langchain_core.output_parsers import JsonOutputParser
5
+ from langchain_anthropic import ChatAnthropic
6
+
7
  import os
8
  from dotenv import load_dotenv
9
 
 
12
 
13
  # Define the google api key
14
  os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
15
# CLAUDE_API_KEY is optional (the Claude model below is commented out), so it
# is only read here. Assigning os.environ[...] = os.getenv(...) would raise
# TypeError at import time whenever the key is not configured.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
CLAUDE_API_KEY = os.environ.get("CLAUDE_API_KEY")  # None when not configured
18
 
19
  # define the parser object
20
  parser = JsonOutputParser()
21
 
22
+ # setup the gemini pro
23
+ llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3, convert_system_message_to_human=True, api_key=GOOGLE_API_KEY, request_timeout=120)
24
+ # llm = ChatAnthropic(temperature=0, anthropic_api_key=CLAUDE_API_KEY, model_name="claude-3-opus-20240229")
25
+ # create the prompt template
26
+ finnal_jd_chat_template = ChatPromptTemplate.from_messages(
27
+ [
28
+ SystemMessage(
29
+ content=(
30
+ """Return Job title, level(Fresher, Junior, Senior, ...) and Brief summary of required skills about 20 words from the job description. Use the following format: Job Title is {job title}, Level is {level}, and Brief summary of required skills is {brief summary of required skills}."""
31
+ )
32
+ ),
33
+ HumanMessagePromptTemplate.from_template("{text}"),
34
+ ]
35
+ )
36
+
37
def jobdes2text(jobdes: str):
    """Summarise a job description with the module-level LLM.

    The prompt asks for the job title, the level and a brief summary of the
    required skills.

    Args:
        jobdes: Raw job-description text, substituted for ``text`` in the prompt.

    Returns:
        The ``content`` attribute of the LLM response.
    """
    # Build the chat messages from the shared template.
    messages = finnal_jd_chat_template.format_messages(text=jobdes)
    # Invoke the model directly; no output parser is applied here.
    response = llm.invoke(messages)
    return response.content
app/modules/question_tests_retrieval/models/question_tests_logic.py CHANGED
@@ -20,21 +20,6 @@ GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
20
  # Setting model embedding
21
  embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY, request_timeout=120)
22
  gemini_evaluator = load_evaluator("embedding_distance", distance_metric=EmbeddingDistance.COSINE, embeddings=embedding_model)
23
-
24
- # def compare_vector(vector_extract, vector_des):
25
- # maxnimun_value = 2
26
- # for item in vector_des:
27
- # two_object = (vector_extract, item)
28
- # x = gemini_evaluator.evaluate_strings(prediction=two_object[0], reference=two_object[1])
29
- # if x.get('score') < maxnimun_value:
30
- # maxnimun_value = x.get('score')
31
- # des_item_choose = item
32
- # if maxnimun_value == 2:
33
- # return False
34
- # elif maxnimun_value < 0.3:
35
- # return des_item_choose
36
- # else:
37
- # return False
38
 
39
  def compare_vector(description_vector, max_number_of_points=3):
40
  similarity_list = qdrant_client.search(
@@ -51,17 +36,7 @@ def compare_vector(description_vector, max_number_of_points=3):
51
 
52
  return formatted_similarity_list
53
 
54
- # def download_question_test(question_test_url):
55
- # # check folder exist
56
- # if not os.path.exists('data/question_tests'):
57
- # os.makedirs('data/question_tests')
58
- # # download file from firebase storage using "gs://" link
59
- # name_bucket = question_test_url.split(f"gs://{firebase_bucket.name}/")[1]
60
- # blob = firebase_bucket.blob(name_bucket)
61
- # blob.download_to_filename(f'data/question_tests/{name_bucket}')
62
- # return True
63
-
64
- def download_question_test(question_test_url_list):
65
  # check folder exist
66
  if not os.path.exists('data/question_tests'):
67
  os.makedirs('data/question_tests')
@@ -73,23 +48,7 @@ def download_question_test(question_test_url_list):
73
 
74
  return True
75
 
76
-
77
- # def get_question_test(text):
78
- # all_question_tests = get_all_question_tests()
79
- # value_in_des = []
80
- # for item in all_question_tests:
81
- # value_in_des.append(item['question_tests_description'])
82
- # des_item_choose = compare_vector(text, value_in_des)
83
- # if des_item_choose == False:
84
- # return "No question test found"
85
- # else:
86
- # question_test_url = get_question_test_url_by_description(des_item_choose)
87
- # if download_question_test(question_test_url):
88
- # return True
89
- # else:
90
- # return False
91
-
92
- def get_question_tests(text):
93
  # Get formatted similarity list
94
  formatted_similarity_list = compare_vector(text2vector(text))
95
  # Get corresponding document url in Firebase and download them
 
20
  # Setting model embedding
21
  embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY, request_timeout=120)
22
  gemini_evaluator = load_evaluator("embedding_distance", distance_metric=EmbeddingDistance.COSINE, embeddings=embedding_model)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  def compare_vector(description_vector, max_number_of_points=3):
25
  similarity_list = qdrant_client.search(
 
36
 
37
  return formatted_similarity_list
38
 
39
+ def download_question_test(question_test_url_list: list):
 
 
 
 
 
 
 
 
 
 
40
  # check folder exist
41
  if not os.path.exists('data/question_tests'):
42
  os.makedirs('data/question_tests')
 
48
 
49
  return True
50
 
51
+ def get_question_tests(text: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  # Get formatted similarity list
53
  formatted_similarity_list = compare_vector(text2vector(text))
54
  # Get corresponding document url in Firebase and download them
app/modules/question_tests_retrieval/models/text2vector.py CHANGED
@@ -8,8 +8,8 @@ load_dotenv()
8
  # Define the google api key
9
  os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
10
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
 
11
 
12
- def text2vector(text):
13
- embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY, request_timeout=120)
14
  vector = embeddings.embed_query(text)
15
  return vector
 
8
  # Define the google api key
9
  os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
10
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
11
+ embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY, request_timeout=120)
12
 
13
def text2vector(text: str):
    """Embed ``text`` with the module-level embedding model.

    Args:
        text: The string to embed.

    Returns:
        The vector returned by ``embeddings.embed_query``.
    """
    return embeddings.embed_query(text)
scrapping.py DELETED
@@ -1,8 +0,0 @@
1
- from bs4 import BeautifulSoup
2
- import requests
3
-
4
- url = "https://en.wikipedia.org/wiki/List_of_largest_companies_by_revenue"
5
- page = requests.get(url)
6
- soup = BeautifulSoup(page.text, 'html')
7
-
8
- print(soup)
 
 
 
 
 
 
 
 
 
tmp/.gitkeep ADDED
File without changes