Spaces:
Build error
Build error
Merge pull request #31 from fsa-simpleqt/HuyDN
Browse files- app/configs/database.py +7 -1
- app/configs/qdrant_db.py +14 -7
- app/modules/__init__.py +2 -0
- app/modules/crud_cvs/__init__.py +1 -1
- app/modules/crud_cvs/models/crud_cvs.py +65 -23
- app/modules/crud_jds/__init__.py +2 -2
- app/modules/crud_jds/models/crud_jds.py +36 -15
- app/modules/crud_question_test/__init__.py +7 -6
- app/modules/crud_question_test/models/crud_question_tests.py +12 -7
- app/modules/crud_rag_question_tests/__init__.py +40 -0
- app/modules/crud_rag_question_tests/models/crud_rag_question_tests.py +64 -0
- app/modules/matching_cv/__init__.py +5 -37
- app/modules/matching_cv/models/match_cv_jd_model.py +0 -7
- app/modules/matching_cv/models/matching_cv_logic.py +29 -21
- app/modules/question_tests_retrieval/__init__.py +7 -10
- app/modules/question_tests_retrieval/models/jd2text.py +21 -19
- app/modules/question_tests_retrieval/models/question_tests_logic.py +2 -43
- app/modules/question_tests_retrieval/models/text2vector.py +2 -2
- scrapping.py +0 -8
- tmp/.gitkeep +0 -0
app/configs/database.py
CHANGED
@@ -58,4 +58,10 @@ if "cvs" not in [collection.id for collection in firebase_db.collections()]:
|
|
58 |
if "jds" not in [collection.id for collection in firebase_db.collections()]:
|
59 |
# Create a collection
|
60 |
firebase_db.collection("jds").add({"position_applied_for": "test","jd_url": "test"})
|
61 |
-
print("Collection jds created")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
if "jds" not in [collection.id for collection in firebase_db.collections()]:
|
59 |
# Create a collection
|
60 |
firebase_db.collection("jds").add({"position_applied_for": "test","jd_url": "test"})
|
61 |
+
print("Collection jds created")
|
62 |
+
|
63 |
+
# check if have rag_question_tests collection
|
64 |
+
if "rag_question_tests" not in [collection.id for collection in firebase_db.collections()]:
|
65 |
+
# Create a collection
|
66 |
+
firebase_db.collection("rag_question_tests").add({"id_jd": "test","question_generator_tests_url": "test"})
|
67 |
+
print("Collection rag_question_tests created")
|
app/configs/qdrant_db.py
CHANGED
@@ -10,14 +10,21 @@ qdrant_client = QdrantClient(
|
|
10 |
url = os.getenv("QDRANT_URL"),
|
11 |
api_key = os.getenv("QDRANT_API_KEY"),
|
12 |
)
|
|
|
13 |
|
14 |
-
|
15 |
-
|
16 |
-
except Exception as e:
|
17 |
qdrant_client.create_collection(
|
18 |
collection_name="question_tests",
|
19 |
vectors_config=models.VectorParams(size=768, distance=models.Distance.COSINE),
|
20 |
-
)
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
url = os.getenv("QDRANT_URL"),
|
11 |
api_key = os.getenv("QDRANT_API_KEY"),
|
12 |
)
|
13 |
+
print("Qdrant Database connected")
|
14 |
|
15 |
+
# 2. Check if the question_tests exists
|
16 |
+
if qdrant_client.collection_exists('question_tests') == False:
|
|
|
17 |
qdrant_client.create_collection(
|
18 |
collection_name="question_tests",
|
19 |
vectors_config=models.VectorParams(size=768, distance=models.Distance.COSINE),
|
20 |
+
)
|
21 |
+
print("Collection question_tests created")
|
22 |
+
# 3. Check if the rag_documents_test exists
|
23 |
+
elif qdrant_client.collection_exists('rag_documents_test') == False:
|
24 |
+
qdrant_client.create_collection(
|
25 |
+
collection_name="rag_documents_test",
|
26 |
+
vectors_config=models.VectorParams(size=768, distance=models.Distance.COSINE),
|
27 |
+
)
|
28 |
+
print("Collection rag_documents_test created")
|
29 |
+
else:
|
30 |
+
print("Collections already exist")
|
app/modules/__init__.py
CHANGED
@@ -6,6 +6,7 @@ from app.modules.matching_cv import cvmatching_router
|
|
6 |
from app.modules.crud_question_test import crud_question_tests_router
|
7 |
from app.modules.crud_cvs import crud_cvs_router
|
8 |
from app.modules.crud_jds import crud_jds_router
|
|
|
9 |
from app.modules.question_rag import quiz_gen_router
|
10 |
|
11 |
modules_router = APIRouter(prefix="/modules", tags=["modules"])
|
@@ -14,6 +15,7 @@ modules_router.include_router(cvmatching_router)
|
|
14 |
modules_router.include_router(crud_question_tests_router)
|
15 |
modules_router.include_router(crud_cvs_router)
|
16 |
modules_router.include_router(crud_jds_router)
|
|
|
17 |
modules_router.include_router(quiz_gen_router)
|
18 |
|
19 |
@modules_router.get("/")
|
|
|
6 |
from app.modules.crud_question_test import crud_question_tests_router
|
7 |
from app.modules.crud_cvs import crud_cvs_router
|
8 |
from app.modules.crud_jds import crud_jds_router
|
9 |
+
from app.modules.crud_rag_question_tests import crud_rag_question_tests_router
|
10 |
from app.modules.question_rag import quiz_gen_router
|
11 |
|
12 |
modules_router = APIRouter(prefix="/modules", tags=["modules"])
|
|
|
15 |
modules_router.include_router(crud_question_tests_router)
|
16 |
modules_router.include_router(crud_cvs_router)
|
17 |
modules_router.include_router(crud_jds_router)
|
18 |
+
modules_router.include_router(crud_rag_question_tests_router)
|
19 |
modules_router.include_router(quiz_gen_router)
|
20 |
|
21 |
@modules_router.get("/")
|
app/modules/crud_cvs/__init__.py
CHANGED
@@ -21,7 +21,7 @@ async def add_cv(name_candidate: str, apply_position: str, file_cv: Annotated[Up
|
|
21 |
file_cv_type = file_cv.filename.split(".")[-1]
|
22 |
if file_cv_type in ["pdf", "docx"]:
|
23 |
# create a new document
|
24 |
-
if create_cv({"name_candidate": name_candidate, "apply_position":apply_position, "
|
25 |
return {"message": "CV added successfully"}
|
26 |
else:
|
27 |
return {"message": "Error while adding CV file to database"}
|
|
|
21 |
file_cv_type = file_cv.filename.split(".")[-1]
|
22 |
if file_cv_type in ["pdf", "docx"]:
|
23 |
# create a new document
|
24 |
+
if create_cv({"name_candidate": name_candidate, "apply_position":apply_position, "cv_content": file_cv}):
|
25 |
return {"message": "CV added successfully"}
|
26 |
else:
|
27 |
return {"message": "Error while adding CV file to database"}
|
app/modules/crud_cvs/models/crud_cvs.py
CHANGED
@@ -1,16 +1,22 @@
|
|
1 |
import uuid
|
2 |
-
|
3 |
import io
|
|
|
|
|
|
|
4 |
from docx import Document
|
|
|
|
|
|
|
5 |
|
6 |
# CRUD operation
|
7 |
-
def upload_file_cvs(
|
8 |
-
|
9 |
-
|
10 |
-
blob = firebase_bucket.blob(
|
11 |
-
blob.
|
12 |
# return gs link
|
13 |
-
return f"gs://{firebase_bucket.name}/{
|
14 |
|
15 |
def remove_file_cvs(file_url):
|
16 |
# remove file from firebase storage using "gs://" link
|
@@ -18,22 +24,28 @@ def remove_file_cvs(file_url):
|
|
18 |
blob.delete()
|
19 |
return True
|
20 |
|
21 |
-
def file_cv_doc2text(
|
22 |
-
#
|
23 |
-
|
24 |
-
# download file and return string in file
|
25 |
-
file_bytes = blob.download_as_bytes()
|
26 |
-
# Create a BytesIO object from the file bytes
|
27 |
-
file_stream = io.BytesIO(file_bytes)
|
28 |
-
# Read the .docx file from the BytesIO object
|
29 |
-
doc = Document(file_stream)
|
30 |
# Extract text from the .docx file
|
31 |
text = ""
|
32 |
for paragraph in doc.paragraphs:
|
33 |
text += paragraph.text + "\n"
|
34 |
-
|
35 |
return text
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
def get_all_cvs():
|
39 |
# Get all documents from the collection
|
@@ -51,15 +63,45 @@ def get_cv_by_id(id):
|
|
51 |
return doc.to_dict()
|
52 |
|
53 |
def create_cv(data):
|
54 |
-
# get
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
# upload file to firebase storage
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
# add file url to data
|
59 |
-
data["cv_url"] =
|
|
|
|
|
|
|
|
|
60 |
# Create a new document
|
61 |
-
|
62 |
-
# document_id = document_ref[1].id
|
63 |
return True
|
64 |
|
65 |
def delete_cv(id):
|
|
|
1 |
import uuid
|
2 |
+
import pytz
|
3 |
import io
|
4 |
+
import os
|
5 |
+
|
6 |
+
from app.configs.database import firebase_bucket, firebase_db
|
7 |
from docx import Document
|
8 |
+
from datetime import datetime
|
9 |
+
|
10 |
+
from langchain_community.document_loaders import UnstructuredPDFLoader
|
11 |
|
12 |
# CRUD operation
|
13 |
+
def upload_file_cvs(file_path):
|
14 |
+
# upload file to firebase storage from file_path
|
15 |
+
name_file = file_path.split("/")[-1]
|
16 |
+
blob = firebase_bucket.blob(name_file)
|
17 |
+
blob.upload_from_filename(file_path)
|
18 |
# return gs link
|
19 |
+
return f"gs://{firebase_bucket.name}/{name_file}"
|
20 |
|
21 |
def remove_file_cvs(file_url):
|
22 |
# remove file from firebase storage using "gs://" link
|
|
|
24 |
blob.delete()
|
25 |
return True
|
26 |
|
27 |
+
def file_cv_doc2text(file_path):
|
28 |
+
# Read the .docx file from file
|
29 |
+
doc = Document(file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
# Extract text from the .docx file
|
31 |
text = ""
|
32 |
for paragraph in doc.paragraphs:
|
33 |
text += paragraph.text + "\n"
|
|
|
34 |
return text
|
35 |
|
36 |
+
# def load cv from docx file
|
37 |
+
def file_cv_pdf2text(file_path):
|
38 |
+
# Read the .pdf file from the BytesIO object
|
39 |
+
loader = UnstructuredPDFLoader(file_path)
|
40 |
+
json_result = loader.load()
|
41 |
+
# take page_content from json_result
|
42 |
+
page_content = json_result[0].page_content
|
43 |
+
return page_content
|
44 |
+
|
45 |
+
def get_cv_content_by_id(id_cv):
|
46 |
+
# Get a document by id
|
47 |
+
doc = firebase_db.collection("cvs").document(id_cv).get()
|
48 |
+
return doc.to_dict()["cv_content"]
|
49 |
|
50 |
def get_all_cvs():
|
51 |
# Get all documents from the collection
|
|
|
63 |
return doc.to_dict()
|
64 |
|
65 |
def create_cv(data):
|
66 |
+
# get file_cv
|
67 |
+
file_cv = data["cv_content"]
|
68 |
+
# rename file name to uuid
|
69 |
+
re_name_file = str(uuid.uuid4()).replace("-","_") + "_" + file_cv.filename
|
70 |
+
# save uploaded file to tmp folder
|
71 |
+
cache_path = f"tmp/{re_name_file}"
|
72 |
+
with open(cache_path, "wb") as buffer:
|
73 |
+
buffer.write(file_cv.file.read())
|
74 |
+
|
75 |
+
# take file_cv and cv_upload type file
|
76 |
+
file_cv_type = file_cv.filename.split(".")[-1]
|
77 |
+
cv_text = ""
|
78 |
+
if file_cv_type == "pdf":
|
79 |
+
cv_text = file_cv_pdf2text(cache_path)
|
80 |
+
elif file_cv_type == "docx":
|
81 |
+
cv_text = file_cv_doc2text(cache_path)
|
82 |
+
else:
|
83 |
+
return False
|
84 |
+
|
85 |
# upload file to firebase storage
|
86 |
+
cv_uploaded_url = upload_file_cvs(cache_path)
|
87 |
+
# delete file in tmp folder
|
88 |
+
os.remove(cache_path)
|
89 |
+
|
90 |
+
# Get the current time in UTC
|
91 |
+
utc_now = datetime.utcnow()
|
92 |
+
# Specify the Vietnam time zone
|
93 |
+
vietnam_timezone = pytz.timezone('Asia/Ho_Chi_Minh')
|
94 |
+
# Convert the current time to Vietnam time zone
|
95 |
+
vietnam_now = utc_now.replace(tzinfo=pytz.utc).astimezone(vietnam_timezone).strftime("%Y-%m-%d %H:%M:%S")
|
96 |
+
|
97 |
# add file url to data
|
98 |
+
data["cv_url"] = cv_uploaded_url
|
99 |
+
# add cv_content
|
100 |
+
data["cv_content"] = cv_text
|
101 |
+
# add created_at
|
102 |
+
data["created_at"] = vietnam_now
|
103 |
# Create a new document
|
104 |
+
firebase_db.collection("cvs").add(data)
|
|
|
105 |
return True
|
106 |
|
107 |
def delete_cv(id):
|
app/modules/crud_jds/__init__.py
CHANGED
@@ -14,13 +14,13 @@ async def index():
|
|
14 |
|
15 |
# [POST] add JD
|
16 |
@crud_jds_router.post("/")
|
17 |
-
# only upload
|
18 |
async def add_jd(position_applied_for: str, file_jd: Annotated[UploadFile, File(..., description="Upload jd file (upload .txt)")]):
|
19 |
try:
|
20 |
file_jd_type = file_jd.filename.split(".")[-1]
|
21 |
if file_jd_type in ["txt"]:
|
22 |
# create a new document
|
23 |
-
if create_jd({"position_applied_for": position_applied_for,"
|
24 |
return {"message": "JD added successfully"}
|
25 |
else:
|
26 |
return {"message": "Error while adding JD file to database"}
|
|
|
14 |
|
15 |
# [POST] add JD
|
16 |
@crud_jds_router.post("/")
|
17 |
+
# only upload txt file
|
18 |
async def add_jd(position_applied_for: str, file_jd: Annotated[UploadFile, File(..., description="Upload jd file (upload .txt)")]):
|
19 |
try:
|
20 |
file_jd_type = file_jd.filename.split(".")[-1]
|
21 |
if file_jd_type in ["txt"]:
|
22 |
# create a new document
|
23 |
+
if create_jd({"position_applied_for": position_applied_for,"jd_text": file_jd}):
|
24 |
return {"message": "JD added successfully"}
|
25 |
else:
|
26 |
return {"message": "Error while adding JD file to database"}
|
app/modules/crud_jds/models/crud_jds.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
import uuid
|
2 |
from app.configs.database import firebase_bucket, firebase_db
|
3 |
-
|
|
|
|
|
4 |
|
5 |
# CRUD operation
|
6 |
def upload_file_jds(file):
|
@@ -17,11 +19,10 @@ def remove_file_jds(file_url):
|
|
17 |
blob.delete()
|
18 |
return True
|
19 |
|
20 |
-
def
|
21 |
-
#
|
22 |
-
|
23 |
-
|
24 |
-
return blob.download_as_text()
|
25 |
|
26 |
def get_all_jds():
|
27 |
# Get all documents from the collection
|
@@ -40,20 +41,40 @@ def get_jd_by_id(id):
|
|
40 |
|
41 |
def create_jd(data):
|
42 |
# get file_jds
|
43 |
-
file_jds = data["
|
44 |
-
#
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
# add file url to data
|
47 |
-
data["
|
|
|
|
|
48 |
# Create a new document
|
49 |
-
|
50 |
-
# document_id = document_ref[1].id
|
51 |
return True
|
52 |
|
53 |
def delete_jd(id):
|
54 |
-
# Delete a file from firebase storage
|
55 |
-
file_url = get_jd_by_id(id)["jd_url"]
|
56 |
-
remove_file_jds(file_url)
|
57 |
# Delete a document by id
|
58 |
firebase_db.collection("jds").document(id).delete()
|
59 |
return True
|
|
|
1 |
import uuid
|
2 |
from app.configs.database import firebase_bucket, firebase_db
|
3 |
+
from datetime import datetime
|
4 |
+
import pytz
|
5 |
+
import os
|
6 |
|
7 |
# CRUD operation
|
8 |
def upload_file_jds(file):
|
|
|
19 |
blob.delete()
|
20 |
return True
|
21 |
|
22 |
+
def get_jd_text_by_id(id_jd):
|
23 |
+
# Get a document by id
|
24 |
+
doc = firebase_db.collection("jds").document(id_jd).get()
|
25 |
+
return doc.to_dict()["jd_text"]
|
|
|
26 |
|
27 |
def get_all_jds():
|
28 |
# Get all documents from the collection
|
|
|
41 |
|
42 |
def create_jd(data):
|
43 |
# get file_jds
|
44 |
+
file_jds = data["jd_text"]
|
45 |
+
# change file name to uuid
|
46 |
+
re_name_file = str(uuid.uuid4()).replace("-","_") + "_" + file_jds.filename
|
47 |
+
# save uploaded file to tmp folder
|
48 |
+
with open(f"tmp/{re_name_file}", "wb") as buffer:
|
49 |
+
buffer.write(file_jds.file.read())
|
50 |
+
# read file
|
51 |
+
with open(f"tmp/{re_name_file}", "r", encoding="utf8") as file:
|
52 |
+
jd_text = file.read()
|
53 |
+
# delete file in tmp folder
|
54 |
+
os.remove(f"tmp/{re_name_file}")
|
55 |
+
|
56 |
+
# # upload file to firebase storage
|
57 |
+
# file_url = upload_file_jds(file_jds)
|
58 |
+
|
59 |
+
# Get the current time in UTC
|
60 |
+
utc_now = datetime.utcnow()
|
61 |
+
# Specify the Vietnam time zone
|
62 |
+
vietnam_timezone = pytz.timezone('Asia/Ho_Chi_Minh')
|
63 |
+
# Convert the current time to Vietnam time zone
|
64 |
+
vietnam_now = utc_now.replace(tzinfo=pytz.utc).astimezone(vietnam_timezone).strftime("%Y-%m-%d %H:%M:%S")
|
65 |
+
|
66 |
# add file url to data
|
67 |
+
data["jd_text"] = jd_text
|
68 |
+
# add created_at
|
69 |
+
data["created_at"] = vietnam_now
|
70 |
# Create a new document
|
71 |
+
firebase_db.collection("jds").add(data)
|
|
|
72 |
return True
|
73 |
|
74 |
def delete_jd(id):
|
75 |
+
# # Delete a file from firebase storage
|
76 |
+
# file_url = get_jd_by_id(id)["jd_url"]
|
77 |
+
# remove_file_jds(file_url)
|
78 |
# Delete a document by id
|
79 |
firebase_db.collection("jds").document(id).delete()
|
80 |
return True
|
app/modules/crud_question_test/__init__.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
from fastapi import APIRouter, UploadFile, File
|
2 |
from typing import Annotated
|
3 |
|
4 |
-
from app.modules.crud_question_test.models.crud_question_tests import get_all_question_tests,
|
5 |
|
6 |
crud_question_tests_router = APIRouter(prefix="/crud_question_tests_router", tags=["crud_question_tests_router"])
|
7 |
|
@@ -15,21 +15,22 @@ async def index():
|
|
15 |
# [POST] add question test
|
16 |
@crud_question_tests_router.post("/")
|
17 |
# only upload pdf or json file
|
18 |
-
async def add_question_test(description: str, role: str, file_question_tests: Annotated[UploadFile, File(..., description="The question test file", media_type=["application/pdf", "application/json"])]):
|
19 |
try:
|
|
|
20 |
# check if file is pdf or json
|
21 |
-
if
|
22 |
# create a new document
|
23 |
if create_question_test({"question_tests_description": description, "question_tests_role": role, "question_tests_url": file_question_tests}):
|
24 |
return {"message": "Question test added successfully"}
|
25 |
else:
|
26 |
-
return {"message": "Error"}
|
27 |
-
elif
|
28 |
# create a new document
|
29 |
if create_question_test({"question_tests_description": description, "question_tests_role": role, "question_tests_url": file_question_tests}):
|
30 |
return {"message": "Question test added successfully"}
|
31 |
else:
|
32 |
-
return {"message": "Error"}
|
33 |
else:
|
34 |
return {"message": "File type not supported"}
|
35 |
except Exception as e:
|
|
|
1 |
from fastapi import APIRouter, UploadFile, File
|
2 |
from typing import Annotated
|
3 |
|
4 |
+
from app.modules.crud_question_test.models.crud_question_tests import get_all_question_tests, create_question_test, delete_question_test
|
5 |
|
6 |
crud_question_tests_router = APIRouter(prefix="/crud_question_tests_router", tags=["crud_question_tests_router"])
|
7 |
|
|
|
15 |
# [POST] add question test
|
16 |
@crud_question_tests_router.post("/")
|
17 |
# only upload pdf or json file
|
18 |
+
async def add_question_test(description: str, role: str, file_question_tests: Annotated[UploadFile, File(..., description="The question test file (Upload .pdf or .json)", media_type=["application/pdf", "application/json"])]):
|
19 |
try:
|
20 |
+
question_tests_upload_type = file_question_tests.filename.split(".")[-1]
|
21 |
# check if file is pdf or json
|
22 |
+
if question_tests_upload_type == "pdf":
|
23 |
# create a new document
|
24 |
if create_question_test({"question_tests_description": description, "question_tests_role": role, "question_tests_url": file_question_tests}):
|
25 |
return {"message": "Question test added successfully"}
|
26 |
else:
|
27 |
+
return {"message": "Error", "error": str(e)}
|
28 |
+
elif question_tests_upload_type == "json":
|
29 |
# create a new document
|
30 |
if create_question_test({"question_tests_description": description, "question_tests_role": role, "question_tests_url": file_question_tests}):
|
31 |
return {"message": "Question test added successfully"}
|
32 |
else:
|
33 |
+
return {"message": "Error", "error": str(e)}
|
34 |
else:
|
35 |
return {"message": "File type not supported"}
|
36 |
except Exception as e:
|
app/modules/crud_question_test/models/crud_question_tests.py
CHANGED
@@ -4,6 +4,8 @@ from app.configs.qdrant_db import qdrant_client
|
|
4 |
from app.configs.qdrant_db import models
|
5 |
from app.modules.question_tests_retrieval.models.text2vector import text2vector
|
6 |
|
|
|
|
|
7 |
|
8 |
# CRUD operation
|
9 |
def upload_file_question_tests(file):
|
@@ -48,6 +50,16 @@ def create_question_test(data):
|
|
48 |
file_question_tests = data["question_tests_url"]
|
49 |
# upload file to firebase storage
|
50 |
file_url = upload_file_question_tests(file_question_tests)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
# add file url to data
|
52 |
data["question_tests_url"] = file_url
|
53 |
question_tests_des = data["question_tests_description"]
|
@@ -65,13 +77,6 @@ def create_question_test(data):
|
|
65 |
|
66 |
return True
|
67 |
|
68 |
-
def update_question_test(id, data):
|
69 |
-
# Update a document by id
|
70 |
-
firebase_db.collection("question_tests").document(id).update(data)
|
71 |
-
# Update corrensponding vector in Qdrant
|
72 |
-
|
73 |
-
return True
|
74 |
-
|
75 |
def delete_question_test(id):
|
76 |
# Delete a file from firebase storage
|
77 |
file_url = get_question_test_by_id(id)["question_tests_url"]
|
|
|
4 |
from app.configs.qdrant_db import models
|
5 |
from app.modules.question_tests_retrieval.models.text2vector import text2vector
|
6 |
|
7 |
+
from datetime import datetime
|
8 |
+
import pytz
|
9 |
|
10 |
# CRUD operation
|
11 |
def upload_file_question_tests(file):
|
|
|
50 |
file_question_tests = data["question_tests_url"]
|
51 |
# upload file to firebase storage
|
52 |
file_url = upload_file_question_tests(file_question_tests)
|
53 |
+
|
54 |
+
# Get the current time in UTC
|
55 |
+
utc_now = datetime.utcnow()
|
56 |
+
# Specify the Vietnam time zone
|
57 |
+
vietnam_timezone = pytz.timezone('Asia/Ho_Chi_Minh')
|
58 |
+
# Convert the current time to Vietnam time zone
|
59 |
+
vietnam_now = utc_now.replace(tzinfo=pytz.utc).astimezone(vietnam_timezone).strftime("%Y-%m-%d %H:%M:%S")
|
60 |
+
# add created_at
|
61 |
+
data["created_at"] = vietnam_now
|
62 |
+
|
63 |
# add file url to data
|
64 |
data["question_tests_url"] = file_url
|
65 |
question_tests_des = data["question_tests_description"]
|
|
|
77 |
|
78 |
return True
|
79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
def delete_question_test(id):
|
81 |
# Delete a file from firebase storage
|
82 |
file_url = get_question_test_by_id(id)["question_tests_url"]
|
app/modules/crud_rag_question_tests/__init__.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter, UploadFile, File
|
2 |
+
from typing import Annotated
|
3 |
+
|
4 |
+
from app.modules.crud_rag_question_tests.models.crud_rag_question_tests import get_all_rag_question_tests, create_rag_question_test, delete_question_test
|
5 |
+
|
6 |
+
crud_rag_question_tests_router = APIRouter(prefix="/crud_rag_question_tests_router", tags=["crud_rag_question_tests_router"])
|
7 |
+
|
8 |
+
# [GET] all question tests
|
9 |
+
@crud_rag_question_tests_router.get("/")
|
10 |
+
async def index():
|
11 |
+
# Get all documents from the collection with id document
|
12 |
+
data = get_all_rag_question_tests()
|
13 |
+
return data
|
14 |
+
|
15 |
+
# [POST] add question test
|
16 |
+
@crud_rag_question_tests_router.post("/")
|
17 |
+
# only upload pdf or json file
|
18 |
+
async def add_question_generator(id_jd: str, file_question_generator_tests: Annotated[UploadFile, File(..., description="The question generator test file", media_type=["application/json"])]):
|
19 |
+
try:
|
20 |
+
question_tests_upload_type = file_question_generator_tests.filename.split(".")[-1]
|
21 |
+
# check if file is json
|
22 |
+
if question_tests_upload_type == "json":
|
23 |
+
# create a new document
|
24 |
+
if create_rag_question_test({"id_jd": id_jd, "question_generator_tests_url": file_question_generator_tests}):
|
25 |
+
return {"message": "Question test added successfully"}
|
26 |
+
else:
|
27 |
+
return {"error": str(e)}
|
28 |
+
else:
|
29 |
+
return {"message": "File type not supported"}
|
30 |
+
except Exception as e:
|
31 |
+
return {"message": "Error", "error": str(e)}
|
32 |
+
|
33 |
+
# [DELETE] question test by id
|
34 |
+
@crud_rag_question_tests_router.delete("/{id}")
|
35 |
+
async def delete_question_test_by_id(id: str):
|
36 |
+
# Delete a document by id
|
37 |
+
if delete_question_test(id):
|
38 |
+
return {"message": f"Question test have id {id} deleted successfully"}
|
39 |
+
else:
|
40 |
+
return {"message": "Error"}
|
app/modules/crud_rag_question_tests/models/crud_rag_question_tests.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import uuid
|
2 |
+
from app.configs.database import firebase_bucket, firebase_db
|
3 |
+
|
4 |
+
from datetime import datetime
|
5 |
+
import pytz
|
6 |
+
|
7 |
+
# CRUD operation
|
8 |
+
def upload_file_rag_question_tests(file):
|
9 |
+
re_name_file = str(uuid.uuid4()).replace("-","_") + "_" + file.filename
|
10 |
+
# upload file to firebase storage
|
11 |
+
blob = firebase_bucket.blob(re_name_file)
|
12 |
+
blob.upload_from_file(file.file)
|
13 |
+
# return gs link
|
14 |
+
return f"gs://{firebase_bucket.name}/{re_name_file}"
|
15 |
+
|
16 |
+
def remove_file_rag_question_tests(file_url):
|
17 |
+
# remove file from firebase storage using "gs://" link
|
18 |
+
blob = firebase_bucket.blob(file_url.split(f"gs://{firebase_bucket.name}/")[1])
|
19 |
+
blob.delete()
|
20 |
+
return True
|
21 |
+
|
22 |
+
def get_all_rag_question_tests():
|
23 |
+
# Get all documents from the collection
|
24 |
+
docs = firebase_db.collection("rag_question_tests").stream()
|
25 |
+
data = []
|
26 |
+
for doc in docs:
|
27 |
+
doc_data = doc.to_dict()
|
28 |
+
doc_data["id"] = doc.id
|
29 |
+
data.append(doc_data)
|
30 |
+
return data
|
31 |
+
|
32 |
+
def get_question_test_by_id(id):
|
33 |
+
# Get a document by id
|
34 |
+
doc = firebase_db.collection("rag_question_tests").document(id).get()
|
35 |
+
return doc.to_dict()
|
36 |
+
|
37 |
+
def create_rag_question_test(data):
|
38 |
+
# get file_rag_question_tests
|
39 |
+
file_rag_question_tests = data["question_generator_tests_url"]
|
40 |
+
# upload file to firebase storage
|
41 |
+
file_url = upload_file_rag_question_tests(file_rag_question_tests)
|
42 |
+
|
43 |
+
# Get the current time in UTC
|
44 |
+
utc_now = datetime.utcnow()
|
45 |
+
# Specify the Vietnam time zone
|
46 |
+
vietnam_timezone = pytz.timezone('Asia/Ho_Chi_Minh')
|
47 |
+
# Convert the current time to Vietnam time zone
|
48 |
+
vietnam_now = utc_now.replace(tzinfo=pytz.utc).astimezone(vietnam_timezone).strftime("%Y-%m-%d %H:%M:%S")
|
49 |
+
# add created_at
|
50 |
+
data["created_at"] = vietnam_now
|
51 |
+
|
52 |
+
# add file url to data
|
53 |
+
data["question_generator_tests_url"] = file_url
|
54 |
+
# Create a new document
|
55 |
+
document_ref = firebase_db.collection("rag_question_tests").add(data)
|
56 |
+
return True
|
57 |
+
|
58 |
+
def delete_question_test(id):
|
59 |
+
# Delete a file from firebase storage
|
60 |
+
file_url = get_question_test_by_id(id)["question_generator_tests_url"]
|
61 |
+
remove_file_rag_question_tests(file_url)
|
62 |
+
# Delete a document by id
|
63 |
+
firebase_db.collection("rag_question_tests").document(id).delete()
|
64 |
+
return True
|
app/modules/matching_cv/__init__.py
CHANGED
@@ -4,8 +4,8 @@ from fastapi import APIRouter
|
|
4 |
# from app.modules.matching_cv.models.match_cv_jd_model import Match_JD_CV_Model
|
5 |
|
6 |
from app.modules.matching_cv.models.matching_cv_logic import result_matching_cv_jd
|
7 |
-
from app.modules.crud_jds.models.crud_jds import get_jd_by_id,
|
8 |
-
from app.modules.crud_cvs.models.crud_cvs import get_cv_by_id, file_cv_doc2text
|
9 |
|
10 |
cvmatching_router = APIRouter(prefix="/cvmatching", tags=["cvmatching"])
|
11 |
|
@@ -17,42 +17,10 @@ async def index():
|
|
17 |
# only upload .pdf or .docx file
|
18 |
async def matching_cv_jd(id_jd: str, id_cv:str):
|
19 |
try:
|
20 |
-
|
21 |
-
|
22 |
-
cv_document = get_cv_by_id(id_cv)
|
23 |
|
24 |
-
|
25 |
-
jd_url = jd_document["jd_url"]
|
26 |
-
cv_url = cv_document["cv_url"]
|
27 |
-
|
28 |
-
# get type file cv from cv_url "gs://bucket_name/file_name"
|
29 |
-
cv_type = cv_url.split(".")[-1]
|
30 |
-
if cv_type == "pdf":
|
31 |
-
return {"message": "This feature is not available yet"}
|
32 |
-
elif cv_type == "docx":
|
33 |
-
cv_text = file_cv_doc2text(cv_url)
|
34 |
-
else:
|
35 |
-
return {"message": "Please upload only .pdf or .docx file for CV"}
|
36 |
-
|
37 |
-
# get jd_text from jd_url "gs://bucket_name/file_name"
|
38 |
-
jd_text = file_jd_txt2text(jd_url)
|
39 |
-
|
40 |
-
result = result_matching_cv_jd(cv_text, jd_text)
|
41 |
return {"result": result}
|
42 |
-
# # take jd_upload and cv_upload type file
|
43 |
-
# jd_upload_type = jd_upload.filename.split(".")[-1]
|
44 |
-
# cv_upload_type = cv_upload.filename.split(".")[-1]
|
45 |
-
# if jd_upload_type in ["txt"] and cv_upload_type in ["pdf", "docx"]:
|
46 |
-
# jd_text = jd_upload.file.read().decode("utf-8")
|
47 |
-
# if cv_upload_type == "docx":
|
48 |
-
# cv_text = docx.Document(cv_upload.file).paragraphs
|
49 |
-
# cv_text = "\n".join([i.text for i in cv_text])
|
50 |
-
# elif cv_upload_type == "pdf":
|
51 |
-
# return {"message": "This feature is not available yet"}
|
52 |
-
# # check matching cv and jd
|
53 |
-
# result = result_matching_cv_jd(cv_text, jd_text)
|
54 |
-
# return {"result": result}
|
55 |
-
# else:
|
56 |
-
# return {"message": "Please upload only .txt for JD. And .pdf or .docx file for CV"}
|
57 |
except Exception as e:
|
58 |
return {"Error": str(e)}
|
|
|
4 |
# from app.modules.matching_cv.models.match_cv_jd_model import Match_JD_CV_Model
|
5 |
|
6 |
from app.modules.matching_cv.models.matching_cv_logic import result_matching_cv_jd
|
7 |
+
from app.modules.crud_jds.models.crud_jds import get_jd_by_id, get_jd_text_by_id
|
8 |
+
from app.modules.crud_cvs.models.crud_cvs import get_cv_by_id, file_cv_doc2text, file_cv_pdf2text
|
9 |
|
10 |
cvmatching_router = APIRouter(prefix="/cvmatching", tags=["cvmatching"])
|
11 |
|
|
|
17 |
# only upload .pdf or .docx file
|
18 |
async def matching_cv_jd(id_jd: str, id_cv:str):
|
19 |
try:
|
20 |
+
cv_content = get_cv_by_id(id_cv)
|
21 |
+
jd_text = get_jd_text_by_id(id_jd)
|
|
|
22 |
|
23 |
+
result = result_matching_cv_jd(cv_text=cv_content,jd_text=jd_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
return {"result": result}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
except Exception as e:
|
26 |
return {"Error": str(e)}
|
app/modules/matching_cv/models/match_cv_jd_model.py
DELETED
@@ -1,7 +0,0 @@
|
|
1 |
-
from fastapi import APIRouter, UploadFile, File
|
2 |
-
|
3 |
-
class Match_JD_CV_Model:
|
4 |
-
jd = UploadFile
|
5 |
-
jd_default = File(..., description="Upload JD file (only .txt file)", media_type=["text/plain"])
|
6 |
-
cv = UploadFile
|
7 |
-
cv_default = File(..., description="Upload CV file (only .pdf and .docx)", media_type=["application/pdf", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/modules/matching_cv/models/matching_cv_logic.py
CHANGED
@@ -6,6 +6,8 @@ from dotenv import load_dotenv
|
|
6 |
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
|
7 |
from langchain_core.messages import SystemMessage
|
8 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
|
|
|
|
9 |
|
10 |
# import the json oupput parser from the langchain core
|
11 |
from langchain_core.output_parsers import JsonOutputParser
|
@@ -18,32 +20,38 @@ load_dotenv()
|
|
18 |
|
19 |
# Define the google api key
|
20 |
os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
|
|
|
|
|
21 |
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
# def matching cv and jd return percentage of matching using prompt template
|
24 |
-
def result_matching_cv_jd(cv_text, jd_text):
|
25 |
-
# create the prompt template
|
26 |
-
chat_template = ChatPromptTemplate.from_messages(
|
27 |
-
[
|
28 |
-
SystemMessage(
|
29 |
-
content=(
|
30 |
-
"""
|
31 |
-
Given the following CV and JD, calculate the percentage match between the candidate's qualifications and the job requirements:
|
32 |
-
CV: {cv}
|
33 |
-
JD: {jd}
|
34 |
-
To determine the match percentage, analyze the skills and experience in the CV and compare them to the requirements outlined in the JD. Provide the final match percentage as a numeric value between 0-100%, along with a brief explanation of your analysis. Follow this json format: {"Skills Match": {"Required Skills": "","Candidate Skills": "","Match Percentage": "",}, "Experience Match": {"Required Experience": "","Candidate Experience": "","Match Percentage": "",}, "Overall Match Percentage:": "", "Explanation": ""}
|
35 |
-
"""
|
36 |
-
)
|
37 |
-
),
|
38 |
-
HumanMessagePromptTemplate.from_template(["{cv}", "{jd}"]),
|
39 |
-
]
|
40 |
-
)
|
41 |
-
|
42 |
# create the chat message
|
43 |
chat_message = chat_template.format_messages(cv=cv_text, jd=jd_text)
|
44 |
-
|
45 |
-
llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3, convert_system_message_to_human=True, api_key=GOOGLE_API_KEY, request_timeout=120)
|
46 |
-
chain = llm | parser
|
47 |
result = chain.invoke(chat_message)
|
48 |
|
49 |
return result
|
|
|
6 |
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
|
7 |
from langchain_core.messages import SystemMessage
|
8 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
9 |
+
from langchain_anthropic import ChatAnthropic
|
10 |
+
from langchain_openai import OpenAI
|
11 |
|
12 |
# import the json oupput parser from the langchain core
|
13 |
from langchain_core.output_parsers import JsonOutputParser
|
|
|
20 |
|
21 |
# Define the google api key
|
22 |
os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
|
23 |
+
os.environ['CLAUDE_API_KEY'] = os.getenv('CLAUDE_API_KEY')
|
24 |
+
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
|
25 |
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
|
26 |
+
CLAUDE_API_KEY = os.environ.get("CLAUDE_API_KEY")
|
27 |
+
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
28 |
+
|
29 |
+
llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3, convert_system_message_to_human=True, api_key=GOOGLE_API_KEY, request_timeout=120)
|
30 |
+
# llm = ChatAnthropic(temperature=0.3, model_name="claude-3-opus-20240229", anthropic_api_key=CLAUDE_API_KEY, default_request_timeout=120)
|
31 |
+
# llm = OpenAI(model_name="gpt-3.5-turbo-0125", openai_api_key=OPENAI_API_KEY)
|
32 |
+
chain = llm | parser
|
33 |
+
|
34 |
+
# create the prompt template
|
35 |
+
chat_template = ChatPromptTemplate.from_messages(
|
36 |
+
[
|
37 |
+
SystemMessage(
|
38 |
+
content=(
|
39 |
+
"""
|
40 |
+
Given the following CV and JD, calculate the percentage match between the candidate's qualifications and the job requirements:
|
41 |
+
CV: {cv}
|
42 |
+
JD: {jd}
|
43 |
+
To determine the match percentage, analyze the skills and experience in the CV and compare them to the requirements outlined in the JD. Provide the final match percentage as a numeric value between 0-100%, along with a brief explanation of your analysis. Follow this json format: {"Skills Match": {"Required Skills": "","Candidate Skills": "","Match Percentage": "",}, "Experience Match": {"Required Experience": "","Candidate Experience": "","Match Percentage": "",}, "Overall Match Percentage:": "", "Explanation": ""}
|
44 |
+
"""
|
45 |
+
)
|
46 |
+
),
|
47 |
+
HumanMessagePromptTemplate.from_template(["{cv}", "{jd}"]),
|
48 |
+
]
|
49 |
+
)
|
50 |
|
51 |
# def matching cv and jd return percentage of matching using prompt template
|
52 |
+
def result_matching_cv_jd(cv_text: str, jd_text: str):
    """Run the CV/JD matching prompt through the configured LLM chain.

    Args:
        cv_text: Plain-text content of the candidate's CV.
        jd_text: Plain-text content of the job description.

    Returns:
        The parsed JSON match report (skills match, experience match,
        overall percentage, explanation) produced by the module-level
        ``chain`` (LLM piped into ``JsonOutputParser``).
    """
    # Fill the shared prompt template with this CV/JD pair ...
    messages = chat_template.format_messages(cv=cv_text, jd=jd_text)
    # ... and invoke the module-level LLM + JSON-parser chain.
    return chain.invoke(messages)
|
app/modules/question_tests_retrieval/__init__.py
CHANGED
@@ -1,9 +1,8 @@
|
|
1 |
-
from fastapi import APIRouter
|
2 |
-
from typing import Annotated
|
3 |
|
4 |
from app.modules.question_tests_retrieval.models.jd2text import jobdes2text
|
5 |
from app.modules.question_tests_retrieval.models.question_tests_logic import get_question_tests
|
6 |
-
from app.modules.crud_jds.models.crud_jds import get_jd_by_id,
|
7 |
|
8 |
qtretrieval_router = APIRouter(prefix="/qtretrieval", tags=["qtretrieval"])
|
9 |
|
@@ -15,15 +14,13 @@ async def index():
|
|
15 |
# only upload .txt file
|
16 |
async def send_jd_to_get_question(id_jd: str):
|
17 |
try:
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
sumaryjd_text = jobdes2text(jd_file_string)
|
22 |
if get_question_tests(sumaryjd_text):
|
23 |
return {"message": "Send JD successfully and get question test successfully",
|
24 |
-
"
|
25 |
else:
|
26 |
-
return {"
|
27 |
except Exception as e:
|
28 |
return {"message": "Have error when find JD in database", "error": str(e)}
|
29 |
-
|
|
|
1 |
+
from fastapi import APIRouter
|
|
|
2 |
|
3 |
from app.modules.question_tests_retrieval.models.jd2text import jobdes2text
|
4 |
from app.modules.question_tests_retrieval.models.question_tests_logic import get_question_tests
|
5 |
+
from app.modules.crud_jds.models.crud_jds import get_jd_by_id, get_jd_text_by_id
|
6 |
|
7 |
qtretrieval_router = APIRouter(prefix="/qtretrieval", tags=["qtretrieval"])
|
8 |
|
|
|
14 |
# only upload .txt file
|
15 |
async def send_jd_to_get_question(id_jd: str):
    """Summarise a stored JD and retrieve matching question tests.

    Args:
        id_jd: Firebase document id of the job description.

    Returns:
        dict: success message plus the JD summary when question tests
        were found and downloaded, otherwise an error payload.
    """
    try:
        # Load the JD text and condense it to a title/level/skills summary.
        jd_text = get_jd_text_by_id(id_jd)
        sumaryjd_text = jobdes2text(jd_text)
        if get_question_tests(sumaryjd_text):
            return {"message": "Send JD successfully and get question test successfully",
                    "sumary JD": sumaryjd_text}
        else:
            # BUG FIX: the previous code returned {"error": str(e)} here,
            # but no exception object exists in this branch, so it raised
            # NameError. Report the retrieval failure explicitly instead.
            return {"error": "Can not get question tests for this JD"}
    except Exception as e:
        return {"message": "Have error when find JD in database", "error": str(e)}
|
|
app/modules/question_tests_retrieval/models/jd2text.py
CHANGED
@@ -2,6 +2,8 @@ from langchain_google_genai import ChatGoogleGenerativeAI
|
|
2 |
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
|
3 |
from langchain_core.messages import SystemMessage
|
4 |
from langchain_core.output_parsers import JsonOutputParser
|
|
|
|
|
5 |
import os
|
6 |
from dotenv import load_dotenv
|
7 |
|
@@ -10,33 +12,33 @@ load_dotenv()
|
|
10 |
|
11 |
# Define the google api key
|
12 |
os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
|
|
|
13 |
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
|
|
|
14 |
|
15 |
# define the parser object
|
16 |
parser = JsonOutputParser()
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
# create the chat message
|
35 |
chat_message = finnal_jd_chat_template.format_messages(text=jobdes)
|
36 |
-
|
37 |
# create a chain
|
38 |
chain = llm
|
39 |
-
|
40 |
result = chain.invoke(chat_message)
|
41 |
-
|
42 |
return result.content
|
|
|
2 |
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
|
3 |
from langchain_core.messages import SystemMessage
|
4 |
from langchain_core.output_parsers import JsonOutputParser
|
5 |
+
from langchain_anthropic import ChatAnthropic
|
6 |
+
|
7 |
import os
|
8 |
from dotenv import load_dotenv
|
9 |
|
|
|
12 |
|
13 |
# Define the google api key
|
14 |
os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
|
15 |
+
os.environ['CLAUDE_API_KEY'] = os.getenv('CLAUDE_API_KEY')
|
16 |
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
|
17 |
+
CLAUDE_API_KEY = os.environ.get("CLAUDE_API_KEY")
|
18 |
|
19 |
# define the parser object
|
20 |
parser = JsonOutputParser()
|
21 |
|
22 |
+
# setup the gemini pro
|
23 |
+
llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3, convert_system_message_to_human=True, api_key=GOOGLE_API_KEY, request_timeout=120)
|
24 |
+
# llm = ChatAnthropic(temperature=0, anthropic_api_key=CLAUDE_API_KEY, model_name="claude-3-opus-20240229")
|
25 |
+
# create the prompt template
|
26 |
+
finnal_jd_chat_template = ChatPromptTemplate.from_messages(
|
27 |
+
[
|
28 |
+
SystemMessage(
|
29 |
+
content=(
|
30 |
+
"""Return Job title, level(Fresher, Junior, Senior, ...) and Brief summary of required skills about 20 words from the job description. Use the following format: Job Title is {job title}, Level is {level}, and Brief summary of required skills is {brief summary of required skills}."""
|
31 |
+
)
|
32 |
+
),
|
33 |
+
HumanMessagePromptTemplate.from_template("{text}"),
|
34 |
+
]
|
35 |
+
)
|
36 |
+
|
37 |
+
def jobdes2text(jobdes: str):
    """Summarise a raw job description with the module-level LLM.

    Args:
        jobdes: Full job-description text.

    Returns:
        str: A one-line summary of the form "Job Title is ..., Level is
        ..., and Brief summary of required skills is ...", as requested
        by the module-level prompt template.
    """
    # Render the shared prompt with the job description ...
    prompt_messages = finnal_jd_chat_template.format_messages(text=jobdes)
    # ... and return the model's text content directly (no parser here).
    return llm.invoke(prompt_messages).content
|
app/modules/question_tests_retrieval/models/question_tests_logic.py
CHANGED
@@ -20,21 +20,6 @@ GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
|
|
20 |
# Setting model embedding
|
21 |
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY, request_timeout=120)
|
22 |
gemini_evaluator = load_evaluator("embedding_distance", distance_metric=EmbeddingDistance.COSINE, embeddings=embedding_model)
|
23 |
-
|
24 |
-
# def compare_vector(vector_extract, vector_des):
|
25 |
-
# maxnimun_value = 2
|
26 |
-
# for item in vector_des:
|
27 |
-
# two_object = (vector_extract, item)
|
28 |
-
# x = gemini_evaluator.evaluate_strings(prediction=two_object[0], reference=two_object[1])
|
29 |
-
# if x.get('score') < maxnimun_value:
|
30 |
-
# maxnimun_value = x.get('score')
|
31 |
-
# des_item_choose = item
|
32 |
-
# if maxnimun_value == 2:
|
33 |
-
# return False
|
34 |
-
# elif maxnimun_value < 0.3:
|
35 |
-
# return des_item_choose
|
36 |
-
# else:
|
37 |
-
# return False
|
38 |
|
39 |
def compare_vector(description_vector, max_number_of_points=3):
|
40 |
similarity_list = qdrant_client.search(
|
@@ -51,17 +36,7 @@ def compare_vector(description_vector, max_number_of_points=3):
|
|
51 |
|
52 |
return formatted_similarity_list
|
53 |
|
54 |
-
|
55 |
-
# # check folder exist
|
56 |
-
# if not os.path.exists('data/question_tests'):
|
57 |
-
# os.makedirs('data/question_tests')
|
58 |
-
# # download file from firebase storage using "gs://" link
|
59 |
-
# name_bucket = question_test_url.split(f"gs://{firebase_bucket.name}/")[1]
|
60 |
-
# blob = firebase_bucket.blob(name_bucket)
|
61 |
-
# blob.download_to_filename(f'data/question_tests/{name_bucket}')
|
62 |
-
# return True
|
63 |
-
|
64 |
-
def download_question_test(question_test_url_list):
|
65 |
# check folder exist
|
66 |
if not os.path.exists('data/question_tests'):
|
67 |
os.makedirs('data/question_tests')
|
@@ -73,23 +48,7 @@ def download_question_test(question_test_url_list):
|
|
73 |
|
74 |
return True
|
75 |
|
76 |
-
|
77 |
-
# def get_question_test(text):
|
78 |
-
# all_question_tests = get_all_question_tests()
|
79 |
-
# value_in_des = []
|
80 |
-
# for item in all_question_tests:
|
81 |
-
# value_in_des.append(item['question_tests_description'])
|
82 |
-
# des_item_choose = compare_vector(text, value_in_des)
|
83 |
-
# if des_item_choose == False:
|
84 |
-
# return "No question test found"
|
85 |
-
# else:
|
86 |
-
# question_test_url = get_question_test_url_by_description(des_item_choose)
|
87 |
-
# if download_question_test(question_test_url):
|
88 |
-
# return True
|
89 |
-
# else:
|
90 |
-
# return False
|
91 |
-
|
92 |
-
def get_question_tests(text):
|
93 |
# Get formatted similarity list
|
94 |
formatted_similarity_list = compare_vector(text2vector(text))
|
95 |
# Get corresponding document url in Firebase and download them
|
|
|
20 |
# Setting model embedding
|
21 |
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY, request_timeout=120)
|
22 |
gemini_evaluator = load_evaluator("embedding_distance", distance_metric=EmbeddingDistance.COSINE, embeddings=embedding_model)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
def compare_vector(description_vector, max_number_of_points=3):
|
25 |
similarity_list = qdrant_client.search(
|
|
|
36 |
|
37 |
return formatted_similarity_list
|
38 |
|
39 |
+
def download_question_test(question_test_url_list: list):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
# check folder exist
|
41 |
if not os.path.exists('data/question_tests'):
|
42 |
os.makedirs('data/question_tests')
|
|
|
48 |
|
49 |
return True
|
50 |
|
51 |
+
def get_question_tests(text: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
# Get formatted similarity list
|
53 |
formatted_similarity_list = compare_vector(text2vector(text))
|
54 |
# Get corresponding document url in Firebase and download them
|
app/modules/question_tests_retrieval/models/text2vector.py
CHANGED
@@ -8,8 +8,8 @@ load_dotenv()
|
|
8 |
# Define the google api key
|
9 |
os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
|
10 |
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
|
|
|
11 |
|
12 |
-
def text2vector(text):
|
13 |
-
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY, request_timeout=120)
|
14 |
vector = embeddings.embed_query(text)
|
15 |
return vector
|
|
|
8 |
# Define the google api key
|
9 |
os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
|
10 |
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
|
11 |
+
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY, request_timeout=120)
|
12 |
|
13 |
+
def text2vector(text: str):
    """Embed *text* with the module-level Google embedding model.

    Args:
        text: Arbitrary text to embed (e.g. a JD summary).

    Returns:
        list[float]: The embedding vector for the query text.
    """
    # Delegate to the shared GoogleGenerativeAIEmbeddings instance,
    # which is constructed once at import time.
    return embeddings.embed_query(text)
|
scrapping.py
DELETED
@@ -1,8 +0,0 @@
|
|
1 |
-
from bs4 import BeautifulSoup
|
2 |
-
import requests
|
3 |
-
|
4 |
-
url = "https://en.wikipedia.org/wiki/List_of_largest_companies_by_revenue"
|
5 |
-
page = requests.get(url)
|
6 |
-
soup = BeautifulSoup(page.text, 'html')
|
7 |
-
|
8 |
-
print(soup)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tmp/.gitkeep
ADDED
File without changes
|