Spaces:
Build error
Build error
TruongTrongTien
committed on
Commit
•
915beec
1
Parent(s):
e5d71c1
Phase2/TienTT: Add query vectordatabase and update delete
Browse files
app/modules/crud_question_test/models/crud_question_tests.py
CHANGED
@@ -68,6 +68,8 @@ def create_question_test(data):
|
|
68 |
def update_question_test(id, data):
|
69 |
# Update a document by id
|
70 |
firebase_db.collection("question_tests").document(id).update(data)
|
|
|
|
|
71 |
return True
|
72 |
|
73 |
def delete_question_test(id):
|
@@ -76,4 +78,19 @@ def delete_question_test(id):
|
|
76 |
remove_file_question_tests(file_url)
|
77 |
# Delete a document by id
|
78 |
firebase_db.collection("question_tests").document(id).delete()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
return True
|
|
|
68 |
def update_question_test(id, data):
|
69 |
# Update a document by id
|
70 |
firebase_db.collection("question_tests").document(id).update(data)
|
71 |
+
# Update corresponding vector in Qdrant
|
72 |
+
|
73 |
return True
|
74 |
|
75 |
def delete_question_test(id):
|
|
|
78 |
remove_file_question_tests(file_url)
|
79 |
# Delete a document by id
|
80 |
firebase_db.collection("question_tests").document(id).delete()
|
81 |
+
|
82 |
+
# Delete corresponding vector from Qdrant
|
83 |
+
qdrant_client.delete(
|
84 |
+
collection_name="question_tests",
|
85 |
+
points_selector=models.FilterSelector(
|
86 |
+
filter=models.Filter(
|
87 |
+
must=[
|
88 |
+
models.FieldCondition(
|
89 |
+
key="id",
|
90 |
+
match=models.MatchValue(value=id),
|
91 |
+
),
|
92 |
+
],
|
93 |
+
)
|
94 |
+
),
|
95 |
+
)
|
96 |
return True
|
app/modules/question_tests_retrieval/__init__.py
CHANGED
@@ -2,8 +2,8 @@ from fastapi import APIRouter, UploadFile, File
|
|
2 |
from typing import Annotated
|
3 |
|
4 |
from app.modules.question_tests_retrieval.models.jd2text import jobdes2text
|
5 |
-
# from app.modules.question_tests_retrieval.models.
|
6 |
-
from app.modules.question_tests_retrieval.models.question_tests_logic import
|
7 |
|
8 |
qtretrieval_router = APIRouter(prefix="/qtretrieval", tags=["qtretrieval"])
|
9 |
|
@@ -18,10 +18,13 @@ async def send_jd(txt_file: Annotated[UploadFile, File(..., description="The JD
|
|
18 |
# read the txt file with format
|
19 |
jobdes = txt_file.file.read().decode("utf-8")
|
20 |
sumaryjd_text = jobdes2text(jobdes)
|
21 |
-
if
|
22 |
return {"message": "Send JD successfully and get question test successfully",
|
23 |
"sumaryjd_text": sumaryjd_text}
|
24 |
else:
|
25 |
return {"message": "Please upload only .txt file", "error": str(e)}
|
26 |
except Exception as e:
|
27 |
return {"message": "Please upload only .txt file", "error": str(e)}
|
|
|
|
|
|
|
|
2 |
from typing import Annotated
|
3 |
|
4 |
from app.modules.question_tests_retrieval.models.jd2text import jobdes2text
|
5 |
+
# from app.modules.question_tests_retrieval.models.text2vector import text2vector
|
6 |
+
from app.modules.question_tests_retrieval.models.question_tests_logic import get_question_tests
|
7 |
|
8 |
qtretrieval_router = APIRouter(prefix="/qtretrieval", tags=["qtretrieval"])
|
9 |
|
|
|
18 |
# read the txt file with format
|
19 |
jobdes = txt_file.file.read().decode("utf-8")
|
20 |
sumaryjd_text = jobdes2text(jobdes)
|
21 |
+
if get_question_tests(sumaryjd_text):
|
22 |
return {"message": "Send JD successfully and get question test successfully",
|
23 |
"sumaryjd_text": sumaryjd_text}
|
24 |
else:
|
25 |
return {"message": "Please upload only .txt file", "error": str(e)}
|
26 |
except Exception as e:
|
27 |
return {"message": "Please upload only .txt file", "error": str(e)}
|
28 |
+
|
29 |
+
# @qtretrieval_router.post("/download_tests")
|
30 |
+
# async def download_tests()
|
app/modules/question_tests_retrieval/models/question_tests_logic.py
CHANGED
@@ -5,8 +5,10 @@ from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
|
5 |
from langchain.evaluation import load_evaluator
|
6 |
from langchain.evaluation import EmbeddingDistance
|
7 |
|
8 |
-
from app.modules.crud_question_test.models.crud_question_tests import
|
|
|
9 |
from app.configs.database import firebase_bucket
|
|
|
10 |
|
11 |
# Import API key
|
12 |
load_dotenv()
|
@@ -19,45 +21,84 @@ GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
|
|
19 |
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
|
20 |
gemini_evaluator = load_evaluator("embedding_distance", distance_metric=EmbeddingDistance.COSINE, embeddings=embedding_model)
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
37 |
|
38 |
-
def download_question_test(
|
39 |
# check folder exist
|
40 |
if not os.path.exists('data/question_tests'):
|
41 |
os.makedirs('data/question_tests')
|
42 |
# download file from firebase storage using "gs://" link
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
46 |
return True
|
47 |
-
|
48 |
|
49 |
|
50 |
-
def get_question_test(text):
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
else:
|
59 |
-
|
60 |
-
if download_question_test(question_test_url):
|
61 |
-
return True
|
62 |
-
else:
|
63 |
-
return False
|
|
|
5 |
from langchain.evaluation import load_evaluator
|
6 |
from langchain.evaluation import EmbeddingDistance
|
7 |
|
8 |
+
from app.modules.crud_question_test.models.crud_question_tests import get_question_test_by_id
|
9 |
+
from app.modules.question_tests_retrieval.models.text2vector import text2vector
|
10 |
from app.configs.database import firebase_bucket
|
11 |
+
from app.configs.qdrant_db import qdrant_client
|
12 |
|
13 |
# Import API key
|
14 |
load_dotenv()
|
|
|
21 |
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
|
22 |
gemini_evaluator = load_evaluator("embedding_distance", distance_metric=EmbeddingDistance.COSINE, embeddings=embedding_model)
|
23 |
|
24 |
+
# def compare_vector(vector_extract, vector_des):
|
25 |
+
# maxnimun_value = 2
|
26 |
+
# for item in vector_des:
|
27 |
+
# two_object = (vector_extract, item)
|
28 |
+
# x = gemini_evaluator.evaluate_strings(prediction=two_object[0], reference=two_object[1])
|
29 |
+
# if x.get('score') < maxnimun_value:
|
30 |
+
# maxnimun_value = x.get('score')
|
31 |
+
# des_item_choose = item
|
32 |
+
# if maxnimun_value == 2:
|
33 |
+
# return False
|
34 |
+
# elif maxnimun_value < 0.3:
|
35 |
+
# return des_item_choose
|
36 |
+
# else:
|
37 |
+
# return False
|
38 |
+
|
39 |
+
def compare_vector(description_vector, max_number_of_points=10):
|
40 |
+
similarity_list = qdrant_client.search(
|
41 |
+
collection_name="question_tests",
|
42 |
+
query_vector=description_vector,
|
43 |
+
limit=max_number_of_points,
|
44 |
+
with_vectors=False,
|
45 |
+
with_payload=True,
|
46 |
+
)
|
47 |
|
48 |
+
formatted_similarity_list = []
|
49 |
+
for point in similarity_list:
|
50 |
+
formatted_similarity_list.append({"id": point.payload.get("id"), "score": point.score})
|
51 |
+
|
52 |
+
return formatted_similarity_list
|
53 |
+
|
54 |
+
# def download_question_test(question_test_url):
|
55 |
+
# # check folder exist
|
56 |
+
# if not os.path.exists('data/question_tests'):
|
57 |
+
# os.makedirs('data/question_tests')
|
58 |
+
# # download file from firebase storage using "gs://" link
|
59 |
+
# name_bucket = question_test_url.split(f"gs://{firebase_bucket.name}/")[1]
|
60 |
+
# blob = firebase_bucket.blob(name_bucket)
|
61 |
+
# blob.download_to_filename(f'data/question_tests/{name_bucket}')
|
62 |
+
# return True
|
63 |
|
64 |
+
def download_question_test(question_test_url_list):
|
65 |
# check folder exist
|
66 |
if not os.path.exists('data/question_tests'):
|
67 |
os.makedirs('data/question_tests')
|
68 |
# download file from firebase storage using "gs://" link
|
69 |
+
for url in question_test_url_list:
|
70 |
+
name_bucket = url.split(f"gs://{firebase_bucket.name}/")[1]
|
71 |
+
blob = firebase_bucket.blob(name_bucket)
|
72 |
+
blob.download_to_filename(f'data/question_tests/{name_bucket}')
|
73 |
+
|
74 |
return True
|
|
|
75 |
|
76 |
|
77 |
+
# def get_question_test(text):
|
78 |
+
# all_question_tests = get_all_question_tests()
|
79 |
+
# value_in_des = []
|
80 |
+
# for item in all_question_tests:
|
81 |
+
# value_in_des.append(item['question_tests_description'])
|
82 |
+
# des_item_choose = compare_vector(text, value_in_des)
|
83 |
+
# if des_item_choose == False:
|
84 |
+
# return "No question test found"
|
85 |
+
# else:
|
86 |
+
# question_test_url = get_question_test_url_by_description(des_item_choose)
|
87 |
+
# if download_question_test(question_test_url):
|
88 |
+
# return True
|
89 |
+
# else:
|
90 |
+
# return False
|
91 |
+
|
92 |
+
def get_question_tests(text):
|
93 |
+
# Get formatted similarity list
|
94 |
+
formatted_similarity_list = compare_vector(text2vector(text))
|
95 |
+
# Get corresponding document url in Firebase and download them
|
96 |
+
question_test_url_list = []
|
97 |
+
for point in formatted_similarity_list:
|
98 |
+
id = point.get("id")
|
99 |
+
question_test_url_list.append(get_question_test_by_id(id).get("question_tests_url"))
|
100 |
+
|
101 |
+
if download_question_test(question_test_url_list):
|
102 |
+
return True
|
103 |
else:
|
104 |
+
return False
|
|
|
|
|
|
|
|
test.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
|
4 |
+
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
5 |
+
from langchain.evaluation import load_evaluator
|
6 |
+
from langchain.evaluation import EmbeddingDistance
|
7 |
+
|
8 |
+
from app.modules.crud_question_test.models.crud_question_tests import get_question_test_by_id
|
9 |
+
from app.modules.question_tests_retrieval.models.text2vector import text2vector
|
10 |
+
from app.configs.database import firebase_bucket
|
11 |
+
from app.configs.qdrant_db import qdrant_client
|
12 |
+
|
13 |
+
# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# Read the Google API key and fail early with a clear message when it is
# missing. The previous code assigned os.getenv(...) back into os.environ,
# which raised an opaque TypeError when the variable was unset and changed
# nothing when it was set.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if GOOGLE_API_KEY is None:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; define it in the environment or in .env"
    )

# Embedding model and the cosine embedding-distance evaluator built on it.
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
gemini_evaluator = load_evaluator("embedding_distance", distance_metric=EmbeddingDistance.COSINE, embeddings=embedding_model)
|
23 |
+
|
24 |
+
# def compare_vector(vector_extract, vector_des):
|
25 |
+
# maxnimun_value = 2
|
26 |
+
# for item in vector_des:
|
27 |
+
# two_object = (vector_extract, item)
|
28 |
+
# x = gemini_evaluator.evaluate_strings(prediction=two_object[0], reference=two_object[1])
|
29 |
+
# if x.get('score') < maxnimun_value:
|
30 |
+
# maxnimun_value = x.get('score')
|
31 |
+
# des_item_choose = item
|
32 |
+
# if maxnimun_value == 2:
|
33 |
+
# return False
|
34 |
+
# elif maxnimun_value < 0.3:
|
35 |
+
# return des_item_choose
|
36 |
+
# else:
|
37 |
+
# return False
|
38 |
+
|
39 |
+
def compare_vector(description_vector, max_number_of_points=3):
    """Search the "question_tests" Qdrant collection for similar points.

    Args:
        description_vector: Query vector passed to the Qdrant search.
        max_number_of_points: Upper bound on the number of hits requested.

    Returns:
        A list with one dict per hit, in the order the search returned them.
        Each dict holds the hit payload's "id" value and the hit's score.
    """
    hits = qdrant_client.search(
        collection_name="question_tests",
        query_vector=description_vector,
        limit=max_number_of_points,
        with_payload=True,
        with_vectors=False,
    )
    return [{"id": hit.payload.get("id"), "score": hit.score} for hit in hits]
|
53 |
+
|
54 |
+
# def download_question_test(question_test_url):
|
55 |
+
# # check folder exist
|
56 |
+
# if not os.path.exists('data/question_tests'):
|
57 |
+
# os.makedirs('data/question_tests')
|
58 |
+
# # download file from firebase storage using "gs://" link
|
59 |
+
# name_bucket = question_test_url.split(f"gs://{firebase_bucket.name}/")[1]
|
60 |
+
# blob = firebase_bucket.blob(name_bucket)
|
61 |
+
# blob.download_to_filename(f'data/question_tests/{name_bucket}')
|
62 |
+
# return True
|
63 |
+
|
64 |
+
def download_question_test(question_test_url_list):
    """Download question-test files from Firebase Storage to data/question_tests.

    Args:
        question_test_url_list: Iterable of "gs://<bucket name>/<blob name>"
            links that point into ``firebase_bucket``.

    Returns:
        True when every entry was downloaded (including an empty list).
        False when at least one entry was empty/None or did not point into
        ``firebase_bucket``; the remaining valid entries are still downloaded.
    """
    target_dir = 'data/question_tests'
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(target_dir, exist_ok=True)

    bucket_prefix = f"gs://{firebase_bucket.name}/"
    all_downloaded = True
    for url in question_test_url_list:
        # Entries may be None (document without a URL) or foreign links;
        # indexing split(...)[1] on those used to raise instead of reporting.
        if url and url.startswith(bucket_prefix):
            blob_name = url[len(bucket_prefix):]
        else:
            blob_name = ""
        if not blob_name:
            all_downloaded = False
            continue

        destination = f'{target_dir}/{blob_name}'
        # Blob names may contain "/" — make sure the local sub-folder exists.
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        firebase_bucket.blob(blob_name).download_to_filename(destination)

    return all_downloaded
|
75 |
+
|
76 |
+
|
77 |
+
# def get_question_test(text):
|
78 |
+
# all_question_tests = get_all_question_tests()
|
79 |
+
# value_in_des = []
|
80 |
+
# for item in all_question_tests:
|
81 |
+
# value_in_des.append(item['question_tests_description'])
|
82 |
+
# des_item_choose = compare_vector(text, value_in_des)
|
83 |
+
# if des_item_choose == False:
|
84 |
+
# return "No question test found"
|
85 |
+
# else:
|
86 |
+
# question_test_url = get_question_test_url_by_description(des_item_choose)
|
87 |
+
# if download_question_test(question_test_url):
|
88 |
+
# return True
|
89 |
+
# else:
|
90 |
+
# return False
|
91 |
+
|
92 |
+
def get_question_tests(text):
    """Find the question tests closest to ``text`` and download their files.

    The text is embedded with ``text2vector``, matched against Qdrant through
    ``compare_vector``, and each hit's Firestore document is looked up to
    collect its "question_tests_url" for ``download_question_test``.

    Args:
        text: Free text (e.g. a summarised job description) to match.

    Returns:
        True when ``download_question_test`` reports success, False otherwise.
    """
    similar_points = compare_vector(text2vector(text))

    question_test_url_list = []
    for point in similar_points:
        document_id = point.get("id")
        if document_id is None:
            continue
        document = get_question_test_by_id(document_id)
        # A Qdrant point can outlive its Firestore document; the lookup may
        # then return None, which previously crashed on `.get(...)`.
        if not document:
            continue
        url = document.get("question_tests_url")
        if url:
            question_test_url_list.append(url)

    return bool(download_question_test(question_test_url_list))
|
106 |
+
|
107 |
+
# Run the sample query only when executed as a script, so importing this
# module does not trigger embedding, search and download calls.
if __name__ == "__main__":
    get_question_tests("I am a Junior AI Engineer")
|
test2.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import uuid
|
2 |
+
from app.configs.database import firebase_bucket, firebase_db
|
3 |
+
from app.configs.qdrant_db import qdrant_client
|
4 |
+
from app.configs.qdrant_db import models
|
5 |
+
from app.modules.question_tests_retrieval.models.text2vector import text2vector
|
6 |
+
|
7 |
+
|
8 |
+
# CRUD operation
|
9 |
+
def upload_file_question_tests(file):
    """Upload a question-test file to Firebase Storage under a unique name.

    The blob name is a random UUID4 with dashes turned into underscores,
    followed by "_" and the upload's own ``filename``.

    Args:
        file: Upload object exposing ``filename`` and a readable ``file``.

    Returns:
        The "gs://<bucket name>/<blob name>" link of the stored blob.
    """
    unique_prefix = str(uuid.uuid4()).replace("-", "_")
    blob_name = unique_prefix + "_" + file.filename
    # Push the file content to Firebase Storage.
    firebase_bucket.blob(blob_name).upload_from_file(file.file)
    return f"gs://{firebase_bucket.name}/{blob_name}"
|
16 |
+
|
17 |
+
def remove_file_question_tests(file_url):
    """Delete the Firebase Storage blob referenced by a "gs://" link.

    Args:
        file_url: Link of the form "gs://<bucket name>/<blob name>".

    Returns:
        True once the blob's delete call has completed.
    """
    bucket_prefix = f"gs://{firebase_bucket.name}/"
    # The blob name is whatever follows the bucket prefix in the link.
    blob_name = file_url.split(bucket_prefix)[1]
    firebase_bucket.blob(blob_name).delete()
    return True
|
22 |
+
|
23 |
+
def get_all_question_tests():
    """Return every document of the "question_tests" collection.

    Returns:
        A list of dicts, one per streamed document, each being the document's
        fields with an extra "id" key set to the document id.
    """
    collection = firebase_db.collection("question_tests")
    # "id" is written last so it overrides any stored field of the same name.
    return [{**doc.to_dict(), "id": doc.id} for doc in collection.stream()]
|
32 |
+
|
33 |
+
def get_question_test_by_id(id):
    """Fetch one "question_tests" document and return it as a dict.

    Args:
        id: Id of the document to read.

    Returns:
        Whatever the fetched snapshot's ``to_dict()`` yields.
    """
    collection = firebase_db.collection("question_tests")
    snapshot = collection.document(id).get()
    return snapshot.to_dict()
|
37 |
+
|
38 |
+
def get_question_test_url_by_description(description):
    """Look up the file URL of the first question test with this description.

    Args:
        description: Value matched exactly against the
            "question_tests_description" field.

    Returns:
        The "question_tests_url" of the first matching document, or False
        when no document matches.
    """
    matches = (
        firebase_db.collection("question_tests")
        .where("question_tests_description", "==", description)
        .stream()
    )
    first_match = next(iter(matches), None)
    if first_match is None:
        return False
    return first_match.to_dict()["question_tests_url"]
|
44 |
+
|
45 |
+
|
46 |
+
def create_question_test(data):
    """Create a question test: store its file, its document and its vector.

    Args:
        data: Dict holding the uploaded file under "question_tests_url" and
            the text under "question_tests_description". It is modified in
            place: the file object is replaced by the stored gs:// link.

    Returns:
        True once the file, the Firestore document and the Qdrant point have
        been written.
    """
    # Upload the attached file and keep its gs:// link in the document data.
    file_url = upload_file_question_tests(data["question_tests_url"])
    data["question_tests_url"] = file_url
    question_tests_des = data["question_tests_description"]

    # Create the Firestore document; its id is read from element 1 of the
    # value returned by add().
    document_ref = firebase_db.collection("question_tests").add(data)
    document_id = document_ref[1].id

    # Store the description vector in Qdrant under a random UUID point id.
    # The previous id, points_count + 1, could equal the id of a live point
    # after any deletion, and upsert would then silently overwrite it.
    point = models.PointStruct(
        id=str(uuid.uuid4()),
        payload={"id": document_id},
        vector=text2vector(question_tests_des),
    )
    qdrant_client.upsert(collection_name="question_tests", points=[point])

    return True
|
67 |
+
|
68 |
+
def update_question_test(id, data):
    """Update a question-test document and keep its Qdrant vector in sync.

    Args:
        id: Id of the Firestore document to update.
        data: Dict of fields to update. When it carries a
            "question_tests_description", the matching Qdrant point(s) are
            re-embedded from the new text.

    Returns:
        True once the updates have been issued.
    """
    # Update a document by id
    firebase_db.collection("question_tests").document(id).update(data)

    # Update corresponding vector in Qdrant. Points are linked to documents
    # through the payload "id" field, so they are looked up by that field.
    new_description = data.get("question_tests_description")
    if new_description is not None:
        matching_points, _ = qdrant_client.scroll(
            collection_name="question_tests",
            scroll_filter=models.Filter(
                must=[
                    models.FieldCondition(
                        key="id",
                        match=models.MatchValue(value=id),
                    ),
                ],
            ),
            with_payload=True,
            with_vectors=False,
        )
        if matching_points:
            description_vector = text2vector(new_description)
            # Upserting with the existing point id replaces the stored vector.
            qdrant_client.upsert(
                collection_name="question_tests",
                points=[
                    models.PointStruct(
                        id=found.id,
                        payload=found.payload,
                        vector=description_vector,
                    )
                    for found in matching_points
                ],
            )

    return True
|
74 |
+
|
75 |
+
def delete_question_test(id):
    """Delete a question test: its stored file, its document and its vector.

    Args:
        id: Id of the Firestore document to delete.

    Returns:
        True when the document existed and was deleted; False when no
        document was found. Qdrant points whose payload "id" equals ``id``
        are removed in both cases.
    """
    record = get_question_test_by_id(id)
    document_found = record is not None

    if document_found:
        # Delete a file from firebase storage (skipped when no URL is stored;
        # subscripting a missing document or key used to raise here).
        file_url = record.get("question_tests_url")
        if file_url:
            remove_file_question_tests(file_url)
        # Delete a document by id
        firebase_db.collection("question_tests").document(id).delete()

    # Delete corresponding vector from Qdrant, even when the document is
    # already gone, so no orphaned point keeps showing up in searches.
    qdrant_client.delete(
        collection_name="question_tests",
        points_selector=models.FilterSelector(
            filter=models.Filter(
                must=[
                    models.FieldCondition(
                        key="id",
                        match=models.MatchValue(value=id),
                    ),
                ],
            )
        ),
    )
    return document_found
|