Trương Trọng Tiến committed on
Commit
1d41829
2 Parent(s): 58d5cc2 aef9bf8

Merge pull request #16 from fsa-simpleqt/TienTT

Browse files

Phase2/TienTT: Add Query vector database and Delete tests

app/modules/crud_question_test/models/crud_question_tests.py CHANGED
@@ -68,6 +68,8 @@ def create_question_test(data):
68
  def update_question_test(id, data):
69
  # Update a document by id
70
  firebase_db.collection("question_tests").document(id).update(data)
 
 
71
  return True
72
 
73
  def delete_question_test(id):
@@ -76,4 +78,19 @@ def delete_question_test(id):
76
  remove_file_question_tests(file_url)
77
  # Delete a document by id
78
  firebase_db.collection("question_tests").document(id).delete()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  return True
 
68
  def update_question_test(id, data):
69
  # Update a document by id
70
  firebase_db.collection("question_tests").document(id).update(data)
71
+ # Update corresponding vector in Qdrant
72
+
73
  return True
74
 
75
  def delete_question_test(id):
 
78
  remove_file_question_tests(file_url)
79
  # Delete a document by id
80
  firebase_db.collection("question_tests").document(id).delete()
81
+
82
+ # Delete corresponding vector from Qdrant
83
+ qdrant_client.delete(
84
+ collection_name="question_tests",
85
+ points_selector=models.FilterSelector(
86
+ filter=models.Filter(
87
+ must=[
88
+ models.FieldCondition(
89
+ key="id",
90
+ match=models.MatchValue(value=id),
91
+ ),
92
+ ],
93
+ )
94
+ ),
95
+ )
96
  return True
app/modules/question_tests_retrieval/__init__.py CHANGED
@@ -2,8 +2,8 @@ from fastapi import APIRouter, UploadFile, File
2
  from typing import Annotated
3
 
4
  from app.modules.question_tests_retrieval.models.jd2text import jobdes2text
5
- # from app.modules.question_tests_retrieval.models.text2tector import text2vector
6
- from app.modules.question_tests_retrieval.models.question_tests_logic import get_question_test
7
 
8
  qtretrieval_router = APIRouter(prefix="/qtretrieval", tags=["qtretrieval"])
9
 
@@ -18,10 +18,11 @@ async def send_jd(txt_file: Annotated[UploadFile, File(..., description="The JD
18
  # read the txt file with format
19
  jobdes = txt_file.file.read().decode("utf-8")
20
  sumaryjd_text = jobdes2text(jobdes)
21
- if get_question_test(sumaryjd_text):
22
  return {"message": "Send JD successfully and get question test successfully",
23
  "sumaryjd_text": sumaryjd_text}
24
  else:
25
  return {"message": "Please upload only .txt file", "error": str(e)}
26
  except Exception as e:
27
  return {"message": "Please upload only .txt file", "error": str(e)}
 
 
2
  from typing import Annotated
3
 
4
  from app.modules.question_tests_retrieval.models.jd2text import jobdes2text
5
+ # from app.modules.question_tests_retrieval.models.text2vector import text2vector
6
+ from app.modules.question_tests_retrieval.models.question_tests_logic import get_question_tests
7
 
8
  qtretrieval_router = APIRouter(prefix="/qtretrieval", tags=["qtretrieval"])
9
 
 
18
  # read the txt file with format
19
  jobdes = txt_file.file.read().decode("utf-8")
20
  sumaryjd_text = jobdes2text(jobdes)
21
+ if get_question_tests(sumaryjd_text):
22
  return {"message": "Send JD successfully and get question test successfully",
23
  "sumaryjd_text": sumaryjd_text}
24
  else:
25
  return {"message": "Please upload only .txt file", "error": str(e)}
26
  except Exception as e:
27
  return {"message": "Please upload only .txt file", "error": str(e)}
28
+
app/modules/question_tests_retrieval/models/question_tests_logic.py CHANGED
@@ -5,8 +5,10 @@ from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
  from langchain.evaluation import load_evaluator
6
  from langchain.evaluation import EmbeddingDistance
7
 
8
- from app.modules.crud_question_test.models.crud_question_tests import get_all_question_tests, get_question_test_url_by_description
 
9
  from app.configs.database import firebase_bucket
 
10
 
11
  # Import API key
12
  load_dotenv()
@@ -19,45 +21,84 @@ GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
19
  embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
20
  gemini_evaluator = load_evaluator("embedding_distance", distance_metric=EmbeddingDistance.COSINE, embeddings=embedding_model)
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- def compare_vector(vector_extract, vector_des):
24
- maxnimun_value = 2
25
- for item in vector_des:
26
- two_object = (vector_extract, item)
27
- x = gemini_evaluator.evaluate_strings(prediction=two_object[0], reference=two_object[1])
28
- if x.get('score') < maxnimun_value:
29
- maxnimun_value = x.get('score')
30
- des_item_choose = item
31
- if maxnimun_value == 2:
32
- return False
33
- elif maxnimun_value < 0.3:
34
- return des_item_choose
35
- else:
36
- return False
 
37
 
38
- def download_question_test(question_test_url):
39
  # check folder exist
40
  if not os.path.exists('data/question_tests'):
41
  os.makedirs('data/question_tests')
42
  # download file from firebase storage using "gs://" link
43
- name_bucket = question_test_url.split(f"gs://{firebase_bucket.name}/")[1]
44
- blob = firebase_bucket.blob(name_bucket)
45
- blob.download_to_filename(f'data/question_tests/{name_bucket}')
 
 
46
  return True
47
-
48
 
49
 
50
- def get_question_test(text):
51
- all_question_tests = get_all_question_tests()
52
- value_in_des = []
53
- for item in all_question_tests:
54
- value_in_des.append(item['question_tests_description'])
55
- des_item_choose = compare_vector(text, value_in_des)
56
- if des_item_choose == False:
57
- return "No question test found"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  else:
59
- question_test_url = get_question_test_url_by_description(des_item_choose)
60
- if download_question_test(question_test_url):
61
- return True
62
- else:
63
- return False
 
5
  from langchain.evaluation import load_evaluator
6
  from langchain.evaluation import EmbeddingDistance
7
 
8
+ from app.modules.crud_question_test.models.crud_question_tests import get_question_test_by_id
9
+ from app.modules.question_tests_retrieval.models.text2vector import text2vector
10
  from app.configs.database import firebase_bucket
11
+ from app.configs.qdrant_db import qdrant_client
12
 
13
  # Import API key
14
  load_dotenv()
 
21
  embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
22
  gemini_evaluator = load_evaluator("embedding_distance", distance_metric=EmbeddingDistance.COSINE, embeddings=embedding_model)
23
 
24
+ # def compare_vector(vector_extract, vector_des):
25
+ # maxnimun_value = 2
26
+ # for item in vector_des:
27
+ # two_object = (vector_extract, item)
28
+ # x = gemini_evaluator.evaluate_strings(prediction=two_object[0], reference=two_object[1])
29
+ # if x.get('score') < maxnimun_value:
30
+ # maxnimun_value = x.get('score')
31
+ # des_item_choose = item
32
+ # if maxnimun_value == 2:
33
+ # return False
34
+ # elif maxnimun_value < 0.3:
35
+ # return des_item_choose
36
+ # else:
37
+ # return False
38
+
39
def compare_vector(description_vector, max_number_of_points=3):
    """Find the stored question tests closest to an embedding.

    Runs a similarity search against the "question_tests" Qdrant collection,
    asking for payloads but not for the stored vectors.

    Args:
        description_vector: Query embedding forwarded to Qdrant as
            ``query_vector``.
        max_number_of_points: Upper bound on the number of hits requested.

    Returns:
        A list of ``{"id": ..., "score": ...}`` dicts, one per hit, in the
        order the search returned them. ``id`` is read from the hit's payload
        and is ``None`` when the payload has no "id" key.
    """
    hits = qdrant_client.search(
        collection_name="question_tests",
        query_vector=description_vector,
        limit=max_number_of_points,
        with_payload=True,
        with_vectors=False,
    )
    return [{"id": hit.payload.get("id"), "score": hit.score} for hit in hits]
53
+
54
+ # def download_question_test(question_test_url):
55
+ # # check folder exist
56
+ # if not os.path.exists('data/question_tests'):
57
+ # os.makedirs('data/question_tests')
58
+ # # download file from firebase storage using "gs://" link
59
+ # name_bucket = question_test_url.split(f"gs://{firebase_bucket.name}/")[1]
60
+ # blob = firebase_bucket.blob(name_bucket)
61
+ # blob.download_to_filename(f'data/question_tests/{name_bucket}')
62
+ # return True
63
 
64
def download_question_test(question_test_url_list):
    """Download question test files from Firebase Storage to local disk.

    Each URL is expected to look like ``gs://<bucket name>/<blob name>`` where
    the bucket is ``firebase_bucket``; the blob is saved under
    ``data/question_tests/<blob name>``.

    Args:
        question_test_url_list: Iterable of "gs://" URLs to download.

    Returns:
        True once every URL in the list has been downloaded.
    """
    target_dir = 'data/question_tests'
    # exist_ok=True replaces the separate exists() check, which could race
    # with another request creating the same folder.
    os.makedirs(target_dir, exist_ok=True)

    # The prefix does not change between iterations, so build it once.
    bucket_prefix = f"gs://{firebase_bucket.name}/"
    for url in question_test_url_list:
        # Everything after the bucket prefix is the blob name.
        blob_name = url.split(bucket_prefix)[1]
        destination = f'{target_dir}/{blob_name}'
        # Blob names may contain "/" (folder-like prefixes); the local
        # sub-folders must exist before the file can be written.
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        firebase_bucket.blob(blob_name).download_to_filename(destination)

    return True
 
75
 
76
 
77
+ # def get_question_test(text):
78
+ # all_question_tests = get_all_question_tests()
79
+ # value_in_des = []
80
+ # for item in all_question_tests:
81
+ # value_in_des.append(item['question_tests_description'])
82
+ # des_item_choose = compare_vector(text, value_in_des)
83
+ # if des_item_choose == False:
84
+ # return "No question test found"
85
+ # else:
86
+ # question_test_url = get_question_test_url_by_description(des_item_choose)
87
+ # if download_question_test(question_test_url):
88
+ # return True
89
+ # else:
90
+ # return False
91
+
92
def get_question_tests(text):
    """Fetch and download the question tests most similar to ``text``.

    The text is embedded with ``text2vector`` and matched via
    ``compare_vector``. Each match's Firebase document is then looked up by
    id, and the collected "question_tests_url" values are handed to
    ``download_question_test``.

    Args:
        text: Text to embed and search with.

    Returns:
        True if ``download_question_test`` reports success, otherwise False.
    """
    # Nearest stored question tests for the embedded text
    matches = compare_vector(text2vector(text))

    # Resolve every match to the storage URL recorded in its Firebase document
    question_test_url_list = [
        get_question_test_by_id(match.get("id")).get("question_tests_url")
        for match in matches
    ]

    return bool(download_question_test(question_test_url_list))