MillMin committed on
Commit 4dec676
2 Parent(s): 140f52a e5d71c1
.gitignore CHANGED
@@ -2,3 +2,6 @@ venv/
 .env
 __pycache__/
 credentials/
+data/CV
+data/JD
+data/QUESTION
app/configs/qdrant_db.py ADDED
@@ -0,0 +1,23 @@
+from qdrant_client import QdrantClient
+from qdrant_client.http import models
+
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+qdrant_client = QdrantClient(
+    url = os.getenv("QDRANT_URL"),
+    api_key = os.getenv("QDRANT_API_KEY"),
+)
+
+try:
+    collection_info = qdrant_client.get_collection("question_tests")
+except Exception as e:
+    qdrant_client.create_collection(
+        collection_name="question_tests",
+        vectors_config=models.VectorParams(size=768, distance=models.Distance.COSINE),
+    )
+
+print("Qdrant Database connected")
+
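The collection created above uses 768-dimensional cosine vectors, so whatever `text2vector` (renamed in this commit) returns has to be 768-dimensional as well. A minimal sanity check along these lines could verify the wiring (hypothetical snippet, not part of the commit):

    from app.configs.qdrant_db import qdrant_client
    from app.modules.question_tests_retrieval.models.text2vector import text2vector

    # The embedding size must match VectorParams(size=768) used for "question_tests".
    vector = text2vector("sample job description text")
    assert len(vector) == 768, f"expected a 768-dim vector, got {len(vector)}"

    info = qdrant_client.get_collection("question_tests")
    print(info.points_count)  # number of question-test vectors currently stored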
app/modules/__init__.py CHANGED
@@ -1,10 +1,12 @@
 from fastapi import APIRouter
 from app.modules.question_tests_retrieval import qtretrieval_router
 from app.modules.crud_question_test import crud_question_tests_router
+from app.modules.matching_cv import cvmatching_router
 
 modules_router = APIRouter(prefix="/modules", tags=["modules"])
 modules_router.include_router(qtretrieval_router)
 modules_router.include_router(crud_question_tests_router)
+modules_router.include_router(cvmatching_router)
 
 @modules_router.get("/")
 async def index():
app/modules/crud_question_test/__init__.py CHANGED
@@ -20,7 +20,7 @@ async def add_question_test(description: str, role: str, file_question_tests: An
     # check if file is pdf or json
     if file_question_tests.content_type == "application/pdf":
         # create a new document
-        if create_question_test({"description": description, "role": role, "question_tests": file_question_tests}):
+        if create_question_test({"question_tests_description": description, "question_tests_role": role, "question_tests_url": file_question_tests}):
             return {"message": "Question test added successfully"}
         else:
             return {"message": "Error"}
app/modules/crud_question_test/models/crud_question_tests.py CHANGED
@@ -1,5 +1,9 @@
 import uuid
 from app.configs.database import firebase_bucket, firebase_db
+from app.configs.qdrant_db import qdrant_client
+from app.configs.qdrant_db import models
+from app.modules.question_tests_retrieval.models.text2vector import text2vector
+
 
 # CRUD operation
 def upload_file_question_tests(file):
@@ -46,8 +50,19 @@ def create_question_test(data):
     file_url = upload_file_question_tests(file_question_tests)
     # add file url to data
     data["question_tests_url"] = file_url
+    question_tests_des = data["question_tests_description"]
     # Create a new document
-    firebase_db.collection("question_tests").add(data)
+    document_ref = firebase_db.collection("question_tests").add(data)
+    document_id = document_ref[1].id
+
+    # Upload vector to Qdrant
+    collection_info = qdrant_client.get_collection('question_tests')
+    points_count = collection_info.points_count
+    description_vector = text2vector(question_tests_des)
+    payload = {"id": document_id}
+    point = models.PointStruct(id=points_count+1, payload=payload, vector=description_vector)
+    qdrant_client.upsert(collection_name="question_tests", points=[point])
+
     return True
 
 def update_question_test(id, data):
@@ -57,7 +72,7 @@ def update_question_test(id, data):
 
 def delete_question_test(id):
     # Delete a file from firebase storage
-    file_url = get_question_test_by_id(id)["question_tests"]
+    file_url = get_question_test_by_id(id)["question_tests_url"]
     remove_file_question_tests(file_url)
     # Delete a document by id
     firebase_db.collection("question_tests").document(id).delete()
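For context, the retrieval side of this flow would embed a JD summary with the same `text2vector` model, search the `question_tests` collection, and map hits back to Firestore documents through the payload `id` stored at upsert time. A rough sketch under those assumptions (the helper below is illustrative, not part of this commit):

    from app.configs.qdrant_db import qdrant_client
    from app.modules.question_tests_retrieval.models.text2vector import text2vector

    def find_question_test_ids(jd_summary, limit=3):
        # Embed the JD summary with the same model used when the test was uploaded.
        jd_vector = text2vector(jd_summary)
        hits = qdrant_client.search(
            collection_name="question_tests",
            query_vector=jd_vector,
            limit=limit,
        )
        # Each payload carries the Firestore document id written by create_question_test().
        return [hit.payload["id"] for hit in hits]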
app/modules/matching_cv/__init__.py ADDED
@@ -0,0 +1,36 @@
+import docx
+
+from fastapi import APIRouter
+from app.modules.matching_cv.models.match_cv_jd_model import Match_JD_CV_Model
+
+from app.modules.matching_cv.models.matching_cv_logic import result_matching_cv_jd, load_jd_from_id
+
+cvmatching_router = APIRouter(prefix="/cvmatching", tags=["cvmatching"])
+
+@cvmatching_router.get("/")
+async def index():
+    return {"message": "Welcome to CV matching page"}
+
+@cvmatching_router.post("/matching")
+# only upload .pdf or .docx file
+async def matching_cv_jd(
+    jd_upload: Match_JD_CV_Model.jd = Match_JD_CV_Model.jd_default,
+    cv_upload: Match_JD_CV_Model.cv = Match_JD_CV_Model.cv_default):
+    try:
+        # take jd_upload and cv_upload file types
+        jd_upload_type = jd_upload.filename.split(".")[-1]
+        cv_upload_type = cv_upload.filename.split(".")[-1]
+        if jd_upload_type in ["txt"] and cv_upload_type in ["pdf", "docx"]:
+            jd_text = jd_upload.file.read().decode("utf-8")
+            if cv_upload_type == "docx":
+                cv_text = docx.Document(cv_upload.file).paragraphs
+                cv_text = "\n".join([i.text for i in cv_text])
+            elif cv_upload_type == "pdf":
+                return {"message": "This feature is not available yet"}
+            # check matching cv and jd
+            result = result_matching_cv_jd(cv_text, jd_text)
+            return {"result": result}
+        else:
+            return {"message": "Please upload only .txt for JD. And .pdf or .docx file for CV"}
+    except Exception as e:
+        return {"Error": str(e)}
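The new route sits under the `modules` router, so assuming the default mount and a local dev server, it can be exercised roughly like this (host, port, and file names are placeholders):

    import requests

    with open("jd.txt", "rb") as jd_file, open("cv.docx", "rb") as cv_file:
        response = requests.post(
            "http://localhost:8000/modules/cvmatching/matching",
            files={
                "jd_upload": ("jd.txt", jd_file, "text/plain"),
                "cv_upload": (
                    "cv.docx",
                    cv_file,
                    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                ),
            },
        )
    print(response.json())  # {"result": {...}} on success, {"message": ...} otherwise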
app/modules/matching_cv/models/match_cv_jd_model.py ADDED
@@ -0,0 +1,7 @@
+from fastapi import APIRouter, UploadFile, File
+
+class Match_JD_CV_Model:
+    jd = UploadFile
+    jd_default = File(..., description="Upload JD file", media_type=["text/plain"])
+    cv = UploadFile
+    cv_default = File(..., description="Upload CV file", media_type=["application/pdf", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"])
app/modules/matching_cv/models/matching_cv_logic.py ADDED
@@ -0,0 +1,60 @@
+import os
+import docx
+from dotenv import load_dotenv
+
+# import prompt template
+from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
+from langchain_core.messages import SystemMessage
+from langchain_google_genai import ChatGoogleGenerativeAI
+
+# import the JSON output parser from langchain core
+from langchain_core.output_parsers import JsonOutputParser
+
+# define the parser object
+parser = JsonOutputParser()
+
+# Import API key
+load_dotenv()
+
+# Define the Google API key
+os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
+GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
+
+# load a CV from a .docx file
+def load_cv_from_docx(file):
+    doc = docx.Document(file)
+    fullText = []
+    for para in doc.paragraphs:
+        fullText.append(para.text)
+    return '\n'.join(fullText)
+
+# match a CV against a JD and return the match percentage using a prompt template
+def result_matching_cv_jd(cv_text, jd_text):
+    # create the prompt template
+    chat_template = ChatPromptTemplate.from_messages(
+        [
+            SystemMessage(
+                content=(
+                    """
+                    Given the following CV and JD, calculate the percentage match between the candidate's qualifications and the job requirements:
+                    CV: {cv}
+                    JD: {jd}
+                    To determine the match percentage, analyze the skills and experience in the CV and compare them to the requirements outlined in the JD. Provide the final match percentage as a numeric value between 0-100%, along with a brief explanation of your analysis. Follow this json format: {"Skills Match": {"Required Skills": "","Candidate Skills": "","Match Percentage": "",}, "Experience Match": {"Required Experience": "","Candidate Experience": "","Match Percentage": "",}, "Overall Match Percentage:": ""}
+                    """
+                )
+            ),
+            HumanMessagePromptTemplate.from_template(["{cv}", "{jd}"]),
+        ]
+    )
+
+    # create the chat message
+    chat_message = chat_template.format_messages(cv=cv_text, jd=jd_text)
+
+    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3, convert_system_message_to_human=True, api_key=GOOGLE_API_KEY)
+    chain = llm | parser
+    result = chain.invoke(chat_message)
+
+    return result
+
+def load_jd_from_id():
+    pass
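Taken together with `load_cv_from_docx`, the matching logic is meant to be driven roughly as follows (paths are placeholders, GOOGLE_API_KEY must be set in the environment, and the expected output shape follows the JSON format in the prompt above):

    from app.modules.matching_cv.models.matching_cv_logic import (
        load_cv_from_docx,
        result_matching_cv_jd,
    )

    # Read a CV from a .docx file and a JD from a plain-text file (example paths).
    cv_text = load_cv_from_docx("data/CV/example_cv.docx")
    with open("data/JD/example_jd.txt", encoding="utf-8") as f:
        jd_text = f.read()

    # JsonOutputParser returns a dict, roughly:
    # {"Skills Match": {...}, "Experience Match": {...}, "Overall Match Percentage:": "..."}
    result = result_matching_cv_jd(cv_text, jd_text)
    print(result)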
app/modules/question_tests_retrieval/__init__.py CHANGED
@@ -22,6 +22,6 @@ async def send_jd(txt_file: Annotated[UploadFile, File(..., description="The JD
             return {"message": "Send JD successfully and get question test successfully",
                     "sumaryjd_text": sumaryjd_text}
         else:
-            return {"message": "Error"}
+            return {"message": "Please upload only .txt file"}
     except Exception as e:
-        return {"message": "Error", "error": str(e)}
+        return {"message": "Please upload only .txt file", "error": str(e)}
app/modules/question_tests_retrieval/models/{text2tector.py → text2vector.py} RENAMED
File without changes
data/question_for_jd/Question_AI_Senior_1.json DELETED
@@ -1,355 +0,0 @@
1
- {
2
- "__count__": 30,
3
- "data": [
4
- {
5
- "id": "1",
6
- "question": "What is the concept of 'Curse of Dimensionality' in Machine Learning?",
7
- "choices": [
8
- "A. It refers to the difficulty in visualizing high-dimensional data.",
9
- "B. It refers to the phenomenon where the feature space becomes increasingly sparse for an increasing number of dimensions of a fixed-size training dataset.",
10
- "C. It refers to the computational complexity of mathematical operations in high-dimensional spaces.",
11
- "D. It refers to the overfitting that occurs when the model is too complex."
12
- ],
13
- "explanation": "The 'Curse of Dimensionality' refers to various phenomena that arise when analyzing and organizing data in high-dimensional spaces (often with hundreds or thousands of dimensions) that do not occur in low-dimensional settings such as the three-dimensional physical space of everyday experience.",
14
- "answer": "B",
15
- "level": "Senior",
16
- "domain": "AI"
17
- },
18
- {
19
- "id": "2",
20
- "question": "What is the difference between 'one-hot encoding' and 'label encoding'?",
21
- "choices": [
22
- "A. 'One-hot encoding' is used for ordinal variables, while 'label encoding' is used for nominal variables.",
23
- "B. 'Label encoding' is used for ordinal variables, while 'one-hot encoding' is used for nominal variables.",
24
- "C. Both 'one-hot encoding' and 'label encoding' are used for ordinal variables.",
25
- "D. Both 'one-hot encoding' and 'label encoding' are used for nominal variables."
26
- ],
27
- "explanation": "'Label encoding' and 'one-hot encoding' are two common methods used to convert categorical data into numerical data. 'Label encoding' assigns each unique category in a categorical variable with an integer. No new columns are created. On the other hand, 'one-hot encoding' creates new (binary) columns, indicating the presence of each possible value from the original data.",
28
- "answer": "B",
29
- "level": "Senior",
30
- "domain": "AI"
31
- },
32
- {
33
- "id": "3",
34
- "question": "What is 'Early Stopping' in model training?",
35
- "choices": [
36
- "A. It's a form of regularization used to avoid overfitting when training a learner with an iterative method.",
37
- "B. It's a technique used to speed up training by stopping the training process before it completes all iterations.",
38
- "C. It's a method used to stop training when the model's performance starts to decrease.",
39
- "D. It's a technique used to stop training when the model's performance does not improve on an held-out validation set."
40
- ],
41
- "explanation": "'Early stopping' is a form of regularization used to avoid overfitting when training a machine learning model with an iterative method, such as gradient descent. Such methods update the model's parameters (such as the weights in a neural network) iteratively, and early stopping halts this iterative process when the model's performance stops improving on a held-out validation dataset.",
42
- "answer": "A",
43
- "level": "Senior",
44
- "domain": "AI"
45
- },
46
- {
47
- "id": "4",
48
- "question": "What is the difference between 'Random Forest' and 'Gradient Boosting' algorithms?",
49
- "choices": [
50
- "A. 'Random Forest' is a bagging algorithm, while 'Gradient Boosting' is a boosting algorithm.",
51
- "B. 'Gradient Boosting' is a bagging algorithm, while 'Random Forest' is a boosting algorithm.",
52
- "C. Both 'Random Forest' and 'Gradient Boosting' are bagging algorithms.",
53
- "D. Both 'Random Forest' and 'Gradient Boosting' are boosting algorithms."
54
- ],
55
- "explanation": "'Random Forest' is a bagging algorithm and 'Gradient Boosting' is a boosting algorithm. Both are ensemble methods, but they combine the models in different ways. In 'Random Forest', each tree in the ensemble is built from a sample drawn with replacement (i.e., a bootstrap sample) from the training set. In 'Gradient Boosting', each new tree is fit on a modified version of the original data set.",
56
- "answer": "A",
57
- "level": "Senior",
58
- "domain": "AI"
59
- },
60
- {
61
- "id": "5",
62
- "question": "What is 'Transfer Learning' in the context of Machine Learning?",
63
- "choices": [
64
- "A. It's a technique where a pre-trained model is used on a new problem.",
65
- "B. It's a technique to transfer knowledge from one model to another.",
66
- "C. It's a technique to transfer knowledge from one problem domain to another.",
67
- "D. All of the above."
68
- ],
69
- "explanation": "'Transfer Learning' is a research problem in machine learning that focuses on storing knowledge gained while solving one problem and applying it to a different but related problem. For example, knowledge gained while learning to recognize cars could apply when trying to recognize trucks.",
70
- "answer": "D",
71
- "level": "Senior",
72
- "domain": "AI"
73
- },
74
- {
75
- "id": "6",
76
- "question": "What is the purpose of 'ReLU' in a neural network?",
77
- "choices": [
78
- "A. To introduce non-linearity in the neural network.",
79
- "B. To normalize the output of the neural network.",
80
- "C. To speed up the training process of the neural network.",
81
- "D. To prevent overfitting in the neural network."
82
- ],
83
- "explanation": "'ReLU' stands for 'Rectified Linear Unit'. It is the most commonly used activation function in neural networks and deep learning models. The function returns 0 if it receives any negative input, but for any positive value 'x' it returns that value back. It's used to introduce non-linearity in the neural network.",
84
- "answer": "A",
85
- "level": "Senior",
86
- "domain": "AI"
87
- },
88
- {
89
- "id": "7",
90
- "question": "What is 'Batch Normalization' in the context of Neural Networks?",
91
- "choices": [
92
- "A. It's a technique to provide any layer in a neural network with inputs that are zero mean/unit variance.",
93
- "B. It's a technique to normalize the output of a previous activation layer by subtracting the batch mean and dividing by the batch standard deviation.",
94
- "C. It's a technique to make the weights of a neural network have zero mean and unit variance.",
95
- "D. Both A and B."
96
- ],
97
- "explanation": "'Batch Normalization' is a technique to provide any layer in a neural network with inputs that are zero mean/unit variance, and it is a technique to normalize the output of the previous activation layer by subtracting the batch mean and dividing by the batch standard deviation. This technique helps to speed up learning in deep neural networks by reducing internal covariate shift, and it has become a standard component of most state-of-the-art neural networks.",
98
- "answer": "D",
99
- "level": "Senior",
100
- "domain": "AI"
101
- },
102
- {
103
- "id": "8",
104
- "question": "What is the purpose of 'L1' and 'L2' regularization?",
105
- "choices": [
106
- "A. They are techniques to prevent overfitting in a machine learning model.",
107
- "B. They are techniques to increase the speed of training a machine learning model.",
108
- "C. They are techniques to increase the complexity of a machine learning model.",
109
- "D. They are techniques to decrease the complexity of a machine learning model."
110
- ],
111
- "explanation": "'L1' and 'L2' are regularization techniques used to prevent overfitting in a machine learning model by adding a penalty term to the loss function. The penalty term encourages the model to have smaller weights, which makes the model simpler and thus less likely to overfit.",
112
- "answer": "A",
113
- "level": "Senior",
114
- "domain": "AI"
115
- },
116
- {
117
- "id": "9",
118
- "question": "What is 'Ensemble Learning' in the context of Machine Learning?",
119
- "choices": [
120
- "A. It's a technique where multiple models are trained to solve the same problem and combined to get better results.",
121
- "B. It's a technique where one model is trained to solve multiple problems.",
122
- "C. It's a technique where the model is trained on an ensemble of different datasets.",
123
- "D. It's a technique where the model is trained multiple times on the same dataset."
124
- ],
125
- "explanation": "'Ensemble Learning' is a machine learning paradigm where multiple models (often called 'weak learners') are trained to solve the same problem and combined to get better results. The main hypothesis is that when weak models are correctly combined we can obtain more accurate and/or robust models.",
126
- "answer": "A",
127
- "level": "Senior",
128
- "domain": "AI"
129
- },
130
- {
131
- "id": "10",
132
- "question": "What is the difference between 'Ridge' and 'Lasso' regression?",
133
- "choices": [
134
- "A. 'Ridge' regression uses L1 regularization while 'Lasso' regression uses L2 regularization.",
135
- "B. 'Lasso' regression uses L1 regularization while 'Ridge' regression uses L2 regularization.",
136
- "C. Both 'Ridge' and 'Lasso' regression use L1 regularization.",
137
- "D. Both 'Ridge' and 'Lasso' regression use L2 regularization."
138
- ],
139
- "explanation": "'Ridge' and 'Lasso' regression are two types of linear regression models that use different types of regularization. 'Ridge' regression uses L2 regularization, which adds a penalty equal to the square of the magnitude of coefficients. On the other hand, 'Lasso' regression uses L1 regularization, which adds a penalty equal to the absolute value of the magnitude of coefficients.",
140
- "answer": "B",
141
- "level": "Senior",
142
- "domain": "AI"
143
- },
144
- {
145
- "id": "11",
146
- "question": "What is 'Data Augmentation' in the context of Machine Learning?",
147
- "choices": [
148
- "A. It's a technique to artificially create new training data from existing training data.",
149
- "B. It's a technique to increase the size of the dataset by collecting more data.",
150
- "C. It's a technique to clean the training data.",
151
- "D. It's a technique to reduce the size of the dataset."
152
- ],
153
- "explanation": "'Data Augmentation' is a strategy that enables practitioners to significantly increase the diversity of data available for training models, without actually collecting new data. Data augmentation techniques such as cropping, padding, and horizontal flipping are commonly used to train large neural networks.",
154
- "answer": "A",
155
- "level": "Senior",
156
- "domain": "AI"
157
- },
158
- {
159
- "id": "12",
160
- "question": "What is the purpose of 'Max Pooling' in a Convolutional Neural Network (CNN)?",
161
- "choices": [
162
- "A. To reduce the spatial dimensions of the output volume.",
163
- "B. To increase the spatial dimensions of the output volume.",
164
- "C. To normalize the output of the previous activation layer.",
165
- "D. To introduce non-linearity in the neural network."
166
- ],
167
- "explanation": "'Max Pooling' is a pooling operation that is typically added to CNNs following individual convolutional layers. When added to a model, max pooling reduces the dimensionality of images by reducing the number of pixels in the output from the previous convolutional layer.",
168
- "answer": "A",
169
- "level": "Senior",
170
- "domain": "AI"
171
- },
172
- {
173
- "id": "13",
174
- "question": "What is the difference between 'Batch Gradient Descent' and 'Mini-Batch Gradient Descent'?",
175
- "choices": [
176
- "A. 'Batch Gradient Descent' uses the entire training set to compute the gradient of the cost function, while 'Mini-Batch Gradient Descent' uses a subset of the training set.",
177
- "B. 'Mini-Batch Gradient Descent' uses the entire training set to compute the gradient of the cost function, while 'Batch Gradient Descent' uses a subset of the training set.",
178
- "C. Both 'Batch Gradient Descent' and 'Mini-Batch Gradient Descent' use the entire training set to compute the gradient of the cost function.",
179
- "D. Both 'Batch Gradient Descent' and 'Mini-Batch Gradient Descent' use a subset of the training set to compute the gradient of the cost function."
180
- ],
181
- "explanation": "'Batch Gradient Descent' uses the entire training set to compute the gradient of the cost function, while 'Mini-Batch Gradient Descent' uses a subset of the training set. With 'Mini-Batch Gradient Descent', you can replace the actual gradient (calculated from the entire data set) with an estimate of the gradient (calculated from a randomly selected subset of the data). Especially in big data applications, this can help to speed up gradient-based optimization algorithms significantly.",
182
- "answer": "A",
183
- "level": "Senior",
184
- "domain": "AI"
185
- },
186
- {
187
- "id": "14",
188
- "question": "What is 'Principal Component Analysis' (PCA) used for?",
189
- "choices": [
190
- "A. PCA is used to compress the data by reducing the number of dimensions.",
191
- "B. PCA is used to decompress the data by increasing the number of dimensions.",
192
- "C. PCA is used to classify the data into different categories.",
193
- "D. PCA is used to cluster the data into different groups."
194
- ],
195
- "explanation": "'Principal Component Analysis' (PCA) is a dimensionality reduction technique that is commonly used in machine learning and data visualization. It can be thought of as a projection method where data with 'm' columns (features) is projected into a subspace with 'm' or fewer columns, whilst retaining the essence of the original data.",
196
- "answer": "A",
197
- "level": "Senior",
198
- "domain": "AI"
199
- },
200
- {
201
- "id": "15",
202
- "question": "What is the purpose of 'Word Embeddings' in Natural Language Processing (NLP)?",
203
- "choices": [
204
- "A. To map words or phrases from the vocabulary to vectors of real numbers.",
205
- "B. To map words or phrases from the vocabulary to a dictionary of words.",
206
- "C. To convert the words in the vocabulary to lower case.",
207
- "D. To remove stop words from the vocabulary."
208
- ],
209
- "explanation": "'Word Embeddings' are a type of word representation that allows words with similar meaning to have a similar representation. They are a distributed representation for text that is perhaps one of the key breakthroughs for the impressive performance of deep learning methods on challenging natural language processing problems.",
210
- "answer": "A",
211
- "level": "Senior",
212
- "domain": "AI"
213
- },
214
- {
215
- "id": "16",
216
- "question": "What is the difference between 'Long Short Term Memory' (LSTM) and 'Gated Recurrent Unit' (GRU)?",
217
- "choices": [
218
- "A. LSTM has three gates (input, output, forget) while GRU has two gates (reset, update).",
219
- "B. GRU has three gates (input, output, forget) while LSTM has two gates (reset, update).",
220
- "C. Both LSTM and GRU have three gates (input, output, forget).",
221
- "D. Both LSTM and GRU have two gates (reset, update)."
222
- ],
223
- "explanation": "Both LSTM (Long Short Term Memory) and GRU (Gated Recurrent Unit) are types of recurrent neural network (RNN) architecture used in deep learning. The key difference between them is that LSTM has three gates (input, output, forget), while GRU has two gates (reset, update). This makes GRUs a simpler and more efficient model for certain tasks.",
224
- "answer": "A",
225
- "level": "Senior",
226
- "domain": "AI"
227
- },
228
- {
229
- "id": "17",
230
- "question": "What is 'Autoencoder' in the context of Machine Learning?",
231
- "choices": [
232
- "A. It's a type of artificial neural network used for learning efficient codings of input data.",
233
- "B. It's a type of artificial neural network used for generating new data that is similar to the input data.",
234
- "C. It's a type of artificial neural network used for classifying input data into different categories.",
235
- "D. It's a type of artificial neural network used for clustering input data into different groups."
236
- ],
237
- "explanation": "An 'Autoencoder' is a type of artificial neural network used for learning efficient codings of input data. It's typically used for the purpose of dimensionality reduction and feature learning.",
238
- "answer": "A",
239
- "level": "Senior",
240
- "domain": "AI"
241
- },
242
- {
243
- "id": "18",
244
- "question": "What is the purpose of 'Attention Mechanism' in the context of Machine Learning?",
245
- "choices": [
246
- "A. It's used to focus on certain parts of the input data that are more relevant to the task at hand.",
247
- "B. It's used to pay equal attention to all parts of the input data.",
248
- "C. It's used to ignore certain parts of the input data that are not relevant to the task at hand.",
249
- "D. Both A and C."
250
- ],
251
- "explanation": "The 'Attention Mechanism' is a technique used in machine learning models, especially in deep learning models, to focus on certain parts of the input data that are more relevant to the task at hand, and to ignore other parts. It's particularly useful in tasks such as machine translation, where it's important to focus on the right words in the input sequence when generating the output sequence.",
252
- "answer": "D",
253
- "level": "Senior",
254
- "domain": "AI"
255
- },
256
- {
257
- "id": "19",
258
- "question": "What is 'Reinforcement Learning' in the context of Machine Learning?",
259
- "choices": [
260
- "A. It's a type of machine learning where an agent learns to make decisions by taking actions in an environment to maximize some notion of cumulative reward.",
261
- "B. It's a type of machine learning where an agent learns to make decisions based on a fixed set of rules.",
262
- "C. It's a type of machine learning where an agent learns to make decisions based on a predefined set of actions.",
263
- "D. It's a type of machine learning where an agent learns to make decisions based on the actions taken by other agents."
264
- ],
265
- "explanation": "'Reinforcement Learning' is a type of machine learning where an agent learns to make decisions by taking actions in an environment to maximize some notion of cumulative reward. The agent learns from the consequences of its actions, rather than from being explicitly taught and it selects its actions on basis of its past experiences (exploitation) and also by new choices (exploration).",
266
- "answer": "A",
267
- "level": "Senior",
268
- "domain": "AI"
269
- },
270
- {
271
- "id": "20",
272
- "question": "What is 'Generative Adversarial Network' (GAN) in the context of Machine Learning?",
273
- "choices": [
274
- "A. It's a class of machine learning systems invented by Ian Goodfellow and his colleagues in 2014.",
275
- "B. It's a class of machine learning systems where two neural networks contest with each other in a game.",
276
- "C. It's a class of machine learning systems where one neural network, called the generator, generates new data instances, while the other, the discriminator, evaluates them for authenticity.",
277
- "D. All of the above."
278
- ],
279
- "explanation": "'Generative Adversarial Network' (GAN) is a class of machine learning systems invented by Ian Goodfellow and his colleagues in 2014. Two neural networks contest with each other in a game. Given a training set, this technique learns to generate new data with the same statistics as the training set. For example, a GAN trained on photographs can generate new photographs that look at least superficially authentic to human observers, having many realistic characteristics.",
280
- "answer": "D",
281
- "level": "Senior",
282
- "domain": "AI"
283
- },
284
- {
285
- "id": "21",
286
- "question": "Write a Python function to implement a basic 'K-Nearest Neighbors' (KNN) model.",
287
- "explanation": "The function should take a dataset and a value for 'K' as arguments and return a trained KNN model.",
288
- "level": "Senior",
289
- "domain": "AI"
290
- },
291
- {
292
- "id": "22",
293
- "question": "Write a Python function to implement a basic 'Naive Bayes' model.",
294
- "explanation": "The function should take a dataset as an argument and return a trained Naive Bayes model.",
295
- "level": "Senior",
296
- "domain": "AI"
297
- },
298
- {
299
- "id": "23",
300
- "question": "Write a Python function to implement a basic 'Random Forest' model.",
301
- "explanation": "The function should take a dataset as an argument and return a trained Random Forest model.",
302
- "level": "Senior",
303
- "domain": "AI"
304
- },
305
- {
306
- "id": "24",
307
- "question": "Write a Python function to implement a basic 'Gradient Boosting' model.",
308
- "explanation": "The function should take a dataset as an argument and return a trained Gradient Boosting model.",
309
- "level": "Senior",
310
- "domain": "AI"
311
- },
312
- {
313
- "id": "25",
314
- "question": "Write a Python function to implement a basic 'Deep Neural Network' (DNN) model.",
315
- "explanation": "The function should take a dataset as an argument and return a trained DNN model.",
316
- "level": "Senior",
317
- "domain": "AI"
318
- },
319
- {
320
- "id": "26",
321
- "question": "Write a Python function to implement a basic 'Convolutional Neural Network' (CNN) model.",
322
- "explanation": "The function should take a dataset as an argument and return a trained CNN model.",
323
- "level": "Senior",
324
- "domain": "AI"
325
- },
326
- {
327
- "id": "27",
328
- "question": "Write a Python function to implement a basic 'Decision Tree' model.",
329
- "explanation": "The function should take a dataset as an argument and return a trained decision tree model.",
330
- "level": "Senior",
331
- "domain": "AI"
332
- },
333
- {
334
- "id": "28",
335
- "question": "Write a Python function to implement a basic 'Support Vector Machine' (SVM) model.",
336
- "explanation": "The function should take a dataset as an argument and return a trained SVM model.",
337
- "level": "Senior",
338
- "domain": "AI"
339
- },
340
- {
341
- "id": "29",
342
- "question": "Write a Python function to implement a basic 'Linear Regression' model.",
343
- "explanation": "The function should take a dataset as an argument and return a trained linear regression model.",
344
- "level": "Senior",
345
- "domain": "AI"
346
- },
347
- {
348
- "id": "30",
349
- "question": "Write a Python function to implement a basic 'Logistic Regression' model.",
350
- "explanation": "The function should take a dataset as an argument and return a trained logistic regression model.",
351
- "level": "Senior",
352
- "domain": "AI"
353
- }
354
- ]
355
- }
data/question_tests/6322f947_dc0d_41d2_a321_e0f0e57bf070_Question_AI_Senior_1.json DELETED
@@ -1,355 +0,0 @@
1
- {
2
- "__count__": 30,
3
- "data": [
4
- {
5
- "id": "1",
6
- "question": "What is the concept of 'Curse of Dimensionality' in Machine Learning?",
7
- "choices": [
8
- "A. It refers to the difficulty in visualizing high-dimensional data.",
9
- "B. It refers to the phenomenon where the feature space becomes increasingly sparse for an increasing number of dimensions of a fixed-size training dataset.",
10
- "C. It refers to the computational complexity of mathematical operations in high-dimensional spaces.",
11
- "D. It refers to the overfitting that occurs when the model is too complex."
12
- ],
13
- "explanation": "The 'Curse of Dimensionality' refers to various phenomena that arise when analyzing and organizing data in high-dimensional spaces (often with hundreds or thousands of dimensions) that do not occur in low-dimensional settings such as the three-dimensional physical space of everyday experience.",
14
- "answer": "B",
15
- "level": "Senior",
16
- "domain": "AI"
17
- },
18
- {
19
- "id": "2",
20
- "question": "What is the difference between 'one-hot encoding' and 'label encoding'?",
21
- "choices": [
22
- "A. 'One-hot encoding' is used for ordinal variables, while 'label encoding' is used for nominal variables.",
23
- "B. 'Label encoding' is used for ordinal variables, while 'one-hot encoding' is used for nominal variables.",
24
- "C. Both 'one-hot encoding' and 'label encoding' are used for ordinal variables.",
25
- "D. Both 'one-hot encoding' and 'label encoding' are used for nominal variables."
26
- ],
27
- "explanation": "'Label encoding' and 'one-hot encoding' are two common methods used to convert categorical data into numerical data. 'Label encoding' assigns each unique category in a categorical variable with an integer. No new columns are created. On the other hand, 'one-hot encoding' creates new (binary) columns, indicating the presence of each possible value from the original data.",
28
- "answer": "B",
29
- "level": "Senior",
30
- "domain": "AI"
31
- },
32
- {
33
- "id": "3",
34
- "question": "What is 'Early Stopping' in model training?",
35
- "choices": [
36
- "A. It's a form of regularization used to avoid overfitting when training a learner with an iterative method.",
37
- "B. It's a technique used to speed up training by stopping the training process before it completes all iterations.",
38
- "C. It's a method used to stop training when the model's performance starts to decrease.",
39
- "D. It's a technique used to stop training when the model's performance does not improve on an held-out validation set."
40
- ],
41
- "explanation": "'Early stopping' is a form of regularization used to avoid overfitting when training a machine learning model with an iterative method, such as gradient descent. Such methods update the model's parameters (such as the weights in a neural network) iteratively, and early stopping halts this iterative process when the model's performance stops improving on a held-out validation dataset.",
42
- "answer": "A",
43
- "level": "Senior",
44
- "domain": "AI"
45
- },
46
- {
47
- "id": "4",
48
- "question": "What is the difference between 'Random Forest' and 'Gradient Boosting' algorithms?",
49
- "choices": [
50
- "A. 'Random Forest' is a bagging algorithm, while 'Gradient Boosting' is a boosting algorithm.",
51
- "B. 'Gradient Boosting' is a bagging algorithm, while 'Random Forest' is a boosting algorithm.",
52
- "C. Both 'Random Forest' and 'Gradient Boosting' are bagging algorithms.",
53
- "D. Both 'Random Forest' and 'Gradient Boosting' are boosting algorithms."
54
- ],
55
- "explanation": "'Random Forest' is a bagging algorithm and 'Gradient Boosting' is a boosting algorithm. Both are ensemble methods, but they combine the models in different ways. In 'Random Forest', each tree in the ensemble is built from a sample drawn with replacement (i.e., a bootstrap sample) from the training set. In 'Gradient Boosting', each new tree is fit on a modified version of the original data set.",
56
- "answer": "A",
57
- "level": "Senior",
58
- "domain": "AI"
59
- },
60
- {
61
- "id": "5",
62
- "question": "What is 'Transfer Learning' in the context of Machine Learning?",
63
- "choices": [
64
- "A. It's a technique where a pre-trained model is used on a new problem.",
65
- "B. It's a technique to transfer knowledge from one model to another.",
66
- "C. It's a technique to transfer knowledge from one problem domain to another.",
67
- "D. All of the above."
68
- ],
69
- "explanation": "'Transfer Learning' is a research problem in machine learning that focuses on storing knowledge gained while solving one problem and applying it to a different but related problem. For example, knowledge gained while learning to recognize cars could apply when trying to recognize trucks.",
70
- "answer": "D",
71
- "level": "Senior",
72
- "domain": "AI"
73
- },
74
- {
75
- "id": "6",
76
- "question": "What is the purpose of 'ReLU' in a neural network?",
77
- "choices": [
78
- "A. To introduce non-linearity in the neural network.",
79
- "B. To normalize the output of the neural network.",
80
- "C. To speed up the training process of the neural network.",
81
- "D. To prevent overfitting in the neural network."
82
- ],
83
- "explanation": "'ReLU' stands for 'Rectified Linear Unit'. It is the most commonly used activation function in neural networks and deep learning models. The function returns 0 if it receives any negative input, but for any positive value 'x' it returns that value back. It's used to introduce non-linearity in the neural network.",
84
- "answer": "A",
85
- "level": "Senior",
86
- "domain": "AI"
87
- },
88
- {
89
- "id": "7",
90
- "question": "What is 'Batch Normalization' in the context of Neural Networks?",
91
- "choices": [
92
- "A. It's a technique to provide any layer in a neural network with inputs that are zero mean/unit variance.",
93
- "B. It's a technique to normalize the output of a previous activation layer by subtracting the batch mean and dividing by the batch standard deviation.",
94
- "C. It's a technique to make the weights of a neural network have zero mean and unit variance.",
95
- "D. Both A and B."
96
- ],
97
- "explanation": "'Batch Normalization' is a technique to provide any layer in a neural network with inputs that are zero mean/unit variance, and it is a technique to normalize the output of the previous activation layer by subtracting the batch mean and dividing by the batch standard deviation. This technique helps to speed up learning in deep neural networks by reducing internal covariate shift, and it has become a standard component of most state-of-the-art neural networks.",
98
- "answer": "D",
99
- "level": "Senior",
100
- "domain": "AI"
101
- },
102
- {
103
- "id": "8",
104
- "question": "What is the purpose of 'L1' and 'L2' regularization?",
105
- "choices": [
106
- "A. They are techniques to prevent overfitting in a machine learning model.",
107
- "B. They are techniques to increase the speed of training a machine learning model.",
108
- "C. They are techniques to increase the complexity of a machine learning model.",
109
- "D. They are techniques to decrease the complexity of a machine learning model."
110
- ],
111
- "explanation": "'L1' and 'L2' are regularization techniques used to prevent overfitting in a machine learning model by adding a penalty term to the loss function. The penalty term encourages the model to have smaller weights, which makes the model simpler and thus less likely to overfit.",
112
- "answer": "A",
113
- "level": "Senior",
114
- "domain": "AI"
115
- },
116
- {
117
- "id": "9",
118
- "question": "What is 'Ensemble Learning' in the context of Machine Learning?",
119
- "choices": [
120
- "A. It's a technique where multiple models are trained to solve the same problem and combined to get better results.",
121
- "B. It's a technique where one model is trained to solve multiple problems.",
122
- "C. It's a technique where the model is trained on an ensemble of different datasets.",
123
- "D. It's a technique where the model is trained multiple times on the same dataset."
124
- ],
125
- "explanation": "'Ensemble Learning' is a machine learning paradigm where multiple models (often called 'weak learners') are trained to solve the same problem and combined to get better results. The main hypothesis is that when weak models are correctly combined we can obtain more accurate and/or robust models.",
126
- "answer": "A",
127
- "level": "Senior",
128
- "domain": "AI"
129
- },
130
- {
131
- "id": "10",
132
- "question": "What is the difference between 'Ridge' and 'Lasso' regression?",
133
- "choices": [
134
- "A. 'Ridge' regression uses L1 regularization while 'Lasso' regression uses L2 regularization.",
135
- "B. 'Lasso' regression uses L1 regularization while 'Ridge' regression uses L2 regularization.",
136
- "C. Both 'Ridge' and 'Lasso' regression use L1 regularization.",
137
- "D. Both 'Ridge' and 'Lasso' regression use L2 regularization."
138
- ],
139
- "explanation": "'Ridge' and 'Lasso' regression are two types of linear regression models that use different types of regularization. 'Ridge' regression uses L2 regularization, which adds a penalty equal to the square of the magnitude of coefficients. On the other hand, 'Lasso' regression uses L1 regularization, which adds a penalty equal to the absolute value of the magnitude of coefficients.",
140
- "answer": "B",
141
- "level": "Senior",
142
- "domain": "AI"
143
- },
144
- {
145
- "id": "11",
146
- "question": "What is 'Data Augmentation' in the context of Machine Learning?",
147
- "choices": [
148
- "A. It's a technique to artificially create new training data from existing training data.",
149
- "B. It's a technique to increase the size of the dataset by collecting more data.",
150
- "C. It's a technique to clean the training data.",
151
- "D. It's a technique to reduce the size of the dataset."
152
- ],
153
- "explanation": "'Data Augmentation' is a strategy that enables practitioners to significantly increase the diversity of data available for training models, without actually collecting new data. Data augmentation techniques such as cropping, padding, and horizontal flipping are commonly used to train large neural networks.",
154
- "answer": "A",
155
- "level": "Senior",
156
- "domain": "AI"
157
- },
158
- {
159
- "id": "12",
160
- "question": "What is the purpose of 'Max Pooling' in a Convolutional Neural Network (CNN)?",
161
- "choices": [
162
- "A. To reduce the spatial dimensions of the output volume.",
163
- "B. To increase the spatial dimensions of the output volume.",
164
- "C. To normalize the output of the previous activation layer.",
165
- "D. To introduce non-linearity in the neural network."
166
- ],
167
- "explanation": "'Max Pooling' is a pooling operation that is typically added to CNNs following individual convolutional layers. When added to a model, max pooling reduces the dimensionality of images by reducing the number of pixels in the output from the previous convolutional layer.",
168
- "answer": "A",
169
- "level": "Senior",
170
- "domain": "AI"
171
- },
172
- {
173
- "id": "13",
174
- "question": "What is the difference between 'Batch Gradient Descent' and 'Mini-Batch Gradient Descent'?",
175
- "choices": [
176
- "A. 'Batch Gradient Descent' uses the entire training set to compute the gradient of the cost function, while 'Mini-Batch Gradient Descent' uses a subset of the training set.",
177
- "B. 'Mini-Batch Gradient Descent' uses the entire training set to compute the gradient of the cost function, while 'Batch Gradient Descent' uses a subset of the training set.",
178
- "C. Both 'Batch Gradient Descent' and 'Mini-Batch Gradient Descent' use the entire training set to compute the gradient of the cost function.",
179
- "D. Both 'Batch Gradient Descent' and 'Mini-Batch Gradient Descent' use a subset of the training set to compute the gradient of the cost function."
180
- ],
181
- "explanation": "'Batch Gradient Descent' uses the entire training set to compute the gradient of the cost function, while 'Mini-Batch Gradient Descent' uses a subset of the training set. With 'Mini-Batch Gradient Descent', you can replace the actual gradient (calculated from the entire data set) with an estimate of the gradient (calculated from a randomly selected subset of the data). Especially in big data applications, this can help to speed up gradient-based optimization algorithms significantly.",
182
- "answer": "A",
183
- "level": "Senior",
184
- "domain": "AI"
185
- },
186
- {
187
- "id": "14",
188
- "question": "What is 'Principal Component Analysis' (PCA) used for?",
189
- "choices": [
190
- "A. PCA is used to compress the data by reducing the number of dimensions.",
191
- "B. PCA is used to decompress the data by increasing the number of dimensions.",
192
- "C. PCA is used to classify the data into different categories.",
193
- "D. PCA is used to cluster the data into different groups."
194
- ],
195
- "explanation": "'Principal Component Analysis' (PCA) is a dimensionality reduction technique that is commonly used in machine learning and data visualization. It can be thought of as a projection method where data with 'm' columns (features) is projected into a subspace with 'm' or fewer columns, whilst retaining the essence of the original data.",
196
- "answer": "A",
197
- "level": "Senior",
198
- "domain": "AI"
199
- },
200
- {
201
- "id": "15",
202
- "question": "What is the purpose of 'Word Embeddings' in Natural Language Processing (NLP)?",
203
- "choices": [
204
- "A. To map words or phrases from the vocabulary to vectors of real numbers.",
205
- "B. To map words or phrases from the vocabulary to a dictionary of words.",
206
- "C. To convert the words in the vocabulary to lower case.",
207
- "D. To remove stop words from the vocabulary."
208
- ],
209
- "explanation": "'Word Embeddings' are a type of word representation that allows words with similar meaning to have a similar representation. They are a distributed representation for text that is perhaps one of the key breakthroughs for the impressive performance of deep learning methods on challenging natural language processing problems.",
210
- "answer": "A",
211
- "level": "Senior",
212
- "domain": "AI"
213
- },
214
- {
215
- "id": "16",
216
- "question": "What is the difference between 'Long Short Term Memory' (LSTM) and 'Gated Recurrent Unit' (GRU)?",
217
- "choices": [
218
- "A. LSTM has three gates (input, output, forget) while GRU has two gates (reset, update).",
219
- "B. GRU has three gates (input, output, forget) while LSTM has two gates (reset, update).",
220
- "C. Both LSTM and GRU have three gates (input, output, forget).",
221
- "D. Both LSTM and GRU have two gates (reset, update)."
222
- ],
223
- "explanation": "Both LSTM (Long Short Term Memory) and GRU (Gated Recurrent Unit) are types of recurrent neural network (RNN) architecture used in deep learning. The key difference between them is that LSTM has three gates (input, output, forget), while GRU has two gates (reset, update). This makes GRUs a simpler and more efficient model for certain tasks.",
224
- "answer": "A",
225
- "level": "Senior",
226
- "domain": "AI"
227
- },
228
- {
229
- "id": "17",
230
- "question": "What is 'Autoencoder' in the context of Machine Learning?",
231
- "choices": [
232
- "A. It's a type of artificial neural network used for learning efficient codings of input data.",
233
- "B. It's a type of artificial neural network used for generating new data that is similar to the input data.",
234
- "C. It's a type of artificial neural network used for classifying input data into different categories.",
235
- "D. It's a type of artificial neural network used for clustering input data into different groups."
236
- ],
237
- "explanation": "An 'Autoencoder' is a type of artificial neural network used for learning efficient codings of input data. It's typically used for the purpose of dimensionality reduction and feature learning.",
238
- "answer": "A",
239
- "level": "Senior",
240
- "domain": "AI"
241
- },
242
- {
243
- "id": "18",
244
- "question": "What is the purpose of 'Attention Mechanism' in the context of Machine Learning?",
245
- "choices": [
246
- "A. It's used to focus on certain parts of the input data that are more relevant to the task at hand.",
247
- "B. It's used to pay equal attention to all parts of the input data.",
248
- "C. It's used to ignore certain parts of the input data that are not relevant to the task at hand.",
249
- "D. Both A and C."
250
- ],
251
- "explanation": "The 'Attention Mechanism' is a technique used in machine learning models, especially in deep learning models, to focus on certain parts of the input data that are more relevant to the task at hand, and to ignore other parts. It's particularly useful in tasks such as machine translation, where it's important to focus on the right words in the input sequence when generating the output sequence.",
252
- "answer": "D",
253
- "level": "Senior",
254
- "domain": "AI"
255
- },
256
- {
257
- "id": "19",
258
- "question": "What is 'Reinforcement Learning' in the context of Machine Learning?",
259
- "choices": [
260
- "A. It's a type of machine learning where an agent learns to make decisions by taking actions in an environment to maximize some notion of cumulative reward.",
261
- "B. It's a type of machine learning where an agent learns to make decisions based on a fixed set of rules.",
262
- "C. It's a type of machine learning where an agent learns to make decisions based on a predefined set of actions.",
263
- "D. It's a type of machine learning where an agent learns to make decisions based on the actions taken by other agents."
264
- ],
265
- "explanation": "'Reinforcement Learning' is a type of machine learning where an agent learns to make decisions by taking actions in an environment to maximize some notion of cumulative reward. The agent learns from the consequences of its actions, rather than from being explicitly taught and it selects its actions on basis of its past experiences (exploitation) and also by new choices (exploration).",
266
- "answer": "A",
267
- "level": "Senior",
268
- "domain": "AI"
269
- },
270
- {
271
- "id": "20",
272
- "question": "What is 'Generative Adversarial Network' (GAN) in the context of Machine Learning?",
273
- "choices": [
274
- "A. It's a class of machine learning systems invented by Ian Goodfellow and his colleagues in 2014.",
275
- "B. It's a class of machine learning systems where two neural networks contest with each other in a game.",
276
- "C. It's a class of machine learning systems where one neural network, called the generator, generates new data instances, while the other, the discriminator, evaluates them for authenticity.",
277
- "D. All of the above."
278
- ],
279
- "explanation": "'Generative Adversarial Network' (GAN) is a class of machine learning systems invented by Ian Goodfellow and his colleagues in 2014. Two neural networks contest with each other in a game. Given a training set, this technique learns to generate new data with the same statistics as the training set. For example, a GAN trained on photographs can generate new photographs that look at least superficially authentic to human observers, having many realistic characteristics.",
280
- "answer": "D",
281
- "level": "Senior",
282
- "domain": "AI"
283
- },
284
- {
285
- "id": "21",
286
- "question": "Write a Python function to implement a basic 'K-Nearest Neighbors' (KNN) model.",
287
- "explanation": "The function should take a dataset and a value for 'K' as arguments and return a trained KNN model.",
288
- "level": "Senior",
289
- "domain": "AI"
290
- },
291
- {
292
- "id": "22",
293
- "question": "Write a Python function to implement a basic 'Naive Bayes' model.",
294
- "explanation": "The function should take a dataset as an argument and return a trained Naive Bayes model.",
295
- "level": "Senior",
296
- "domain": "AI"
297
- },
298
- {
299
- "id": "23",
300
- "question": "Write a Python function to implement a basic 'Random Forest' model.",
301
- "explanation": "The function should take a dataset as an argument and return a trained Random Forest model.",
302
- "level": "Senior",
303
- "domain": "AI"
304
- },
305
- {
306
- "id": "24",
307
- "question": "Write a Python function to implement a basic 'Gradient Boosting' model.",
308
- "explanation": "The function should take a dataset as an argument and return a trained Gradient Boosting model.",
309
- "level": "Senior",
310
- "domain": "AI"
311
- },
312
- {
313
- "id": "25",
314
- "question": "Write a Python function to implement a basic 'Deep Neural Network' (DNN) model.",
315
- "explanation": "The function should take a dataset as an argument and return a trained DNN model.",
316
- "level": "Senior",
317
- "domain": "AI"
318
- },
319
- {
320
- "id": "26",
321
- "question": "Write a Python function to implement a basic 'Convolutional Neural Network' (CNN) model.",
322
- "explanation": "The function should take a dataset as an argument and return a trained CNN model.",
323
- "level": "Senior",
324
- "domain": "AI"
325
- },
326
- {
327
- "id": "27",
328
- "question": "Write a Python function to implement a basic 'Decision Tree' model.",
329
- "explanation": "The function should take a dataset as an argument and return a trained decision tree model.",
330
- "level": "Senior",
331
- "domain": "AI"
332
- },
333
- {
334
- "id": "28",
335
- "question": "Write a Python function to implement a basic 'Support Vector Machine' (SVM) model.",
336
- "explanation": "The function should take a dataset as an argument and return a trained SVM model.",
337
- "level": "Senior",
338
- "domain": "AI"
339
- },
340
- {
341
- "id": "29",
342
- "question": "Write a Python function to implement a basic 'Linear Regression' model.",
343
- "explanation": "The function should take a dataset as an argument and return a trained linear regression model.",
344
- "level": "Senior",
345
- "domain": "AI"
346
- },
347
- {
348
- "id": "30",
349
- "question": "Write a Python function to implement a basic 'Logistic Regression' model.",
350
- "explanation": "The function should take a dataset as an argument and return a trained logistic regression model.",
351
- "level": "Senior",
352
- "domain": "AI"
353
- }
354
- ]
355
- }
requirements.txt CHANGED
@@ -1,31 +1,90 @@
+aiohttp==3.9.3
+aiosignal==1.3.1
 annotated-types==0.6.0
 anyio==4.3.0
+attrs==23.2.0
+CacheControl==0.14.0
+cachetools==5.3.3
 certifi==2024.2.2
+cffi==1.16.0
+charset-normalizer==3.3.2
 click==8.1.7
 colorama==0.4.6
+cryptography==42.0.5
+dataclasses-json==0.6.4
 dnspython==2.6.1
 email_validator==2.1.1
 fastapi==0.110.0
+firebase-admin==6.4.0
+frozenlist==1.4.1
+google-ai-generativelanguage==0.4.0
+google-api-core==2.17.1
+google-api-python-client==2.121.0
+google-auth==2.28.1
+google-auth-httplib2==0.2.0
+google-cloud-core==2.4.1
+google-cloud-firestore==2.15.0
+google-cloud-storage==2.15.0
+google-crc32c==1.5.0
+google-generativeai==0.3.2
+google-resumable-media==2.7.0
+googleapis-common-protos==1.62.0
+greenlet==3.0.3
+grpcio==1.62.0
+grpcio-status==1.62.0
 h11==0.14.0
 httpcore==1.0.4
+httplib2==0.22.0
 httptools==0.6.1
 httpx==0.27.0
 idna==3.6
 itsdangerous==2.1.2
 Jinja2==3.1.3
+jsonpatch==1.33
+jsonpointer==2.4
+langchain==0.1.11
+langchain-community==0.0.27
+langchain-core==0.1.30
+langchain-google-genai==0.0.9
+langchain-text-splitters==0.0.1
+langsmith==0.1.22
+lxml==5.1.0
 MarkupSafe==2.1.5
+marshmallow==3.21.1
+msgpack==1.0.8
+multidict==6.0.5
+mypy-extensions==1.0.0
+numpy==1.26.4
 orjson==3.9.15
+packaging==23.2
+proto-plus==1.23.0
+protobuf==4.25.3
+pyasn1==0.5.1
+pyasn1-modules==0.3.0
+pycparser==2.21
 pydantic==2.6.3
 pydantic-extra-types==2.6.0
 pydantic-settings==2.2.1
 pydantic_core==2.16.3
+PyJWT==2.8.0
+pyparsing==3.1.2
+python-docx==1.1.0
 python-dotenv==1.0.1
 python-multipart==0.0.9
 PyYAML==6.0.1
+requests==2.31.0
+rsa==4.9
 sniffio==1.3.1
+SQLAlchemy==2.0.28
 starlette==0.36.3
+tenacity==8.2.3
+tqdm==4.66.2
+typing-inspect==0.9.0
 typing_extensions==4.10.0
 ujson==5.9.0
+uritemplate==4.1.1
+urllib3==2.2.1
 uvicorn==0.27.1
 watchfiles==0.21.0
 websockets==12.0
+yarl==1.9.4