QuangDinh2102 committed on
Commit
a37b78a
1 Parent(s): 850cd75

Phase3/QuangDT: Add question_rag module

Browse files
app/modules/question_rag/models/question_rag_logic.py CHANGED
@@ -1,16 +1,12 @@
1
- '''
2
- Input : str of job description
3
- Output : str of quiz
4
- '''
5
-
6
  from langchain_google_genai import ChatGoogleGenerativeAI
7
- from langchain_community.document_loaders import Docx2txtLoader
8
  from langchain_google_genai import GoogleGenerativeAIEmbeddings
9
- from langchain_community.vectorstores import FAISS
10
- from langchain_text_splitters import RecursiveCharacterTextSplitter
11
  from langchain_core.prompts import ChatPromptTemplate
12
  from langchain.chains.combine_documents import create_stuff_documents_chain
13
  from langchain.chains import create_retrieval_chain
 
 
 
 
14
 
15
  import os
16
  from dotenv import load_dotenv
@@ -20,37 +16,46 @@ load_dotenv()
20
 
21
  # Define the google api key
22
  os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
23
- GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
24
-
25
- # JOB_TEXT = "Job Title is Senior Python Software Engineer, Level is Senior, and Brief summary of required skills is 5+ years of professional Python development experience, Expertise in Python and its frameworks."
26
 
27
- def question_rag(jobtext):
28
- llm = ChatGoogleGenerativeAI(model="gemini-pro")
29
-
30
- loader = Docx2txtLoader("data/w3school_data.docx")
31
 
32
- docs = loader.load()
 
33
 
34
  embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
35
 
36
- text_splitter = RecursiveCharacterTextSplitter()
37
- documents = text_splitter.split_documents(docs)
38
- vector = FAISS.from_documents(documents, embeddings)
39
-
40
- prompt = ChatPromptTemplate.from_template("""Answer the question based only on the following context:
 
 
 
 
 
 
 
 
 
 
41
  <context>
42
  {context}
43
  </context>
44
 
45
  Generate a 10 quiz suitable for the given job description "{input}". Do not include "All of the above" answers.
46
  Output format is JSON:
47
- ("count": 10, "data": ( "id": "", "question": "", "choices": [ "A. ", "B. ", "C.", "D. " ], "explanation": "", "answer": "", "level": "", "domain": "" )).
48
  About level help me three levels: "Fresher, Junior, Senior".
49
- """)
50
 
51
- document_chain = create_stuff_documents_chain(llm, prompt)
52
 
53
- retriever = vector.as_retriever()
54
  retrieval_chain = create_retrieval_chain(retriever, document_chain)
55
  response = retrieval_chain.invoke({"input": jobtext})
56
 
 
 
 
 
 
 
1
  from langchain_google_genai import ChatGoogleGenerativeAI
 
2
  from langchain_google_genai import GoogleGenerativeAIEmbeddings
 
 
3
  from langchain_core.prompts import ChatPromptTemplate
4
  from langchain.chains.combine_documents import create_stuff_documents_chain
5
  from langchain.chains import create_retrieval_chain
6
+ from langchain_core.output_parsers import JsonOutputParser
7
+
8
+ from langchain_community.vectorstores import Qdrant
9
+ import qdrant_client
10
 
11
  import os
12
  from dotenv import load_dotenv
 
16
 
17
  # Define the google api key
18
  os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
19
+ os.environ['QDRANT_API_KEY'] = os.getenv('QDRANT_API_KEY')
20
+ os.environ['QDRANT_URL'] = os.getenv('QDRANT_URL')
 
21
 
22
+ GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
23
+ QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
24
+ QDRANT_URL = os.environ.get("QDRANT_URL")
 
25
 
26
+ def question_rag(jobtext: str):
27
+ llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY, request_timeout=120)
28
 
29
  embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
30
 
31
+ client = qdrant_client.QdrantClient(
32
+ url=QDRANT_URL,
33
+ api_key=QDRANT_API_KEY,
34
+ )
35
+
36
+ doc_store = Qdrant(
37
+ client=client,
38
+ collection_name="rag_documents_test",
39
+ embeddings=embeddings,
40
+ )
41
+
42
+ json_parser = JsonOutputParser()
43
+
44
+ prompt = ChatPromptTemplate.from_template("""
45
+ Answer the question based only on the following context:
46
  <context>
47
  {context}
48
  </context>
49
 
50
  Generate a 10 quiz suitable for the given job description "{input}". Do not include "All of the above" answers.
51
  Output format is JSON:
52
+ ("__count__": 10, "data": ( "id": "", "question": "", "choices": [ "A. ", "B. ", "C.", "D. " ], "explanation": "", "answer": "", "level": "", "domain": "" )).
53
  About level help me three levels: "Fresher, Junior, Senior".
54
+ """)
55
 
56
+ document_chain = create_stuff_documents_chain(llm, prompt, output_parser=json_parser)
57
 
58
+ retriever = doc_store.as_retriever()
59
  retrieval_chain = create_retrieval_chain(retriever, document_chain)
60
  response = retrieval_chain.invoke({"input": jobtext})
61
 
data/.gitkeep DELETED
File without changes
data/w3school_data.docx DELETED
Binary file (237 kB)
 
requirements.txt CHANGED
@@ -13,7 +13,6 @@ colorama==0.4.6
13
  cryptography==42.0.5
14
  dataclasses-json==0.6.4
15
  dnspython==2.6.1
16
- docx2txt==0.8
17
  email_validator==2.1.1
18
  fastapi==0.110.0
19
  firebase-admin==6.4.0
 
13
  cryptography==42.0.5
14
  dataclasses-json==0.6.4
15
  dnspython==2.6.1
 
16
  email_validator==2.1.1
17
  fastapi==0.110.0
18
  firebase-admin==6.4.0