Lauredecaudin committed on
Commit
b024450
1 Parent(s): 2ac107e

Update pages/4-Create your own bot (advanced).py

Browse files
pages/4-Create your own bot (advanced).py CHANGED
@@ -86,60 +86,92 @@ def developer_guide():
86
 
87
  # Call the function to display the developer guide page
88
  #developer_guide()
89
-
90
  import streamlit as st
91
- import PyPDF2
92
- from transformers import pipeline
93
  import torch
94
 
95
- # Hugging Face model
96
- MODEL_NAME = "facebook/rag-sequence-nq"
97
-
98
  @st.cache_resource
99
def load_model():
    """Build the question-answering pipeline used by the app.

    Returns:
        The Hugging Face ``question-answering`` pipeline created from the
        ``distilbert-base-uncased-distilled-squad`` checkpoint.
    """
    return pipeline(
        "question-answering",
        model="distilbert-base-uncased-distilled-squad",
    )
 
 
 
 
 
 
 
 
 
 
 
103
 
104
- qa_pipeline = load_model()
105
 
106
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts; the app passes the
            object returned by the Streamlit file uploader.

    Returns:
        One string holding the extracted text of all pages, with no
        separator inserted between pages.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # Walk the pages directly and join once, instead of indexing by position
    # and growing the string page by page. ``or ""`` keeps a page that yields
    # no text from breaking the join.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
113
 
114
def generate_answer(question, context):
    """Answer a question about the resume with the question-answering pipeline.

    Args:
        question: The question typed by the user.
        context: The resume text the answer is looked up in.

    Returns:
        The ``"answer"`` entry of the pipeline result.
    """
    # Instruction that frames the question from the candidate's point of view.
    context_instruction = (
        "Your professional experience is outlined in the following resume. "
        "Answer the question as if you are the candidate, providing details from the resume where relevant."
    )

    # Combine the question with the context instruction
    full_question = f"{context_instruction} Question: {question}"

    # Search the ``context`` argument rather than the module-level
    # ``resume_text``, so the function depends only on what it is given.
    result = qa_pipeline(question=full_question, context=context)

    # Hand the answer back; without a return the caller displayed ``None``.
    return result["answer"]
126
-
127
# Streamlit page: upload a resume PDF, then ask questions about it.
st.title("Resume-based Q&A Bot")
st.write("Upload your resume as a PDF, and the bot will answer questions about your professional experiences.")

uploaded_file = st.file_uploader("Upload Resume (PDF)", type="pdf")

# Everything below needs the resume text, so it only runs once a file exists.
if uploaded_file:
    st.write("Resume uploaded successfully!")
    resume_text = extract_text_from_pdf(uploaded_file)

    question = st.text_input("Ask a question about your professional experiences:")

    # The button is evaluated before the question check so that it is
    # rendered even while the question box is still empty.
    if st.button("Get Answer"):
        if question:
            with st.spinner("Generating answer..."):
                answer = generate_answer(question, resume_text)
                st.write("**Answer:**", answer)
145
-
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
  # Call the function to display the developer guide page
88
  #developer_guide()
 
89
  import streamlit as st
90
+ from transformers import RagRetriever, RagSequenceForGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
91
+ from PyPDF2 import PdfReader
92
  import torch
93
 
94
+ # Load the tokenizer and the custom model (GPT-Neo-125M)
 
 
95
  @st.cache_resource
96
def load_gpt_neo_rag():
    """Load the tokenizer and the RAG model used to answer resume questions.

    The function name is kept so existing callers keep working, but GPT-Neo
    is no longer loaded: it is a decoder-only model, so
    ``AutoModelForSeq2SeqLM`` cannot load it, and a RAG model needs an
    encoder-decoder generator. The tokenizer, retriever and model are all
    loaded from one RAG checkpoint so that they agree with each other.

    Returns:
        tuple: ``(tokenizer, rag_model)``.
    """
    # One checkpoint for every component; the "sequence" variant matches the
    # RagSequenceForGeneration class used below.
    checkpoint = "facebook/rag-sequence-nq"

    # For a RAG checkpoint this resolves to the tokenizer that belongs to it.
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    # Same retriever options as before: exact index over the dummy dataset.
    retriever = RagRetriever.from_pretrained(checkpoint, index_name="exact", use_dummy_dataset=True)

    # The checkpoint ships its own generator, so none is passed in.
    rag_model = RagSequenceForGeneration.from_pretrained(checkpoint, retriever=retriever)

    return tokenizer, rag_model
111
 
112
+ tokenizer, rag_model = load_gpt_neo_rag()
113
 
114
+ # Function to read resume PDF
115
def read_pdf(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: Whatever ``PdfReader`` accepts; the app passes the uploaded
            file object.

    Returns:
        The page texts concatenated in page order, with no separator.
    """
    page_texts = (page.extract_text() for page in PdfReader(file).pages)
    return "".join(page_texts)
121
 
122
+ # Function to generate a contextualized answer using RAG with GPT-Neo
123
def generate_answer(question, resume_text, name="The candidate"):
    """Generate an answer to a question about the resume.

    Uses the module-level ``tokenizer`` and ``rag_model``.

    Args:
        question: The question typed by the user.
        resume_text: Text extracted from the uploaded resume.
        name: Name the model is told to answer as.

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    # NOTE(review): the resume is only passed to the tokenizer as the second
    # text of a pair; whether the model's retriever ever consults it cannot
    # be told from this file -- confirm this is the intended design.

    # Add context instruction to guide the model
    context_instruction = (
        f"You are {name}, and your professional experience is outlined in the following resume. "
        "Answer the question as if you are the candidate, providing details from the resume where relevant."
    )

    # Combine the question with the context instruction
    full_question = f"{context_instruction} Question: {question}"

    # Only one example is encoded, so no padding is needed. Requesting it
    # raises for tokenizers that define no pad token.
    inputs = tokenizer(full_question, resume_text, return_tensors="pt", truncation=True)

    # Forward only the tensors generation needs; splatting the whole encoding
    # would also pass extras such as token_type_ids into generate().
    outputs = rag_model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs.get("attention_mask"),
    )

    # Decode the generated response
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
146
 
147
# Streamlit app UI: upload a resume PDF, then ask questions about it.
st.title("Resume-based Q&A Bot (RAG with GPT-Neo)")

st.write("Upload your resume and ask questions about your professional experience!")

# File uploader for the resume
uploaded_file = st.file_uploader("Upload your resume (PDF format)", type=["pdf"])

if uploaded_file is None:
    st.write("Please upload a PDF resume to get started.")
else:
    # A file is available: extract its text and show it for reference.
    resume_text = read_pdf(uploaded_file)
    st.write("Resume successfully uploaded!")
    st.write("Extracted Resume Text:")
    st.text(resume_text)

    # Text input for questions
    question = st.text_input("Ask a question about the resume")

    # Name input for the person in the resume
    candidate_name = st.text_input("Enter the candidate's name (optional)", "The candidate")

    # Generate and display the answer when the button is clicked
    if st.button("Generate Answer"):
        cleaned_question = question.strip()
        if cleaned_question:
            # The name field is optional: fall back to the default when it is
            # cleared, so the prompt never reads "You are , and ...".
            effective_name = candidate_name.strip() or "The candidate"
            # Show progress while the model generates the answer.
            with st.spinner("Generating answer..."):
                answer = generate_answer(cleaned_question, resume_text, effective_name)
            st.write("Answer:")
            st.write(answer)
        else:
            # A whitespace-only question counts as no question.
            st.write("Please enter a question.")