update: add file-upload querying to the Search Candidates page
Browse files
- pages/02_Professional Screen.py +1 -1
- pages/05_Search_Candidates.py +17 -29
- requirements.txt +1 -0
- utils/utils.py +8 -5
pages/02_Professional Screen.py
CHANGED
@@ -11,7 +11,7 @@ from langchain.chat_models import ChatOpenAI
|
|
11 |
from langchain.chains import ConversationChain, RetrievalQA
|
12 |
from langchain.prompts.prompt import PromptTemplate
|
13 |
from langchain.text_splitter import NLTKTextSplitter
|
14 |
-
from langchain.embeddings import
|
15 |
from langchain.vectorstores import FAISS
|
16 |
import nltk
|
17 |
from prompts.prompts import templates
|
|
|
11 |
from langchain.chains import ConversationChain, RetrievalQA
|
12 |
from langchain.prompts.prompt import PromptTemplate
|
13 |
from langchain.text_splitter import NLTKTextSplitter
|
14 |
+
from langchain.embeddings import VoyageEmbeddings
|
15 |
from langchain.vectorstores import FAISS
|
16 |
import nltk
|
17 |
from prompts.prompts import templates
|
pages/05_Search_Candidates.py
CHANGED
@@ -1,39 +1,12 @@
|
|
1 |
import streamlit as st
|
2 |
import os
|
3 |
-
|
4 |
-
from langchain.chat_models import ChatOpenAI
|
5 |
-
from langchain.llms import HuggingFaceEndpoint
|
6 |
-
from langchain.document_loaders import PyPDFDirectoryLoader
|
7 |
-
from langchain.chains import RetrievalQA
|
8 |
-
from langchain.chat_models import ChatOpenAI
|
9 |
-
from langchain.vectorstores import DeepLake
|
10 |
from utils.llm import model_pipeline, load_memory, typewriter
|
|
|
11 |
from dotenv import load_dotenv
|
12 |
load_dotenv()
|
13 |
|
14 |
st.title("Search the right candidates!")
|
15 |
-
# st.write( "OPENAI_API_KEY" not in os.environ.keys())
|
16 |
-
# if "OPENAI_API_KEY" not in os.environ.keys():
|
17 |
-
# os.environ["OPENAI_API_KEY"] = st.text_input(
|
18 |
-
# "PLEASE ENTER YOUR OPEN API KEY. Head over to this [link](https://platform.openai.com/api-keys)",
|
19 |
-
# placeholder="Enter your Key here...",
|
20 |
-
# type="password")
|
21 |
-
# st.write("You can also set the OPENAI_API_KEY environment variable.")
|
22 |
-
# st.rerun()
|
23 |
-
#
|
24 |
-
# if "COHERE_API_KEY" not in os.environ.keys():
|
25 |
-
# os.environ["COHERE_API_KEY"] = st.text_input(
|
26 |
-
# "PLEASE ENTER YOUR COHERE API KEY. Head over to this [link](https://dashboard.cohere.com/welcome/login?redirect_uri=%2F)",
|
27 |
-
# placeholder="Enter your Key here...",
|
28 |
-
# type="password")
|
29 |
-
# st.rerun()
|
30 |
-
#
|
31 |
-
# if "VOYAGE_API_KEY" not in os.environ.keys():
|
32 |
-
# os.environ["VOYAGE_API_KEY"] = st.text_input(
|
33 |
-
# "PLEASE ENTER YOUR VOYAGE API KEY. Head over to this [link](https://dash.voyageai.com/)",
|
34 |
-
# placeholder="Enter your Key here...",
|
35 |
-
# type="password")
|
36 |
-
# st.rerun()
|
37 |
|
38 |
if "messages" not in st.session_state:
|
39 |
st.session_state.messages = []
|
@@ -41,6 +14,21 @@ if "messages" not in st.session_state:
|
|
41 |
if "memory" not in st.session_state:
|
42 |
st.session_state["memory"] = load_memory()
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
for message in st.session_state.messages:
|
45 |
with st.chat_message(message["role"]):
|
46 |
st.markdown(message["content"])
|
|
|
1 |
import streamlit as st
|
2 |
import os
|
3 |
+
import tempfile
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
from utils.llm import model_pipeline, load_memory, typewriter
|
5 |
+
from utils.utils import load_documents
|
6 |
from dotenv import load_dotenv
|
7 |
load_dotenv()
|
8 |
|
9 |
st.title("Search the right candidates!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
if "messages" not in st.session_state:
|
12 |
st.session_state.messages = []
|
|
|
14 |
if "memory" not in st.session_state:
|
15 |
st.session_state["memory"] = load_memory()
|
16 |
|
17 |
+
uploaded_file = st.file_uploader("Choose a PDF...", type="pdf")
|
18 |
+
if uploaded_file is not None:
|
19 |
+
# Create a temporary directory
|
20 |
+
temp_dir = tempfile.mkdtemp()
|
21 |
+
file_name = st.text_input("Enter File name: ", "uploaded_file.pdf")
|
22 |
+
st.session_state["file_name"] = file_name
|
23 |
+
# Save the uploaded file to the temporary directory
|
24 |
+
with open(os.path.join(temp_dir, 'uploaded_file.pdf'), 'wb') as f:
|
25 |
+
f.write(uploaded_file.getvalue())
|
26 |
+
|
27 |
+
# Pass the file path to the load_documents function
|
28 |
+
load_documents(file_path=os.path.join(temp_dir, 'uploaded_file.pdf'))
|
29 |
+
st.session_state.messages.append({"role": "assistant", "content": "I have loaded the resume."})
|
30 |
+
del uploaded_file
|
31 |
+
|
32 |
for message in st.session_state.messages:
|
33 |
with st.chat_message(message["role"]):
|
34 |
st.markdown(message["content"])
|
requirements.txt
CHANGED
@@ -13,6 +13,7 @@ cohere
|
|
13 |
wave
|
14 |
nltk
|
15 |
tiktoken
|
|
|
16 |
audio_recorder_streamlit
|
17 |
streamlit-option-menu
|
18 |
streamlit-lottie
|
|
|
13 |
wave
|
14 |
nltk
|
15 |
tiktoken
|
16 |
+
pymupdf
|
17 |
audio_recorder_streamlit
|
18 |
streamlit-option-menu
|
19 |
streamlit-lottie
|
utils/utils.py
CHANGED
@@ -12,11 +12,14 @@ def init_vectorstore(dataset_path="hub://p1utoze/default", embeddings="voyage/vo
|
|
12 |
db = DeepLake(dataset_path=dataset_path, embedding=embeddings)
|
13 |
return db
|
14 |
|
15 |
-
def load_documents(base_path="data/INFORMATION-TECHNOLOGY/"):
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
20 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
|
21 |
docs = loader.load_and_split(text_splitter)
|
22 |
db = init_vectorstore("hub://p1utoze/resumes", embeddings)
|
|
|
12 |
db = DeepLake(dataset_path=dataset_path, embedding=embeddings)
|
13 |
return db
|
14 |
|
15 |
+
def load_documents(file_path=None, base_path="data/INFORMATION-TECHNOLOGY/"):
|
16 |
+
if file_path:
|
17 |
+
loader = PyMuPDFLoader(file_path)
|
18 |
+
else:
|
19 |
+
for file in os.listdir(base_path):
|
20 |
+
path = base_path + file
|
21 |
+
print(path)
|
22 |
+
loader = PyMuPDFLoader(path)
|
23 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
|
24 |
docs = loader.load_and_split(text_splitter)
|
25 |
db = init_vectorstore("hub://p1utoze/resumes", embeddings)
|