plutoze committed on
Commit
3634e75
·
1 Parent(s): d240f92

update: add file upload query in search candidates page

Browse files
pages/02_Professional Screen.py CHANGED
@@ -11,7 +11,7 @@ from langchain.chat_models import ChatOpenAI
11
  from langchain.chains import ConversationChain, RetrievalQA
12
  from langchain.prompts.prompt import PromptTemplate
13
  from langchain.text_splitter import NLTKTextSplitter
14
- from langchain.embeddings import OpenAIEmbeddings, VoyageEmbeddings
15
  from langchain.vectorstores import FAISS
16
  import nltk
17
  from prompts.prompts import templates
 
11
  from langchain.chains import ConversationChain, RetrievalQA
12
  from langchain.prompts.prompt import PromptTemplate
13
  from langchain.text_splitter import NLTKTextSplitter
14
+ from langchain.embeddings import VoyageEmbeddings
15
  from langchain.vectorstores import FAISS
16
  import nltk
17
  from prompts.prompts import templates
pages/05_Search_Candidates.py CHANGED
@@ -1,39 +1,12 @@
1
  import streamlit as st
2
  import os
3
- from pathlib import Path
4
- from langchain.chat_models import ChatOpenAI
5
- from langchain.llms import HuggingFaceEndpoint
6
- from langchain.document_loaders import PyPDFDirectoryLoader
7
- from langchain.chains import RetrievalQA
8
- from langchain.chat_models import ChatOpenAI
9
- from langchain.vectorstores import DeepLake
10
  from utils.llm import model_pipeline, load_memory, typewriter
 
11
  from dotenv import load_dotenv
12
  load_dotenv()
13
 
14
  st.title("Search the right candidates!")
15
- # st.write( "OPENAI_API_KEY" not in os.environ.keys())
16
- # if "OPENAI_API_KEY" not in os.environ.keys():
17
- # os.environ["OPENAI_API_KEY"] = st.text_input(
18
- # "PLEASE ENTER YOUR OPEN API KEY. Head over to this [link](https://platform.openai.com/api-keys)",
19
- # placeholder="Enter your Key here...",
20
- # type="password")
21
- # st.write("You can also set the OPENAI_API_KEY environment variable.")
22
- # st.rerun()
23
- #
24
- # if "COHERE_API_KEY" not in os.environ.keys():
25
- # os.environ["COHERE_API_KEY"] = st.text_input(
26
- # "PLEASE ENTER YOUR COHERE API KEY. Head over to this [link](https://dashboard.cohere.com/welcome/login?redirect_uri=%2F)",
27
- # placeholder="Enter your Key here...",
28
- # type="password")
29
- # st.rerun()
30
- #
31
- # if "VOYAGE_API_KEY" not in os.environ.keys():
32
- # os.environ["VOYAGE_API_KEY"] = st.text_input(
33
- # "PLEASE ENTER YOUR VOYAGE API KEY. Head over to this [link](https://dash.voyageai.com/)",
34
- # placeholder="Enter your Key here...",
35
- # type="password")
36
- # st.rerun()
37
 
38
  if "messages" not in st.session_state:
39
  st.session_state.messages = []
@@ -41,6 +14,21 @@ if "messages" not in st.session_state:
41
  if "memory" not in st.session_state:
42
  st.session_state["memory"] = load_memory()
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  for message in st.session_state.messages:
45
  with st.chat_message(message["role"]):
46
  st.markdown(message["content"])
 
1
  import streamlit as st
2
  import os
3
+ import tempfile
 
 
 
 
 
 
4
  from utils.llm import model_pipeline, load_memory, typewriter
5
+ from utils.utils import load_documents
6
  from dotenv import load_dotenv
7
  load_dotenv()
8
 
9
  st.title("Search the right candidates!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  if "messages" not in st.session_state:
12
  st.session_state.messages = []
 
14
  if "memory" not in st.session_state:
15
  st.session_state["memory"] = load_memory()
16
 
17
+ uploaded_file = st.file_uploader("Choose a PDF...", type="pdf")
18
+ if uploaded_file is not None:
19
+ # Create a temporary directory
20
+ temp_dir = tempfile.mkdtemp()
21
+ file_name = st.text_input("Enter File name: ", "uploaded_file.pdf")
22
+ st.session_state["file_name"] = file_name
23
+ # Save the uploaded file to the temporary directory
24
+ with open(os.path.join(temp_dir, 'uploaded_file.pdf'), 'wb') as f:
25
+ f.write(uploaded_file.getvalue())
26
+
27
+ # Pass the file path to the load_documents function
28
+ load_documents(file_path=os.path.join(temp_dir, 'uploaded_file.pdf'))
29
+ st.session_state.messages.append({"role": "assistant", "content": "I have loaded the resume."})
30
+ del uploaded_file
31
+
32
  for message in st.session_state.messages:
33
  with st.chat_message(message["role"]):
34
  st.markdown(message["content"])
requirements.txt CHANGED
@@ -13,6 +13,7 @@ cohere
13
  wave
14
  nltk
15
  tiktoken
 
16
  audio_recorder_streamlit
17
  streamlit-option-menu
18
  streamlit-lottie
 
13
  wave
14
  nltk
15
  tiktoken
16
+ pymupdf
17
  audio_recorder_streamlit
18
  streamlit-option-menu
19
  streamlit-lottie
utils/utils.py CHANGED
@@ -12,11 +12,14 @@ def init_vectorstore(dataset_path="hub://p1utoze/default", embeddings="voyage/vo
12
  db = DeepLake(dataset_path=dataset_path, embedding=embeddings)
13
  return db
14
 
15
- def load_documents(base_path="data/INFORMATION-TECHNOLOGY/"):
16
- for file in os.listdir(base_path):
17
- path = base_path + file
18
- print(path)
19
- loader = PyMuPDFLoader(path)
 
 
 
20
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
21
  docs = loader.load_and_split(text_splitter)
22
  db = init_vectorstore("hub://p1utoze/resumes", embeddings)
 
12
  db = DeepLake(dataset_path=dataset_path, embedding=embeddings)
13
  return db
14
 
15
+ def load_documents(file_path=None, base_path="data/INFORMATION-TECHNOLOGY/"):
16
+ if file_path:
17
+ loader = PyMuPDFLoader(file_path)
18
+ else:
19
+ for file in os.listdir(base_path):
20
+ path = base_path + file
21
+ print(path)
22
+ loader = PyMuPDFLoader(path)
23
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
24
  docs = loader.load_and_split(text_splitter)
25
  db = init_vectorstore("hub://p1utoze/resumes", embeddings)