cy24 committed on
Commit
270cc9c
1 Parent(s): 81608a4

Upload 5 files

Files changed (5)
  1. .env +2 -0
  2. .env.example +2 -0
  3. app.py +76 -0
  4. requirements.txt +9 -0
  5. utils.py +103 -0
.env ADDED
@@ -0,0 +1,2 @@
+ OPENAI_API_KEY="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"
+ HUGGINGFACEHUB_API_TOKEN="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"
.env.example ADDED
@@ -0,0 +1,2 @@
+ OPENAI_API_KEY=""
+ HUGGINGFACEHUB_API_TOKEN=""
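
These variables are read at startup by load_dotenv() in app.py. A minimal sketch of how the values reach the code, assuming python-dotenv's standard behavior of populating the process environment:

    from dotenv import load_dotenv
    import os

    load_dotenv()  # reads key=value pairs from .env into os.environ
    openai_key = os.getenv("OPENAI_API_KEY")  # None if the variable is not set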
app.py ADDED
@@ -0,0 +1,76 @@
+ import streamlit as st
+ from dotenv import load_dotenv
+ from utils import *
+ import uuid
+
+
+ # Creating session variables
+ if 'unique_id' not in st.session_state:
+     st.session_state['unique_id'] = ''
+
+
+ def main():
+     load_dotenv()
+     st.set_page_config(page_title="Resume Screening Assistance")
+     st.markdown('<style>p {font-size: 20px;}</style>', unsafe_allow_html=True)
+     st.title("AI Resume Screening Assistance 💁")
+     st.subheader("I can help you in the resume screening process")
+
+     job_description = st.text_area("Please paste the 'JOB DESCRIPTION' here...", key="1")
+     document_count = st.text_input("No. of 'RESUMES' to return", key="2")
+     # Upload the resumes (only PDF files are allowed)
+     pdf = st.file_uploader("Upload resumes here, only PDF files allowed", type=["pdf"], accept_multiple_files=True)
+
+     submit = st.button("Help me with the analysis")
+
+     if submit:
+         with st.spinner('Wait for it...'):
+
+             # Creating a unique ID so that we can query and get back only
+             # this user's uploaded documents from the Pinecone vector store
+             st.session_state['unique_id'] = uuid.uuid4().hex
+
+             # Create a documents list out of all the user-uploaded PDF files
+             final_docs_list = create_docs(pdf, st.session_state['unique_id'])
+
+             # Displaying the count of resumes that have been uploaded
+             st.write("*Resumes uploaded* : " + str(len(final_docs_list)))
+
+             # Create embeddings instance
+             embeddings = create_embeddings_load_data()
+
+             # Push data to Pinecone
+             push_to_pinecone("1a62441c-1d4a-4d80-ab0c-b48d7503fe62", "gcp-starter", "ai-resume", embeddings, final_docs_list)
+
+             # Fetch relevant documents from Pinecone
+             relevant_docs = similar_docs(job_description, document_count, "1a62441c-1d4a-4d80-ab0c-b48d7503fe62", "gcp-starter", "ai-resume", embeddings, st.session_state['unique_id'])
+
+             # Introducing a line separator
+             st.write(":heavy_minus_sign:" * 30)
+
+             # For each item in relevant_docs, display some of its info on the UI
+             for item in range(len(relevant_docs)):
+
+                 st.subheader("👉 " + str(item + 1))
+
+                 # Displaying the file name
+                 st.write("**File** : " + relevant_docs[item][0].metadata['name'])
+
+                 # Introducing the expander feature
+                 with st.expander('Show me 👀'):
+                     st.info("**Match Score** : " + str(relevant_docs[item][1]))
+
+                     # Gets the summary of the current item using the 'get_summary'
+                     # function we created, which uses an LLM & a LangChain summarize chain
+                     summary = get_summary(relevant_docs[item][0])
+                     st.write("**Summary** : " + summary)
+
+         st.success("Hope I was able to save your time ❤️")
+
+
+ # Invoking main function
+ if __name__ == '__main__':
+     main()
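
The app is launched the standard Streamlit way (streamlit run app.py). One detail worth noting: the loop above indexes relevant_docs[item][0] and relevant_docs[item][1] because similar_docs returns (Document, score) pairs. A minimal sketch of the shape the UI loop consumes, assuming LangChain's usual similarity_search_with_score return format:

    # relevant_docs is a list of (Document, score) tuples, e.g.
    # [(Document(page_content='...', metadata={'name': 'resume1.pdf', ...}), 0.83), ...]
    for doc, score in relevant_docs:
        print(doc.metadata['name'], score)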
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ langchain
+ streamlit
+ openai
+ tiktoken
+ python-dotenv
+ unstructured
+ pinecone-client
+ pypdf
+ sentence-transformers
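
None of these dependencies are pinned, so pip install -r requirements.txt resolves to the latest releases. Note that utils.py uses the pre-0.1 LangChain import paths (langchain.vectorstores, langchain.llms) and the pinecone.init() call from pinecone-client v2, so pinning those two packages to versions from that era may be needed for the code to run unmodified.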
utils.py ADDED
@@ -0,0 +1,103 @@
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.vectorstores import Pinecone
+ from langchain.llms import OpenAI
+ from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
+ from langchain.schema import Document
+ from langchain.chains.summarize import load_summarize_chain
+ from langchain import HuggingFaceHub
+ from langchain.llms import CTransformers
+ import pinecone
+ from PyPDF2 import PdfReader
+
+
+ # Extract the text from a PDF file
+ def get_pdf_text(pdf_doc):
+     text = ""
+     pdf_reader = PdfReader(pdf_doc)
+     for page in pdf_reader.pages:
+         text += page.extract_text()
+     return text
+
+
+ # Iterate over the PDF files that the user uploaded, one by one,
+ # and wrap each in a LangChain Document together with its metadata
+ def create_docs(user_pdf_list, unique_id):
+     docs = []
+     for filename in user_pdf_list:
+
+         chunks = get_pdf_text(filename)
+
+         # Adding items to our list - adding data & its metadata
+         docs.append(Document(
+             page_content=chunks,
+             metadata={"name": filename.name, "id": filename.id, "type": filename.type, "size": filename.size, "unique_id": unique_id},
+         ))
+
+     return docs
+
+
+ # Create embeddings instance
+ def create_embeddings_load_data():
+     # embeddings = OpenAIEmbeddings()
+     embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+     return embeddings
+
+
+ # Function to push data to the vector store - Pinecone here
+ def push_to_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings, docs):
+
+     pinecone.init(
+         api_key=pinecone_apikey,
+         environment=pinecone_environment
+     )
+     Pinecone.from_documents(docs, embeddings, index_name=pinecone_index_name)
+
+
+ # Function to pull information from the vector store - Pinecone here
+ def pull_from_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings):
+
+     pinecone.init(
+         api_key=pinecone_apikey,
+         environment=pinecone_environment
+     )
+
+     index_name = pinecone_index_name
+
+     index = Pinecone.from_existing_index(index_name, embeddings)
+     return index
+
+
+ # Function to get relevant documents from the vector store, based on user input
+ def similar_docs(query, k, pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings, unique_id):
+
+     pinecone.init(
+         api_key=pinecone_apikey,
+         environment=pinecone_environment
+     )
+
+     index = pull_from_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings)
+     # Filter on unique_id so only this user's uploaded documents are matched
+     similar_docs = index.similarity_search_with_score(query, int(k), {"unique_id": unique_id})
+     return similar_docs
+
+
+ # Helps us get the summary of a document
+ def get_summary(current_doc):
+     # llm = OpenAI(temperature=0)
+     # llm = HuggingFaceHub(repo_id="bigscience/bloom", model_kwargs={"temperature": 1e-10})
+     llm = CTransformers(model='C:/Users/User/Documents/mistral-7b-v0.1.Q5_K_M.gguf', model_type='mistral', config={'temperature': 1e-10})
+     chain = load_summarize_chain(llm, chain_type="map_reduce")
+     summary = chain.run([current_doc])
+
+     return summary
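
Taken together, these helpers form the retrieval pipeline that app.py drives: extract text, wrap it in Documents, embed, push to Pinecone, then query with a metadata filter. A minimal query-side sketch, assuming hypothetical placeholder credentials (the key below is illustrative; the environment and index name mirror the values used in app.py):

    from utils import create_embeddings_load_data, similar_docs

    # Hypothetical placeholders - substitute your own Pinecone settings
    PINECONE_KEY = "your-pinecone-api-key"
    PINECONE_ENV = "gcp-starter"
    INDEX_NAME = "ai-resume"

    embeddings = create_embeddings_load_data()
    results = similar_docs("Senior Python developer, 5+ years", 2,
                           PINECONE_KEY, PINECONE_ENV, INDEX_NAME,
                           embeddings, "<the unique_id used at upload time>")
    for doc, score in results:
        print(doc.metadata["name"], score)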