import streamlit as st
from dotenv import load_dotenv
from Components.FindKeyword import filter_keywords
from Components.PreprocessText import get_pdf_text
from Components.model_Responce import model_prediction
from Components.GooglePalmChat import get_qa_chain
from Components.Vector_db import save_vector_store
from streamlit_extras.add_vertical_space import add_vertical_space
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceInstructEmbeddings
from htmlTemplates import css
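
# Note: load_dotenv() in main() reads credentials from a local .env file;
# the Google PaLM chain in Components/GooglePalmChat.py presumably expects
# GOOGLE_API_KEY there (the exact variable name is an assumption, as it is
# not visible in this file).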


def get_text_chunks(text):
    """Split raw text into overlapping chunks suitable for embedding."""
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    return chunks
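
# Illustrative example (assumed behavior, not from the original code): with
# chunk_size=1000 and chunk_overlap=200, a ~2,400-character document is split
# on "\n" and re-merged into roughly three chunks, with consecutive chunks
# sharing up to 200 characters so that content straddling a chunk boundary
# survives intact in at least one chunk.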


def button_function(all_text):
    """Run the classification model over every resume and attach the result."""
    for item in all_text:
        text = item['text']
        pred = model_prediction(text)
        item['prediction'] = pred
    return all_text
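
# Note: model_prediction (Components/model_Responce.py) is expected to return
# a single category-label string per resume; the "Predict the Suitable
# candidate" branch in main() compares that label to the user's keyword
# case-insensitively.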


# Main body
def main():
    load_dotenv()
    st.header("Resume Filter using Keywords 💬")

    # Sidebar contents
    with st.sidebar:
        st.title('🤗💬 LLM Chat App')

        # Upload one or more PDF resumes
        pdfs = st.file_uploader("Upload your Resumes", type='pdf', accept_multiple_files=True)

        # Choose functionality: prediction, filtering, candidate matching, or Q&A
        functionality = st.radio(
            "Choose functionality:",
            ("Make Predictions", "Filter Keywords", "Predict the Suitable candidate", "Ask Questions"),
        )

        add_vertical_space(5)
        st.write('Made with ❤️ by Fazni Farook')

    # With accept_multiple_files=True the uploader returns a (possibly empty)
    # list, so a truthiness check is more accurate than `is not None`
    if pdfs:
        all_text = get_pdf_text(pdfs)

        if functionality == "Make Predictions":
            if st.button('Make Prediction'):
                with st.spinner("Processing"):
                    all_text = button_function(all_text)

                    for item in all_text:
                        filename = item["filename"]
                        pred = item["prediction"]
                        st.markdown(f"**Filename: {filename}**")
                        st.markdown(f"**Prediction: {pred}**")
                        st.markdown("---")

        elif functionality == "Filter Keywords":
            # Comma-separated list of keywords to look for in each resume
            keyword_input = st.text_input("Keyword")
            keywords = [keyword.strip() for keyword in keyword_input.split(",") if keyword.strip()]

            if st.button('Filter Keywords'):
                with st.spinner("Processing"):
                    filtered_text = filter_keywords(all_text, keywords)

                    for item in filtered_text:
                        filename = item["filename"]
                        text = item["text"]
                        st.markdown(f"**Filename: {filename}**")
                        st.markdown(text, unsafe_allow_html=True)
                        st.markdown("---")

        elif functionality == "Predict the Suitable candidate":
            # Job category to match against each resume's predicted label
            keyword = st.text_input("Keyword")

            if st.button('Filter Resumes'):
                with st.spinner("Processing"):
                    all_text = button_function(all_text)

                    count = 0
                    for item in all_text:
                        filename = item["filename"]
                        prediction = item["prediction"]
                        if keyword.lower() == prediction.lower():
                            count += 1
                            st.markdown(f"**Filename: {filename}**")
                            st.markdown(prediction, unsafe_allow_html=True)
                            st.markdown("---")
                    if count == 0:
                        st.markdown("No match found")

        elif functionality == "Ask Questions":
            embeddings = HuggingFaceInstructEmbeddings()

            if st.button('Create Knowledgebase'):
                with st.spinner("Processing"):
                    # Get the raw (unpreprocessed) PDF text
                    raw_text = get_pdf_text(pdfs, preprocess=False)
                    # Split it into overlapping chunks
                    text_chunks = get_text_chunks(raw_text)
                    # Embed the chunks and persist the vector store
                    save_vector_store(text_chunks, embeddings)
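                    # For reference, save_vector_store (Components/Vector_db.py)
                    # presumably builds and persists a FAISS index along these
                    # lines; this is a sketch under assumptions, not the actual
                    # implementation, and the index name "faiss_index_V2" is
                    # assumed:
                    #
                    #   from langchain.vectorstores import FAISS
                    #   db = FAISS.from_texts(text_chunks, embedding=embeddings)
                    #   db.save_local("faiss_index_V2")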

            st.write(css, unsafe_allow_html=True)

            question = st.text_input("Ask Question: ")
            if st.button('Ask Question'):
                with st.spinner("Processing"):
                    if question:
                        # Answer with the Google PaLM QA chain, which retrieves
                        # relevant context from the saved vector store
                        chain = get_qa_chain(embeddings)
                        response = chain(question)
                        st.header("Answer: ")
                        st.write(response["result"])


if __name__ == '__main__':
    main()
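
# Launch locally with Streamlit (filename assumed):
#   streamlit run app.py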