import streamlit as st
from dotenv import load_dotenv
from Components.FindKeyword import filter_keywords
from Components.PreprocessText import get_pdf_text
from Components.model_Responce import model_prediction
from Components.GooglePalmChat import get_qa_chain
from Components.Vector_db import encode_question, save_vector_store
from streamlit_extras.add_vertical_space import add_vertical_space
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from htmlTemplates import css, bot_template, user_template
from InstructorEmbedding import INSTRUCTOR
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def get_text_chunks(text):
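    """Split raw text into 1000-character chunks with 200-character overlap for embedding."""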
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    chunks = text_splitter.split_text(text)
    return chunks

def button_function(all_text):
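    """Run the prediction model on each resume and attach the result to its entry."""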
    for item in all_text:
        text = item['text']
        pred = model_prediction(text)
        item['prediction'] = pred
    return all_text

def main():
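    """Streamlit entry point: upload resumes, then predict, filter, or ask questions about them."""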
    load_dotenv()
    st.header("Resume Filter using Keywords 💬")

    with st.sidebar:
        st.title('🤖💬 LLM Chat App')

        pdfs = st.file_uploader("Upload your Resumes", type='pdf', accept_multiple_files=True)

        functionality = st.radio(
            "Choose functionality:",
            ("Make Predictions", "Filter Keywords", "Predict the Suitable Candidate", "Ask Questions")
        )

        add_vertical_space(5)
        st.write('Made with ❤️ by Fazni Farook')

    if pdfs is not None:
        all_text = get_pdf_text(pdfs)
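        # get_pdf_text returns one entry per uploaded resume ("filename" plus extracted "text").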

        if functionality == "Make Predictions":
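            # Classify every uploaded resume with the prediction model and show the result per file.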
            if st.button('Make Prediction'):
                with st.spinner("Processing"):
                    all_text = button_function(all_text)

                for item in all_text:
                    filename = item["filename"]
                    text = item["text"]
                    pred = item["prediction"]
                    st.markdown(f"**Filename: {filename}**")
                    st.markdown(f"**Prediction: {pred}**")
                    st.markdown("---")

        elif functionality == "Filter Keywords":
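            # Filter resumes by the comma-separated keywords and render the returned text (it may contain HTML highlighting).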
            keyword_input = st.text_input("Keyword")
            keywords = [keyword.strip() for keyword in keyword_input.split(",")]

            if st.button('Filter Keywords'):
                with st.spinner("Processing"):
                    filtered_text = filter_keywords(all_text, keywords)

                for item in filtered_text:
                    filename = item["filename"]
                    text = item["text"]
                    st.markdown(f"**Filename: {filename}**")
                    st.markdown(text, unsafe_allow_html=True)
                    st.markdown("---")

        elif functionality == "Predict the Suitable Candidate":
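            # Count how many resumes the model predicts for the requested role and list them.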
            keyword = st.text_input("Keyword")

            if st.button('Filter Resumes'):
                with st.spinner("Processing"):
                    all_text = button_function(all_text)

                count = 0
                for item in all_text:
                    filename = item["filename"]
                    prediction = item["prediction"]
                    if keyword.lower() == prediction.lower():
                        count += 1
                        st.markdown(f"**Filename: {filename}**")
                        st.markdown(prediction, unsafe_allow_html=True)
                        st.markdown("---")

                if count == 0:
                    st.markdown("No match found")

        elif functionality == "Ask Questions":
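            # Build a vector store from the resumes, then answer free-form questions with the QA chain.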
            embeddings = HuggingFaceInstructEmbeddings()
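            # The same Instructor embeddings are used to build the vector store and to run the QA chain.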

            if st.button('Create Knowledgebase'):
                with st.spinner("Processing"):
                    raw_text = get_pdf_text(pdfs, preprocess=False)
                    text_chunks = get_text_chunks(raw_text)
                    save_vector_store(text_chunks, embeddings)

            st.write(css, unsafe_allow_html=True)

            question = st.text_input("Ask Question: ")

            if st.button('Ask Question'):
                with st.spinner("Processing"):
                    if question:
                        chain = get_qa_chain(embeddings)
                        response = chain(question)
                        st.header("Answer: ")
                        st.write(response["result"])


if __name__ == '__main__':
    main()