File size: 6,397 Bytes
b06ff0c
 
5ad9f7c
 
 
 
 
b06ff0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ad9f7c
 
b06ff0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ad9f7c
 
 
 
 
 
 
 
 
 
 
 
 
b06ff0c
 
 
 
 
 
5ad9f7c
b06ff0c
 
 
 
 
5ad9f7c
b06ff0c
 
5ad9f7c
 
 
 
 
 
 
 
b06ff0c
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import streamlit as st
from dotenv import load_dotenv
from Components.FindKeyword import filter_keywords
from Components.PreprocessText import get_pdf_text
from Components.model_Responce import model_prediction
from Components.GooglePalmChat import get_qa_chain
from Components.Vector_db import encode_question, save_vector_store
from streamlit_extras.add_vertical_space import add_vertical_space
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from htmlTemplates import css, bot_template, user_template
from InstructorEmbedding import INSTRUCTOR
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def get_text_chunks(text):
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    chunks = text_splitter.split_text(text)
    return chunks

def button_function(all_text):
    # Add your desired functionality here
    # predictions = []
    for item in all_text:
        text = item['text']
        # filename = item['filename']
        pred = model_prediction(text)
        # predictions.append({"filename": filename, "prediction": pred})
        item['prediction'] = pred
    return all_text

# Main body
def main():
    # vector_store = None
    load_dotenv()
    st.header("Resume Filter using Keywords 💬")

    # Sidebar contents
    with st.sidebar:
        st.title('🤗💬 LLM Chat App')
        # upload a PDF file
        pdfs = st.file_uploader("Upload your Resumes", type='pdf',accept_multiple_files=True)

        # Get user preference for matching keywords
        # match_all_keywords = st.checkbox("Match All Keywords")

        # Choose functionality: Prediction or Filtering
        functionality = st.radio("Choose functionality:", ("Make Predictions", "Filter Keywords","Predict the Suitable canditate","Ask Questions"))
        # if functionality == "Ask Questions":
            
        add_vertical_space(5)
        st.write('Made with ❤️ by Fazni Farook')


    if pdfs is not None:
        all_text = get_pdf_text(pdfs)

        # if 'conversation' not in st.session_state:
        #     st.session_state.conversation = None

        # if 'chat_history' not in st.session_state:
        #     st.session_state.chat_history = None

        if functionality == "Make Predictions":
            if st.button('Make Prediction'):
                with st.spinner("Progressing"):
                    all_text = button_function(all_text)

                    for item in all_text:
                        filename = item["filename"]
                        text = item["text"]
                        pred = item["prediction"]
                        st.markdown(f"**Filename: {filename}**")
                        # st.markdown(text, unsafe_allow_html=True)
                        st.markdown(f"**Prediction: {pred}**")
                        st.markdown("---")

        elif functionality == "Filter Keywords":
            # getting the keywords
            keyword_input  = st.text_input("Keyword")
            keywords = [keyword.strip() for keyword in keyword_input.split(",")]

            if st.button('Filter Keywords'):
                with st.spinner("Progressing"):
                    filtered_text = filter_keywords(all_text, keywords)

                    for item in filtered_text:
                        filename = item["filename"]
                        text = item["text"]
                        st.markdown(f"**Filename: {filename}**")
                        st.markdown(text, unsafe_allow_html=True)
                        st.markdown("---")

        elif functionality == "Predict the Suitable canditate":
            # getting the keywords
            keyword  = st.text_input("Keyword")

            if st.button('Filter Resumes'):
                with st.spinner("Progressing"):
                    all_text = button_function(all_text)
                    # filtered_text = filter_keywords(all_text, keywords)
                    count = 0
                    for item in all_text:
                        filename = item["filename"]
                        prediction = item["prediction"]
                        if keyword.lower()==prediction.lower():
                            count+=1
                            st.markdown(f"**Filename: {filename}**")
                            st.markdown(prediction, unsafe_allow_html=True)
                            st.markdown("---")
                    
                    if count==0:
                        st.markdown("No match found")

        elif functionality == "Ask Questions":

            embeddings = HuggingFaceInstructEmbeddings()

            # new_db = FAISS.load_local("faiss_index_V2", embeddings)

            if st.button('Create Knowledgebase'):
                with st.spinner("Processing"):
                    # embeddings = HuggingFaceInstructEmbeddings()
                    # get pdf text
                    raw_text = get_pdf_text(pdfs, preprocess=False)

                    # get the text chunk
                    text_chunks = get_text_chunks(raw_text)

                    # create vector store
                    save_vector_store(text_chunks,embeddings)

            st.write(css,unsafe_allow_html=True)

            # create conversation chain
            # st.session_state.conversation = get_conversation_chain(vector_store)

            question = st.text_input("Ask Question: ")

            if st.button('Ask Question'):
                with st.spinner("Processing"):
                    if question:
                        # Convert the question to a vector
                        # question_vector = encode_question(question,embeddings)

                        # Convert the vector store to a compatible format
                        # output = new_db.similarity_search_by_vector(question_vector)
                        # page_content = output[0].page_content

                        # Asking Questions using Google Palm
                        chain = get_qa_chain(embeddings)
                        response = chain(question)
                        st.header("Answer: ")
                        st.write(response["result"])
                
if __name__=='__main__': 
    main()