Nechba committed on
Commit
7aaad1d
·
1 Parent(s): 5c51e85

first commit

Browse files
app.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from utlis.helper import *
2
+ import sqlite3
3
+ import hashlib
4
+ import io
5
+
6
+
7
+
8
def _fetch_services(token):
    """Return the services the backend reports for ``token``.

    Sends a GET to ``SERVICES_API`` with the token as a JSON body and
    returns the decoded JSON response. If the request fails or the
    response is not valid JSON, an error is shown in the UI and an empty
    list is returned so the rest of the page can still render.
    """
    payload = json.dumps({"token": token})
    headers = {'Content-Type': 'application/json'}
    try:
        response = requests.get(SERVICES_API, data=payload, headers=headers)
        return json.loads(response.text)
    except (requests.RequestException, ValueError) as exc:
        # json.JSONDecodeError is a ValueError subclass.
        st.error(f"Could not load services: {exc}")
        return []


initialize_session_state()

with st.sidebar:
    st.image("logo.png", width=170)
    st.title("Config Settings")

    # "Delete service" is intentionally not offered at the moment.
    st.session_state.genre = st.radio(
        "Choose option",
        ["Select Service", "Add service"],
    )

    if st.session_state.genre == "Add service":
        st.title('Add service')
        add_new_service = st.checkbox("Add new service")
        if add_new_service:
            new_service = st.text_input("Enter service name")
            if new_service and st.button('Add'):
                add_service(st.session_state.token, new_service)

        # Fetched after the optional "Add" so a new service can appear
        # in the list on the same run.
        services = _fetch_services(st.session_state.token)
        if services:
            st.session_state.service = st.selectbox("Choose Service", services)

            # Uploading only makes sense once a target service is selected.
            st.session_state.uploaded_files = st.file_uploader(
                "Upload CVs", type=["pdf", "doc"], accept_multiple_files=True
            )
            if st.session_state.uploaded_files:
                st.session_state.process = st.button('Process')
                if st.session_state.process:
                    with st.spinner("Processing..."):
                        add_document(st.session_state.token, st.session_state.service)

    elif st.session_state.genre == "Select Service":
        st.title('Choose Service')
        services = _fetch_services(st.session_state.token)

        if services:
            st.session_state.service_slected_to_chat = st.selectbox(" ", services)
            st.session_state.top_k = st.slider(
                "Number of Candidates", min_value=1, max_value=10, value=3, step=1
            )
56
+ # data = {"token": st.session_state.token, "servicename": st.session_state.service_slected_to_chat}
57
+ # json_data = json.dumps(data)
58
+ # headers = {'Content-Type': 'application/json'}
59
+ # history_document = requests.get(DOCUMENT_API,data=json_data, headers=headers)
60
+ # history_document =json.loads(history_document.text).get("documents",[])
61
+ # history_document = [doc["documentname"] for doc in history_document]
62
+
63
+
64
+
65
+ # elif st.session_state.genre == "Delete service":
66
+ # st.title('Delete Service')
67
+ # data = {"token": st.session_state.token}
68
+ # json_data = json.dumps(data)
69
+ # headers = {'Content-Type': 'application/json'}
70
+ # services = requests.get(SERVICES_API,data=json_data, headers=headers)
71
+ # services =json.loads(services.text)
72
+ # if len(services)>=2:
73
+ # services.append("ALL")
74
+ # # Get list of documents from histrory
75
+ # if "ALL" in services:
76
+ # service_slected = st.multiselect(
77
+ # "",services ,default="ALL"
78
+ # )
79
+ # elif len(services)==1:
80
+ # service_slected = st.multiselect(
81
+ # "",services,default=services[0]
82
+ # )
83
+ # else:
84
+ # service_slected = st.multiselect(
85
+ # "",services
86
+ # )
87
+ # if "ALL" in service_slected:
88
+ # service_slected = services
89
+ # service_slected.remove("ALL")
90
+ # st.write("You selected:", service_slected)
91
+
92
+ # if len(service_slected) > 0:
93
+ # st.session_state.delete = st.button('Delete')
94
+ # if st.session_state.delete:
95
+ # delete_service(st.session_state.token ,service_slected)
96
+
97
+ # elif st.session_state.genre == "Delete CV(s)":
98
+ # st.title('Delete CV(s)')
99
+ # data = {"token": st.session_state.token}
100
+ # json_data = json.dumps(data)
101
+ # headers = {'Content-Type': 'application/json'}
102
+ # services = requests.get(SERVICES_API,data=json_data, headers=headers)
103
+ # services =json.loads(services.text)
104
+ # if len(services)>0:
105
+ # service = st.selectbox("Choose Service",services)
106
+ # data = {"token": st.session_state.token, "servicename": service}
107
+ # json_data = json.dumps(data)
108
+ # headers = {'Content-Type': 'application/json'}
109
+
110
+
111
+ # st.write("You selected:", document_slected_to_delete)
112
+ # if len(document_slected_to_delete) > 0:
113
+ # st.session_state.delete = st.button('Delete')
114
+ # if st.session_state.delete:
115
+ # delete_document(st.session_state.token,st.session_state.service ,document_slected_to_delete)
116
+
117
+ # css_style = """
118
+ # <style>
119
+ # .title {
120
+ # white-space: nowrap;
121
+ # }
122
+ # </style>
123
+ # """
124
+
125
+ # st.markdown(css_style, unsafe_allow_html=True)
126
# Page-wide CSS: accent colour for `.st-bm` / `.highlight`, and the
# `.card` box (rounded corners, shadow that deepens on hover).
_PAGE_STYLE = """
<style>
.st-bm {
color: #1E90FF; /* DodgerBlue color */
}
.card {
border-radius: 10px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
padding: 15px;
margin: 10px 0;
transition: box-shadow 0.3s ease-in-out;
}
.card:hover {
box-shadow: 0 4px 8px rgba(0,0,0,0.2);
}
.highlight {
color: #1E90FF; /* Custom color */
}
</style>
"""
st.markdown(_PAGE_STYLE, unsafe_allow_html=True)
146
+
147
import html

with st.container():
    st.markdown('<h1 class="title">SmartHire Matcher</h1>', unsafe_allow_html=True)
    col1, col2 = st.columns([3, 1])
    if st.session_state.genre == "Select Service" and st.session_state.service_slected_to_chat:
        query = st.text_area("Add description of your offer:", height=300)

        if query and st.button('Process'):
            with st.spinner("Finding Matching CVs..."):
                # Use the session token (same one the sidebar sends)
                # rather than a separate hard-coded literal.
                raw_results = search_document(
                    index_name="cvindex",
                    token=st.session_state.token,
                    service_name=st.session_state.service_slected_to_chat,
                    query=query,
                    top_k=st.session_state.top_k,
                )
                # Kept in session state so results survive reruns, e.g.
                # when a download button is clicked.
                st.session_state.results_str = raw_results.decode('utf-8')

        # Displaying results
        try:
            if st.session_state.results_str:  # check if there are results
                results = json.loads(st.session_state.results_str)

                for index, item in enumerate(results):
                    stored_name = item['documentname']
                    with st.container():
                        col1, col2 = st.columns([3, 1])
                        with col1:
                            # The name comes from an uploaded file; escape
                            # it because the card is rendered as raw HTML.
                            shown_name = html.escape(stored_name.replace('_pdf', ' '))
                            score = round(item['score'] * 100, 3)
                            card = (
                                "<div class='card'>"
                                f"<h3><span class='highlight'>Top:</span> {index+1}</h3>"
                                f"<p><span class='highlight'>Score:</span> {score}%</p>"
                                f"<p><span class='highlight'>Document Name:</span> {shown_name}</p>"
                                "</div>"
                            )
                            st.markdown(card, unsafe_allow_html=True)
                        with col2:
                            pdf_file = io.BytesIO(base64.b64decode(item['encoded_cv']))
                            # Only the last "_" stands for the extension
                            # dot ("John_Doe_pdf" -> "John_Doe.pdf");
                            # earlier underscores belong to the name.
                            download_name = ".".join(stored_name.rsplit('_', 1))
                            st.download_button(
                                "Download CV",
                                data=pdf_file,
                                file_name=download_name,
                                mime='application/pdf',
                                key=f"download_{index}_{stored_name}",
                            )
                if not results:
                    st.error("No results found.")
        except Exception as e:
            # UI boundary: report malformed or unexpected payloads instead
            # of crashing the page.
            st.error("Failed to load results. Please try again later."+ str(e))
181
+
logo.png ADDED
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ pydantic
4
+ requests
5
+ typing
6
+ redis
7
+ numpy
8
+ gevent
9
+ PyPDF2
10
+ pdfplumber
11
+ openai
12
+ google-generativeai==0.7.0
utlis/__init__.py ADDED
File without changes
utlis/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (158 Bytes). View file
 
utlis/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (140 Bytes). View file
 
utlis/__pycache__/constant.cpython-310.pyc ADDED
Binary file (857 Bytes). View file
 
utlis/__pycache__/constant.cpython-39.pyc ADDED
Binary file (842 Bytes). View file
 
utlis/__pycache__/helper.cpython-310.pyc ADDED
Binary file (5.25 kB). View file
 
utlis/__pycache__/helper.cpython-39.pyc ADDED
Binary file (7.37 kB). View file
 
utlis/constant.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # IP_WEB_SERVER = "https://f564-196-65-150-53.ngrok-free.app"
3
+ # IP_MODEL_SERVER = "https://fluffy-mole-81.telebit.io"
4
import os

# Base URL of the backend web server. The default is the tunnel URL this
# file shipped with; set the IP_WEB_SERVER environment variable to point
# at another backend (e.g. "http://localhost:8000") without editing code.
# A trailing "/" is stripped so the endpoint paths below join cleanly.
IP_WEB_SERVER = os.environ.get(
    "IP_WEB_SERVER", "https://36c9-41-248-140-202.ngrok-free.app"
).rstrip("/")

# Backend endpoints, all relative to IP_WEB_SERVER.
SERVICES_API = IP_WEB_SERVER + "/services/"
ADD_SERVICES_API = IP_WEB_SERVER + "/add_services"
ADD_STORE_DOCUMENT = IP_WEB_SERVER + "/add_and_store_document"
# NOTE(review): "/serach" looks like a typo for "/search"; kept as-is
# because it must match whatever route the server exposes. Confirm.
SEARCH_API = IP_WEB_SERVER + "/serach"
DOCUMENT_API = IP_WEB_SERVER + "/documents"
REMOVE_DOCUMENTS_API = IP_WEB_SERVER + "/remove_documents"
REMOVE_SERVICE_API = IP_WEB_SERVER + "/remove_service"
GET_NUM_PAGES = IP_WEB_SERVER + "/get_num_pages"
RESPONSE_API = IP_WEB_SERVER + "/structure_response"
RESPONSE_TXT_API = IP_WEB_SERVER + "/structure_response_text"

# Template schema; every leaf is the placeholder string "<variable>".
DEFAULT_SCHEMA = {
    "GeographicContext": "<variable>",
    "SubGeographicContext": "<variable>",
    "Channel": "<variable>",
    "RateType": "<variable>",
    "Notes": ["<variable>"],
    "Rates": [
        {
            "PaymentProduct": "<variable>",
            "Details": [
                {
                    "FeeTier": "<variable>",
                    "IRD": ["<variable>"],
                    "Rate": "<variable>"
                },
            ]
        },
    ]
}
utlis/helper.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pdfplumber
2
+ import streamlit as st
3
+ import requests
4
+ import json
5
+ import redis
6
+ import redis.commands.search
7
+ from redis.commands.search.field import TagField, VectorField, TextField
8
+ from redis.commands.search.indexDefinition import IndexDefinition, IndexType
9
+ import logging
10
+ from redis.commands.search.query import Query
11
+ import numpy as np
12
+ from typing import List, Dict, Any
13
+ from utlis.constant import *
14
+ from PIL import Image
15
+ import google.generativeai as genai
16
import os

# The Gemini API key is read from the GOOGLE_API_KEY environment variable
# instead of being embedded in source. Any key that was ever committed to
# the repository should be treated as leaked and revoked.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
17
+ import base64
18
+ import sqlite3
19
def initialize_session_state():
    """Seed ``st.session_state`` with a default for every key the app uses.

    Keys that already exist are left untouched, so calling this on every
    rerun does not overwrite values set earlier in the session.
    """
    defaults = {
        "doc_ortext": None,
        "token": "abcd",
        "service": None,
        "results_str": False,
        "service_slected_to_chat": False,
        "embdding_model": None,
        "indexing_method": None,
        "uploaded_files": None,
        "messages": [{"role": "assistant", "content": "How can I help you?"}],
    }
    for key, initial in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = initial
39
+
40
+
41
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Passed straight to ``pdfplumber.open``.

    Returns:
        One string with the layout-preserving text of all pages. A page
        for which ``extract_text`` yields nothing contributes an empty
        string.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # `or ""` guards against extract_text returning None for a page
        # with no extractable characters, which would break concatenation.
        return "".join(
            page.extract_text(
                x_tolerance=2, y_tolerance=4, layout=True, x_density=5, y_density=10
            ) or ""
            for page in pdf.pages
        )
48
+
49
def delete_service(token,service_slected_to_delete):
    """Ask the backend to remove each named service and report the outcome.

    One DELETE request is sent to ``REMOVE_SERVICE_API`` per service name.
    A success or error banner is shown for each, depending on whether the
    JSON reply has ``"success"`` equal to ``True``.
    """
    json_headers = {'Content-Type': 'application/json'}
    for name in service_slected_to_delete:
        body = json.dumps({"token": token, "servicename": name})
        reply = requests.delete(REMOVE_SERVICE_API, data=body, headers=json_headers)
        succeeded = json.loads(reply.text).get("success") == True
        if not succeeded:
            st.error(f"{name} not deleted successfully")
            continue
        st.success(f"{name} deleted successfully")
68
+
69
def delete_document(token, service,document_slected_to_delete):
    """Ask the backend to remove the given documents from a service.

    All document names are sent in a single DELETE request to
    ``REMOVE_DOCUMENTS_API``. A success banner is shown when the JSON
    reply has ``"status" == "success"``, otherwise an error banner.
    The selection and the raw response object are printed to stdout.
    """
    print(document_slected_to_delete)
    body = json.dumps(
        {
            "token": token,
            "service_name": service,
            "document_names": document_slected_to_delete,
        }
    )
    reply = requests.delete(
        REMOVE_DOCUMENTS_API,
        data=body,
        headers={'Content-Type': 'application/json'},
    )
    print(reply)
    outcome = json.loads(reply.text).get("status")
    if outcome != "success":
        st.error("document(s) not deleted successfully")
    else:
        st.success("document(s) deleted successfully")
92
def gemini_vision(file):
    """Extract the text visible in an image using the Gemini vision model.

    Args:
        file: Anything ``PIL.Image.open`` accepts.

    Returns:
        The ``text`` attribute of the model's response.
    """
    prompt = "please extract all text from this image"
    model = genai.GenerativeModel('gemini-pro-vision')
    # The context manager releases the image's resources once the request
    # has been made, instead of leaving the handle open.
    with Image.open(file) as load_image:
        response = model.generate_content([prompt, load_image])

    return response.text
99
def add_service(token,servicename):
    """Register a new service with the backend and report the outcome.

    POSTs ``{"token", "services": [{"servicename"}]}`` as JSON to
    ``ADD_SERVICES_API``. Shows a success banner when the reply contains
    a truthy ``"added_services"``; otherwise shows the server's
    ``"message"``, or a generic failure text when none is provided.
    """
    payload = json.dumps(
        {
            "token": token,
            "services": [
                {"servicename": servicename},
            ],
        }
    )
    headers = {'Content-Type': 'application/json'}

    response = requests.post(ADD_SERVICES_API, data=payload, headers=headers)
    # Decode once and reuse for both the success check and the message.
    reply = json.loads(response.text)
    if reply.get("added_services"):
        st.success(f"{servicename} added successfully")
    else:
        # Avoid rendering a literal "None" when the server sends no message.
        st.error(reply.get("message") or f"{servicename} not added")
123
def add_text_document(token, servicename):
    """Upload free text from session state to the backend as a document.

    Reads the text from ``st.session_state.text_area`` and the name from
    ``st.session_state.name_text_area``. Spaces, parentheses, hyphens and
    dots in the name become underscores. The text is Base64-encoded and
    POSTed as JSON to ``ADD_STORE_DOCUMENT``; a success or error banner
    is shown according to the reply's ``"status"``.
    """
    raw_text = st.session_state.text_area
    # One translation pass turns each of " ( ) - ." into "_".
    to_underscore = str.maketrans({ch: "_" for ch in " ()-."})
    document_name = st.session_state.name_text_area.translate(to_underscore)

    payload = {
        "token": token,
        "service_name": servicename,
        "document_name": document_name,
        # Sent under 'file'; the API is assumed to accept Base64 text there.
        "file": base64.b64encode(raw_text.encode('utf-8')).decode('utf-8'),
    }
    reply = requests.post(
        ADD_STORE_DOCUMENT,
        data=json.dumps(payload),
        headers={'Content-Type': 'application/json'},
    )

    if json.loads(reply.text).get("status") == "success":
        st.success(f"{document_name} uploaded successfully as text")
    else:
        st.error(f"{document_name} not uploaded successfully")
149
+
150
def add_document(token,servicename):
    """Upload every file in ``st.session_state.uploaded_files`` to a service.

    For each file, the stored document name is the file name with spaces
    removed and ``( ) - .`` replaced by underscores. The file content is
    Base64-encoded and POSTed as JSON to ``ADD_STORE_DOCUMENT``. A success
    or error banner is shown per file, labelled with the file name with
    spaces turned into underscores.
    """
    # Spaces are dropped; the other listed characters become "_".
    name_cleanup = str.maketrans(
        {" ": None, "(": "_", ")": "_", "-": "_", ".": "_"}
    )
    request_headers = {'Content-Type': 'application/json'}

    for upload in st.session_state.uploaded_files:
        stored_name = upload.name.translate(name_cleanup)
        payload = json.dumps(
            {
                "token": token,
                "service_name": servicename,
                "document_name": stored_name,
                "file": base64.b64encode(upload.read()).decode('utf-8'),
            }
        )
        reply = requests.post(ADD_STORE_DOCUMENT, data=payload, headers=request_headers)

        shown_name = upload.name.replace(" ","_")
        if json.loads(reply.content).get("status") != "success":
            st.error(f"{shown_name} not added successfully")
        else:
            st.success(f"{shown_name} added successfully")
186
+
187
+
188
+
189
+
190
def search_document(index_name,token,service_name,query, top_k ):
    """POST a search request to ``SEARCH_API`` and return the raw reply.

    The five arguments are sent unchanged as a JSON object with the keys
    ``index_name``, ``token``, ``service_name``, ``query`` and ``top_k``.
    The endpoint URL is printed to stdout before the request.

    Returns:
        The response body as bytes (``response.content``).
    """
    print(SEARCH_API)
    payload = dict(
        index_name=index_name,
        token=token,
        service_name=service_name,
        query=query,
        top_k=top_k,
    )
    reply = requests.post(
        SEARCH_API,
        data=json.dumps(payload),
        headers={'Content-Type': 'application/json'},
    )
    return reply.content
210
+