mvectors
Browse files
- .gitignore +1 -0
- requirements.txt +0 -1
- semantic_search/llm_eval.py +24 -52
.gitignore
CHANGED
@@ -10,3 +10,4 @@ split_pdf_csv/
 uploaded_images/
 images/
 gen_images/
+app.zip
requirements.txt
CHANGED
@@ -22,4 +22,3 @@ matplotlib
 scipy
 seaborn
 Pillow
-nltk
semantic_search/llm_eval.py
CHANGED
@@ -10,41 +10,38 @@ import logging
 import requests
 import numpy as np
 import pandas as pd
-from PIL import Image
 from typing import List
 from botocore.auth import SigV4Auth
-from langchain.llms.bedrock import Bedrock
+#from langchain.llms.bedrock import Bedrock
 from botocore.awsrequest import AWSRequest
 import streamlit as st
 import re
-import numpy as np
 from sklearn.metrics import ndcg_score,dcg_score
 from sklearn import preprocessing as pre
-import invoke_models
+import invoke_models#invoke_llm_model
 
-bedrock_ = boto3.client(
-    'bedrock-runtime',
-    aws_access_key_id=st.secrets['user_access_key'],
-    aws_secret_access_key=st.secrets['user_secret_key'], region_name = 'us-east-1'
-)
+# bedrock_ = boto3.client(
+#     'bedrock-runtime',
+#     aws_access_key_id=st.secrets['user_access_key'],
+#     aws_secret_access_key=st.secrets['user_secret_key'], region_name = 'us-east-1'
+# )
 
-inference_modifier = {
-    "max_tokens_to_sample": 4096,
-    "temperature": 0,
-    "top_k": 250,
-    "top_p": 1,
-    "stop_sequences": ["\n\nHuman"],
-}
-textgen_llm = Bedrock(
-    model_id="anthropic.claude-v2:1",
-    client=bedrock_,
-    model_kwargs=inference_modifier,
-)
+# inference_modifier = {
+#     "max_tokens_to_sample": 4096,
+#     "temperature": 0,
+#     "top_k": 250,
+#     "top_p": 1,
+#     "stop_sequences": ["\n\nHuman"],
+# }
+# textgen_llm = Bedrock(
+#     model_id="anthropic.claude-v2:1",
+#     client=bedrock_,
+#     model_kwargs=inference_modifier,
+# )
 
 
 #@st.cache_data
 def eval(question, answers):
-    #if()
     search_results: str = ""
     prompt: str = """Human: You are a grader assessing relevance of a retrieved document to a user question. \n
     The User question and Retrieved documents are provided below. The Retrieved documents are retail product descriptions that the human is looking for. \n
@@ -73,21 +70,11 @@ def eval(question, answers):
         search_results += f"Index: {index_}, Description: {desc}\n\n"
         index_ = index_+1
     prompt = prompt.format(query, search_results)
-
-    #
-
-
-    response = textgen_llm(prompt)
-    #invoke_models.invoke_llm_model(prompt,False)
-    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>")
-    print(response)
+    response = invoke_llm_model.invoke_llm_model(prompt,False)
+    #response = textgen_llm(prompt)
+    print("Response from LLM: ", response)
     inter_trim =response.split("[")[1]
     final_out = json.loads('{"results":['+inter_trim.split("]")[0]+']}')
-    #final_out_sorted_desc = sorted(final_out['results'], key=lambda d: d['Score'],reverse=True)
-    # print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>")
-    # print(final_out_sorted_desc)
-
-    #true_relevance = np.asarray([[10, 0, 0, 1, 5]])
     llm_scores = []
     current_scores = []
     for idx,i in enumerate(answers[0]['answer']):
@@ -105,7 +92,6 @@ def eval(question, answers):
 
 
 
-    # llm_scores.sort(reverse = True)
     x = np.array(llm_scores)
     x = x.reshape(-1, 1)
     x_norm = (pre.MinMaxScaler().fit_transform(x)).flatten().tolist()
@@ -116,24 +102,13 @@ def eval(question, answers):
 
 
     st.session_state.answers = answers
-
-    # print(x_norm)
-    # print(y_norm)
-
     dcg = dcg_score(np.asarray([llm_scores]),np.asarray([current_scores]))
-
-
+
     # IDCG score
     idcg = dcg_score(np.asarray([llm_scores]),np.asarray([llm_scores]))
-    # print("IDCG score : ", idcg)
 
     # Normalized DCG score
    ndcg = dcg
-
-    # print(st.session_state.input_ndcg)
-    # if(st.session_state.input_previous_query!=""):
-    #     if(st.session_state.input_previous_query == st.session_state.input_text):
-    #         st.session_state.input_ndcg=0.0
     if(ndcg > st.session_state.input_ndcg and st.session_state.input_ndcg!=0.0):
         st.session_state.ndcg_increase = "↑~"+str('%.3f'%(ndcg-st.session_state.input_ndcg ))
     elif(ndcg < st.session_state.input_ndcg):
@@ -143,7 +118,4 @@ def eval(question, answers):
 
 
 
-    st.session_state.input_ndcg = ndcg
-    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>")
-    print(st.session_state.input_ndcg)
-
+    st.session_state.input_ndcg = ndcg
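A note on the new invocation path: the commit swaps the module-level LangChain Bedrock client for the Space's own invoke_models helper, but the added line calls invoke_llm_model.invoke_llm_model(prompt,False) while the new import is `import invoke_models`. Unless invoke_llm_model is imported somewhere above line 10 (not shown in this diff), that name is never bound and the call raises NameError at runtime. A minimal sketch of the call that would match the import, assuming invoke_models.invoke_llm_model keeps the (prompt, stream_flag) signature used here:

    import invoke_models  # helper module shipped alongside llm_eval.py in this Space

    def grade_with_llm(prompt: str) -> str:
        # hypothetical wrapper, not part of this commit; the module name matches
        # the `import invoke_models` added above, and False mirrors the flag
        # passed in the committed call
        return invoke_models.invoke_llm_model(prompt, False)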
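The reply is then parsed by splitting on the first "[" and "]", which raises IndexError whenever the model answers without a bracketed list. A slightly more defensive sketch under the same assumption the committed code makes (the model returns one flat JSON list of {"Index": ..., "Score": ...} objects; parse_scores is a hypothetical helper, not part of this commit):

    import json
    import re

    def parse_scores(response: str) -> list:
        # find the first [...] block in the reply; DOTALL lets it span lines
        match = re.search(r"\[.*?\]", response, re.DOTALL)
        if match is None:
            return []  # no bracketed list in the reply
        # wrap the list in an object, exactly as the committed code does
        return json.loads('{"results":' + match.group(0) + '}')["results"]

Like the original split-based version, this still assumes the list contains no nested brackets.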
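Finally, in the scoring block, idcg is computed but never used and ndcg is assigned the raw dcg value, so the metric stored in st.session_state.input_ndcg is unnormalized. If normalization is intended, dividing by the ideal DCG (or calling sklearn's ndcg_score directly) yields a value in [0, 1]. A self-contained sketch with hypothetical scores:

    import numpy as np
    from sklearn.metrics import dcg_score, ndcg_score

    llm_scores = [8.0, 3.0, 5.0]  # hypothetical relevance grades from the LLM judge
    current_scores = [3, 2, 1]    # hypothetical scores of the current ranking order

    true_rel = np.asarray([llm_scores])
    ranking = np.asarray([current_scores])

    dcg = dcg_score(true_rel, ranking)    # DCG of the current ordering
    idcg = dcg_score(true_rel, true_rel)  # ideal DCG: best possible ordering
    print(dcg / idcg)                     # normalized DCG
    print(ndcg_score(true_rel, ranking))  # sklearn computes the same ratio directly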