Spaces: Running on CPU Upgrade
mvectors
Browse files
- .gitignore +1 -0
- requirements.txt +0 -1
- semantic_search/llm_eval.py +24 -52
.gitignore CHANGED
@@ -10,3 +10,4 @@ split_pdf_csv/
 uploaded_images/
 images/
 gen_images/
+app.zip
requirements.txt CHANGED
@@ -22,4 +22,3 @@ matplotlib
 scipy
 seaborn
 Pillow
-nltk
semantic_search/llm_eval.py CHANGED
@@ -10,41 +10,38 @@ import logging
 import requests
 import numpy as np
 import pandas as pd
-from PIL import Image
 from typing import List
 from botocore.auth import SigV4Auth
-from langchain.llms.bedrock import Bedrock
+#from langchain.llms.bedrock import Bedrock
 from botocore.awsrequest import AWSRequest
 import streamlit as st
 import re
-import numpy as np
 from sklearn.metrics import ndcg_score,dcg_score
 from sklearn import preprocessing as pre
-import invoke_models
+import invoke_models#invoke_llm_model
 
-bedrock_ = boto3.client(
-    'bedrock-runtime',
-    aws_access_key_id=st.secrets['user_access_key'],
-    aws_secret_access_key=st.secrets['user_secret_key'], region_name = 'us-east-1'
-)
+# bedrock_ = boto3.client(
+#     'bedrock-runtime',
+#     aws_access_key_id=st.secrets['user_access_key'],
+#     aws_secret_access_key=st.secrets['user_secret_key'], region_name = 'us-east-1'
+# )
 
-inference_modifier = {
-    "max_tokens_to_sample": 4096,
-    "temperature": 0,
-    "top_k": 250,
-    "top_p": 1,
-    "stop_sequences": ["\n\nHuman"],
-}
-textgen_llm = Bedrock(
-    model_id="anthropic.claude-v2:1",
-    client=bedrock_,
-    model_kwargs=inference_modifier,
-)
+# inference_modifier = {
+#     "max_tokens_to_sample": 4096,
+#     "temperature": 0,
+#     "top_k": 250,
+#     "top_p": 1,
+#     "stop_sequences": ["\n\nHuman"],
+# }
+# textgen_llm = Bedrock(
+#     model_id="anthropic.claude-v2:1",
+#     client=bedrock_,
+#     model_kwargs=inference_modifier,
+# )
 
 
 #@st.cache_data
 def eval(question, answers):
-    #if()
     search_results: str = ""
     prompt: str = """Human: You are a grader assessing relevance of a retrieved document to a user question. \n
     The User question and Retrieved documents are provided below. The Retrieved documents are retail product descriptions that the human is looking for. \n
@@ -73,21 +70,11 @@ def eval(question, answers):
         search_results += f"Index: {index_}, Description: {desc}\n\n"
         index_ = index_+1
     prompt = prompt.format(query, search_results)
-
-    #
-
-
-    response = textgen_llm(prompt)
-    #invoke_models.invoke_llm_model(prompt,False)
-    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>")
-    print(response)
+    response = invoke_llm_model.invoke_llm_model(prompt,False)
+    #response = textgen_llm(prompt)
+    print("Response from LLM: ", response)
     inter_trim =response.split("[")[1]
     final_out = json.loads('{"results":['+inter_trim.split("]")[0]+']}')
-    #final_out_sorted_desc = sorted(final_out['results'], key=lambda d: d['Score'],reverse=True)
-    # print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>")
-    # print(final_out_sorted_desc)
-
-    #true_relevance = np.asarray([[10, 0, 0, 1, 5]])
     llm_scores = []
     current_scores = []
     for idx,i in enumerate(answers[0]['answer']):
@@ -105,7 +92,6 @@ def eval(question, answers):
 
 
 
-    # llm_scores.sort(reverse = True)
     x = np.array(llm_scores)
     x = x.reshape(-1, 1)
     x_norm = (pre.MinMaxScaler().fit_transform(x)).flatten().tolist()
@@ -116,24 +102,13 @@ def eval(question, answers):
 
 
     st.session_state.answers = answers
-
-    # print(x_norm)
-    # print(y_norm)
-
     dcg = dcg_score(np.asarray([llm_scores]),np.asarray([current_scores]))
-
-
+
     # IDCG score
     idcg = dcg_score(np.asarray([llm_scores]),np.asarray([llm_scores]))
-    # print("IDCG score : ", idcg)
 
    # Normalized DCG score
     ndcg = dcg
-
-    # print(st.session_state.input_ndcg)
-    # if(st.session_state.input_previous_query!=""):
-    #     if(st.session_state.input_previous_query == st.session_state.input_text):
-    #         st.session_state.input_ndcg=0.0
     if(ndcg > st.session_state.input_ndcg and st.session_state.input_ndcg!=0.0):
         st.session_state.ndcg_increase = "↑~"+str('%.3f'%(ndcg-st.session_state.input_ndcg ))
     elif(ndcg < st.session_state.input_ndcg):
@@ -143,7 +118,4 @@ def eval(question, answers):
 
 
 
-    st.session_state.input_ndcg = ndcg
-    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>")
-    print(st.session_state.input_ndcg)
-
+    st.session_state.input_ndcg = ndcg
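For context, the commit retires the module-level Bedrock client in favor of the shared invoke_models helper, while the grader's completion is still parsed by slicing the first JSON array out of the raw text. A minimal sketch of that parsing step, using a made-up completion string (the response text and scores below are illustrative, not from the app):

import json

# Hypothetical grader completion (illustrative only): the prompt asks the model
# to emit a JSON array of {"Index": ..., "Score": ...} objects somewhere in the text.
response = 'Scores: [{"Index": 1, "Score": 8}, {"Index": 2, "Score": 3}] based on the descriptions.'

# Same slicing as eval(): keep everything between the first "[" and the next "]",
# then wrap it so json.loads sees a valid object. Note this breaks if the array
# itself ever contains a "[" or "]" character.
inter_trim = response.split("[")[1]
final_out = json.loads('{"results":[' + inter_trim.split("]")[0] + ']}')
print(final_out["results"])  # -> [{'Index': 1, 'Score': 8}, {'Index': 2, 'Score': 3}]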
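The ranking math at the end of eval() uses scikit-learn's dcg_score to compare the grader's relevance judgments against the current result order. A self-contained sketch with made-up scores follows; the final division by IDCG is the conventional normalization and an assumption here, since the committed code keeps ndcg = dcg (the file also imports sklearn's ready-made ndcg_score, which would compute this directly):

import numpy as np
from sklearn.metrics import dcg_score

# Illustrative values only: relevance the LLM grader assigned to each result,
# and the scores implied by the order the search engine actually returned.
llm_scores = [9, 7, 4, 2, 1]
current_scores = [3, 5, 1, 4, 2]

# DCG of the current ordering, judged by the grader's relevance.
dcg = dcg_score(np.asarray([llm_scores]), np.asarray([current_scores]))

# IDCG: best achievable DCG, i.e. results sorted by the grader's own scores.
idcg = dcg_score(np.asarray([llm_scores]), np.asarray([llm_scores]))

ndcg = dcg / idcg  # conventional NDCG in [0, 1]
print('%.3f %.3f %.3f' % (dcg, idcg, ndcg))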