prasadnu committed on
Commit d0de3b9 · 1 Parent(s): 238ba3e
Files changed (3):
  1. .gitignore +1 -0
  2. requirements.txt +0 -1
  3. semantic_search/llm_eval.py +24 -52
.gitignore CHANGED
@@ -10,3 +10,4 @@ split_pdf_csv/
 uploaded_images/
 images/
 gen_images/
+app.zip
requirements.txt CHANGED
@@ -22,4 +22,3 @@ matplotlib
 scipy
 seaborn
 Pillow
-nltk
semantic_search/llm_eval.py CHANGED
@@ -10,41 +10,38 @@ import logging
 import requests
 import numpy as np
 import pandas as pd
-from PIL import Image
 from typing import List
 from botocore.auth import SigV4Auth
-from langchain.llms.bedrock import Bedrock
+#from langchain.llms.bedrock import Bedrock
 from botocore.awsrequest import AWSRequest
 import streamlit as st
 import re
-import numpy as np
 from sklearn.metrics import ndcg_score,dcg_score
 from sklearn import preprocessing as pre
-import invoke_models
+import invoke_models#invoke_llm_model
 
-bedrock_ = boto3.client(
-    'bedrock-runtime',
-    aws_access_key_id=st.secrets['user_access_key'],
-    aws_secret_access_key=st.secrets['user_secret_key'], region_name = 'us-east-1'
-)
+# bedrock_ = boto3.client(
+#     'bedrock-runtime',
+#     aws_access_key_id=st.secrets['user_access_key'],
+#     aws_secret_access_key=st.secrets['user_secret_key'], region_name = 'us-east-1'
+# )
 
-inference_modifier = {
-    "max_tokens_to_sample": 4096,
-    "temperature": 0,
-    "top_k": 250,
-    "top_p": 1,
-    "stop_sequences": ["\n\nHuman"],
-}
-textgen_llm = Bedrock(
-    model_id="anthropic.claude-v2:1",
-    client=bedrock_,
-    model_kwargs=inference_modifier,
-)
+# inference_modifier = {
+#     "max_tokens_to_sample": 4096,
+#     "temperature": 0,
+#     "top_k": 250,
+#     "top_p": 1,
+#     "stop_sequences": ["\n\nHuman"],
+# }
+# textgen_llm = Bedrock(
+#     model_id="anthropic.claude-v2:1",
+#     client=bedrock_,
+#     model_kwargs=inference_modifier,
+# )
 
 
 #@st.cache_data
 def eval(question, answers):
-    #if()
     search_results: str = ""
     prompt: str = """Human: You are a grader assessing relevance of a retrieved document to a user question. \n
     The User question and Retrieved documents are provided below. The Retrieved documents are retail product descriptions that the human is looking for. \n
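The module-level LangChain `Bedrock` client is retired here in favor of a shared `invoke_models` helper whose implementation is not part of this diff. For orientation, a minimal sketch of what such a helper might wrap, assuming it reuses the `bedrock-runtime` client and the Claude v2:1 inference parameters commented out above; the body below is hypothetical, not the repo's actual `invoke_models`:

```python
# Hypothetical sketch only: mirrors the parameters this commit comments out.
import json
import boto3

# Credentials resolve from the environment here; the repo passes
# st.secrets['user_access_key'] / st.secrets['user_secret_key'] explicitly.
bedrock_ = boto3.client('bedrock-runtime', region_name='us-east-1')

def invoke_llm_model(prompt, is_stream=False):
    # Claude v2 on Bedrock expects a Human/Assistant-formatted prompt and an
    # anthropic-style request body; eval()'s prompt already opens with "Human:".
    body = json.dumps({
        "prompt": prompt + "\n\nAssistant:",
        "max_tokens_to_sample": 4096,
        "temperature": 0,
        "top_k": 250,
        "top_p": 1,
        "stop_sequences": ["\n\nHuman"],
    })
    response = bedrock_.invoke_model(
        body=body,
        modelId="anthropic.claude-v2:1",
        accept="application/json",
        contentType="application/json",
    )
    return json.loads(response["body"].read())["completion"]
```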
@@ -73,21 +70,11 @@ def eval(question, answers):
         search_results += f"Index: {index_}, Description: {desc}\n\n"
         index_ = index_+1
     prompt = prompt.format(query, search_results)
-    # print(answers[0]['answer'])
-    # print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>")
-    # print(prompt)
-
-    response = textgen_llm(prompt)
-    #invoke_models.invoke_llm_model(prompt,False)
-    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>")
-    print(response)
+    response = invoke_llm_model.invoke_llm_model(prompt,False)
+    #response = textgen_llm(prompt)
+    print("Response from LLM: ", response)
     inter_trim =response.split("[")[1]
     final_out = json.loads('{"results":['+inter_trim.split("]")[0]+']}')
-    #final_out_sorted_desc = sorted(final_out['results'], key=lambda d: d['Score'],reverse=True)
-    # print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>")
-    # print(final_out_sorted_desc)
-
-    #true_relevance = np.asarray([[10, 0, 0, 1, 5]])
     llm_scores = []
     current_scores = []
     for idx,i in enumerate(answers[0]['answer']):
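Two details in this hunk are worth flagging. First, the new call site reads `invoke_llm_model.invoke_llm_model(prompt,False)` although the module is imported as `invoke_models`, so as committed the line raises a `NameError`; the import comment `import invoke_models#invoke_llm_model` suggests `invoke_models.invoke_llm_model(prompt, False)` was intended. Second, the `split("[")` / `split("]")` parsing raises an `IndexError` whenever the completion contains no bracketed array. A slightly more defensive extraction, as a hedged sketch (helper name is illustrative):

```python
import json
import re

def extract_results(response: str) -> list:
    # Same intent as response.split("[")[1] / .split("]")[0] above, but
    # fails with a clear message when the model returns no JSON array.
    match = re.search(r"\[.*?\]", response, re.DOTALL)
    if match is None:
        raise ValueError("no JSON array found in LLM response")
    return json.loads('{"results":' + match.group(0) + '}')["results"]
```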
@@ -105,7 +92,6 @@ def eval(question, answers):
 
 
 
-    # llm_scores.sort(reverse = True)
     x = np.array(llm_scores)
     x = x.reshape(-1, 1)
     x_norm = (pre.MinMaxScaler().fit_transform(x)).flatten().tolist()
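For context on this step: `MinMaxScaler` rescales the column of LLM grades into [0, 1] relative to the minimum and maximum of the current result set. A quick illustration with made-up grades:

```python
import numpy as np
from sklearn import preprocessing as pre

llm_scores = [3, 1, 2, 3, 0]             # illustrative grades, not repo data
x = np.array(llm_scores).reshape(-1, 1)  # the scaler expects a 2-D column
x_norm = pre.MinMaxScaler().fit_transform(x).flatten().tolist()
print(x_norm)  # [1.0, 0.333..., 0.666..., 1.0, 0.0]
```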
@@ -116,24 +102,13 @@ def eval(question, answers):
 
 
     st.session_state.answers = answers
-
-    # print(x_norm)
-    # print(y_norm)
-
     dcg = dcg_score(np.asarray([llm_scores]),np.asarray([current_scores]))
-    # print("DCG score : ", dcg)
-
+
     # IDCG score
     idcg = dcg_score(np.asarray([llm_scores]),np.asarray([llm_scores]))
-    # print("IDCG score : ", idcg)
 
     # Normalized DCG score
     ndcg = dcg
-
-    # print(st.session_state.input_ndcg)
-    # if(st.session_state.input_previous_query!=""):
-    #     if(st.session_state.input_previous_query == st.session_state.input_text):
-    #         st.session_state.input_ndcg=0.0
     if(ndcg > st.session_state.input_ndcg and st.session_state.input_ndcg!=0.0):
         st.session_state.ndcg_increase = "↑~"+str('%.3f'%(ndcg-st.session_state.input_ndcg ))
     elif(ndcg < st.session_state.input_ndcg):
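Note that the hunk computes `idcg` and then assigns `ndcg = dcg`, so the value compared against `st.session_state.input_ndcg` is the raw DCG and `idcg` is unused. Conventionally NDCG = DCG / IDCG, which `sklearn.metrics.ndcg_score` also computes in one call; a sketch with illustrative scores:

```python
import numpy as np
from sklearn.metrics import dcg_score, ndcg_score

llm_scores = [[3, 2, 3, 0, 1]]      # LLM judge grades per returned document
current_scores = [[5, 4, 3, 2, 1]]  # engine ranking scores, in result order

dcg = dcg_score(np.asarray(llm_scores), np.asarray(current_scores))
idcg = dcg_score(np.asarray(llm_scores), np.asarray(llm_scores))
ndcg = dcg / idcg  # 1.0 would mean the engine's order matches the judge's

# ndcg_score computes the same ratio directly:
assert abs(ndcg - ndcg_score(np.asarray(llm_scores), np.asarray(current_scores))) < 1e-9
```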
@@ -143,7 +118,4 @@ def eval(question, answers):
 
 
 
-    st.session_state.input_ndcg = ndcg#round(ndcg_score(np.asarray([x_norm]), np.asarray([y_norm]), k=st.session_state.input_K),2)
-    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>")
-    print(st.session_state.input_ndcg)
-
+    st.session_state.input_ndcg = ndcg
 