eagle0504 committed
Commit f4b4efa
1 Parent(s): b6e2681

new feature added: app.py now scores candidate answers with a quantized-influence metric over OpenAI embeddings (replacing calculate_sts_openai_score), and utils/helper_functions.py gains merge_dataframes, add_dist_score_column, convert_to_list_of_dict, quantize_to_4bit, and quantized_influence

Files changed (2)
  1. app.py +4 -1
  2. utils/helper_functions.py +76 -0
app.py CHANGED
@@ -157,7 +157,10 @@ if prompt := st.chat_input(initial_input):
     for i in range(final_ref.shape[0]):
         this_content = final_ref["answers"][i]
         if len(this_content) > 3:
-            this_score = calculate_sts_openai_score(question, this_content)
+            arr1 = openai_text_embedding(question)
+            arr2 = openai_text_embedding(this_content)
+            # this_score = calculate_sts_openai_score(question, this_content)
+            this_score = quantized_influence(arr1, arr2)
         else:
             this_score = 0
         independent_ai_judge_score.append(this_score)
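
For context, a minimal sketch of the new scoring path (assumptions: openai_text_embedding already exists in utils/helper_functions.py, since it is called here but not added in this commit; the import path mirrors the repo layout; the two texts are made up):

# Illustrative only; requires OPENAI_API_KEY to be set.
from utils.helper_functions import openai_text_embedding, quantized_influence

question = "What services does the program offer?"       # hypothetical user question
candidate = "It offers mentoring and housing support."   # hypothetical reference answer

arr1 = openai_text_embedding(question)        # embedding vector for the question
arr2 = openai_text_embedding(candidate)       # embedding vector for the candidate answer
this_score = quantized_influence(arr1, arr2)  # replaces calculate_sts_openai_score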
utils/helper_functions.py CHANGED
@@ -18,6 +18,21 @@ from scipy.spatial.distance import cosine
 openai.api_key = os.environ["OPENAI_API_KEY"]
 
 
+def merge_dataframes(dataframes: List[pd.DataFrame]) -> pd.DataFrame:
+    """Merges a list of DataFrames, keeping only specific columns."""
+    # Concatenate the list of dataframes
+    combined_dataframe = pd.concat(
+        dataframes, ignore_index=True
+    )  # Combine all dataframes into one
+
+    # Ensure that the resulting dataframe only contains the columns "context", "questions", "answers"
+    combined_dataframe = combined_dataframe[
+        ["context", "questions", "answers"]
+    ]  # Filter for specific columns
+
+    return combined_dataframe  # Return the merged and filtered DataFrame
+
+
 def call_chatgpt(prompt: str) -> str:
     """
     Uses the OpenAI API to generate an AI response to a prompt.
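
A minimal usage sketch for the new merge_dataframes helper (toy DataFrames made up for illustration; each input is assumed to carry at least the "context", "questions", and "answers" columns):

import pandas as pd

df_a = pd.DataFrame({"context": ["c1"], "questions": ["q1"], "answers": ["a1"], "extra": ["dropped"]})
df_b = pd.DataFrame({"context": ["c2"], "questions": ["q2"], "answers": ["a2"]})

combined = merge_dataframes([df_a, df_b])
print(list(combined.columns))  # ['context', 'questions', 'answers']
print(len(combined))           # 2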
 
@@ -69,6 +84,46 @@ def calculate_sts_openai_score(sentence1: str, sentence2: str) -> float:
     return similarity_score
 
 
+def add_dist_score_column(
+    dataframe: pd.DataFrame,
+    sentence: str,
+) -> pd.DataFrame:
+    dataframe["stsopenai"] = dataframe["questions"].apply(
+        lambda x: calculate_sts_openai_score(str(x), sentence)
+    )
+
+    sorted_dataframe = dataframe.sort_values(by="stsopenai", ascending=False)
+    return sorted_dataframe.iloc[:5, :]
+
+
+def convert_to_list_of_dict(df: pd.DataFrame) -> List[Dict[str, str]]:
+    """
+    Reads in a pandas DataFrame and produces a list of chat-style message dictionaries with two keys each, 'role' and 'content'.
+
+    Args:
+        df: A pandas DataFrame with columns named 'questions' and 'answers'.
+
+    Returns:
+        A list of message dictionaries, alternating the user question and the assistant answer for each row.
+    """
+
+    # Initialize an empty list to store the dictionaries
+    result = []
+
+    # Loop through each row of the DataFrame
+    for index, row in df.iterrows():
+        # Create a dictionary with the current question and answer
+        qa_dict_quest = {"role": "user", "content": row["questions"]}
+        qa_dict_ans = {"role": "assistant", "content": row["answers"]}
+
+        # Add the dictionary to the result list
+        result.append(qa_dict_quest)
+        result.append(qa_dict_ans)
+
+    # Return the list of dictionaries
+    return result
+
+
 def query(payload: Dict[str, Any]) -> Dict[str, Any]:
     """
     Sends a JSON payload to a predefined API URL and returns the JSON response.
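
A minimal sketch of chaining the two helpers above to build retrieval context for the chat prompt (toy data; add_dist_score_column calls the OpenAI embedding API via calculate_sts_openai_score, so OPENAI_API_KEY must be set):

import pandas as pd

ref = pd.DataFrame({
    "context": ["c1", "c2"],
    "questions": ["How do I apply?", "Where is the office located?"],
    "answers": ["You can apply online.", "The office is downtown."],
})

# Rank rows by STS similarity to the user's sentence and keep the top 5.
top5 = add_dist_score_column(ref, "How can I submit an application?")

# Convert the ranked rows into alternating user/assistant chat messages.
messages = convert_to_list_of_dict(top5)
# e.g. [{'role': 'user', 'content': 'How do I apply?'},
#       {'role': 'assistant', 'content': 'You can apply online.'}, ...]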
 
@@ -123,3 +178,24 @@ def llama2_7b_ysa(prompt: str) -> str:
     response: str = output[0]["generated_text"]
 
     return response
+
+
+def quantize_to_4bit(arr: Union[np.ndarray, Any]) -> np.ndarray:
+    """Converts an array to a 4-bit representation by normalizing and scaling its values."""
+    if not isinstance(arr, np.ndarray):  # Ensure input is a numpy array
+        arr = np.array(arr)
+    arr_min = arr.min()  # Find minimum value
+    arr_max = arr.max()  # Find maximum value
+    normalized_arr = (arr - arr_min) / (arr_max - arr_min)  # Normalize values to [0, 1]
+    return np.round(normalized_arr * 15).astype(int)  # Scale to 0-15 and round
+
+
+def quantized_influence(arr1: np.ndarray, arr2: np.ndarray) -> float:
+    """Calculates a weighted measure of influence based on quantized versions of the input arrays."""
+    arr1_4bit = quantize_to_4bit(arr1)  # Quantize arr1 to 4-bit
+    arr2_4bit = quantize_to_4bit(arr2)  # Quantize arr2 to 4-bit
+    unique_values = np.unique(arr1_4bit)  # Find unique values in arr1_4bit
+    y_bar_global = np.mean(arr2_4bit)  # Compute global average of arr2_4bit
+    # Compute weighted local averages and normalize
+    weighted_local_averages = [(np.mean(arr2_4bit[arr1_4bit == val]) - y_bar_global) ** 2 * len(arr2_4bit[arr1_4bit == val]) ** 2 for val in unique_values]
+    return np.mean(weighted_local_averages) / np.std(arr2_4bit)  # Return normalized weighted average
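
To make the new metric concrete, a small worked example on toy vectors (arbitrary numbers; real inputs would be OpenAI embedding vectors). quantized_influence buckets the second vector by the 4-bit levels of the first, averages the squared deviations of each bucket mean from the global mean weighted by the squared bucket sizes, and divides by the standard deviation of the quantized second vector:

import numpy as np

a = np.array([0.10, 0.80, 0.80, 0.10, 0.50])
b = np.array([0.20, 0.90, 0.85, 0.15, 0.55])

print(quantize_to_4bit(a))  # [ 0 15 15  0  9] -- each vector rescaled to integer levels 0-15
print(quantize_to_4bit(b))  # [ 1 15 14  0  8]

score = quantized_influence(a, b)  # larger when b varies systematically with a's levels
print(score)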