eagle0504 committed
Commit f4b4efa
1 Parent(s): b6e2681

new feature added: app.py now scores candidate answers with a quantized-influence metric over OpenAI embeddings (replacing calculate_sts_openai_score), and utils/helper_functions.py gains merge_dataframes, add_dist_score_column, convert_to_list_of_dict, quantize_to_4bit, and quantized_influence

Files changed (2)
  1. app.py +4 -1
  2. utils/helper_functions.py +76 -0
app.py CHANGED
@@ -157,7 +157,10 @@ if prompt := st.chat_input(initial_input):
     for i in range(final_ref.shape[0]):
         this_content = final_ref["answers"][i]
         if len(this_content) > 3:
-            this_score = calculate_sts_openai_score(question, this_content)
+            arr1 = openai_text_embedding(question)
+            arr2 = openai_text_embedding(this_content)
+            # this_score = calculate_sts_openai_score(question, this_content)
+            this_score = quantized_influence(arr1, arr2)
         else:
             this_score = 0
         independent_ai_judge_score.append(this_score)
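
For context, a minimal sketch of the new scoring path (assumptions: openai_text_embedding already exists in utils/helper_functions.py, since it is called here but not added in this commit; the import path mirrors the repo layout; the two texts are made up):

# Illustrative only; requires OPENAI_API_KEY to be set.
from utils.helper_functions import openai_text_embedding, quantized_influence

question = "What services does the program offer?"       # hypothetical user question
candidate = "It offers mentoring and housing support."   # hypothetical reference answer

arr1 = openai_text_embedding(question)        # embedding vector for the question
arr2 = openai_text_embedding(candidate)       # embedding vector for the candidate answer
this_score = quantized_influence(arr1, arr2)  # replaces calculate_sts_openai_score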
utils/helper_functions.py CHANGED
@@ -18,6 +18,21 @@ from scipy.spatial.distance import cosine
 openai.api_key = os.environ["OPENAI_API_KEY"]
 
 
+def merge_dataframes(dataframes: List[pd.DataFrame]) -> pd.DataFrame:
+    """Merges a list of DataFrames, keeping only specific columns."""
+    # Concatenate the list of dataframes
+    combined_dataframe = pd.concat(
+        dataframes, ignore_index=True
+    )  # Combine all dataframes into one
+
+    # Ensure that the resulting dataframe only contains the columns "context", "questions", "answers"
+    combined_dataframe = combined_dataframe[
+        ["context", "questions", "answers"]
+    ]  # Filter for specific columns
+
+    return combined_dataframe  # Return the merged and filtered DataFrame
+
+
 def call_chatgpt(prompt: str) -> str:
     """
     Uses the OpenAI API to generate an AI response to a prompt.
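
A minimal usage sketch for the new merge_dataframes helper (toy DataFrames made up for illustration; each input is assumed to carry at least the "context", "questions", and "answers" columns):

import pandas as pd

df_a = pd.DataFrame({"context": ["c1"], "questions": ["q1"], "answers": ["a1"], "extra": ["dropped"]})
df_b = pd.DataFrame({"context": ["c2"], "questions": ["q2"], "answers": ["a2"]})

combined = merge_dataframes([df_a, df_b])
print(list(combined.columns))  # ['context', 'questions', 'answers']
print(len(combined))           # 2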
 
@@ -69,6 +84,46 @@ def calculate_sts_openai_score(sentence1: str, sentence2: str) -> float:
     return similarity_score
 
 
+def add_dist_score_column(
+    dataframe: pd.DataFrame,
+    sentence: str,
+) -> pd.DataFrame:
+    dataframe["stsopenai"] = dataframe["questions"].apply(
+        lambda x: calculate_sts_openai_score(str(x), sentence)
+    )
+
+    sorted_dataframe = dataframe.sort_values(by="stsopenai", ascending=False)
+    return sorted_dataframe.iloc[:5, :]
+
+
+def convert_to_list_of_dict(df: pd.DataFrame) -> List[Dict[str, str]]:
+    """
+    Reads in a pandas DataFrame and produces a list of chat-style message dictionaries with two keys each, 'role' and 'content'.
+
+    Args:
+        df: A pandas DataFrame with columns named 'questions' and 'answers'.
+
+    Returns:
+        A list of message dictionaries, alternating the user question and the assistant answer for each row.
+    """
+
+    # Initialize an empty list to store the dictionaries
+    result = []
+
+    # Loop through each row of the DataFrame
+    for index, row in df.iterrows():
+        # Create a dictionary with the current question and answer
+        qa_dict_quest = {"role": "user", "content": row["questions"]}
+        qa_dict_ans = {"role": "assistant", "content": row["answers"]}
+
+        # Add the dictionary to the result list
+        result.append(qa_dict_quest)
+        result.append(qa_dict_ans)
+
+    # Return the list of dictionaries
+    return result
+
+
 def query(payload: Dict[str, Any]) -> Dict[str, Any]:
     """
     Sends a JSON payload to a predefined API URL and returns the JSON response.
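
A minimal sketch of chaining the two helpers above to build retrieval context for the chat prompt (toy data; add_dist_score_column calls the OpenAI embedding API via calculate_sts_openai_score, so OPENAI_API_KEY must be set):

import pandas as pd

ref = pd.DataFrame({
    "context": ["c1", "c2"],
    "questions": ["How do I apply?", "Where is the office located?"],
    "answers": ["You can apply online.", "The office is downtown."],
})

# Rank rows by STS similarity to the user's sentence and keep the top 5.
top5 = add_dist_score_column(ref, "How can I submit an application?")

# Convert the ranked rows into alternating user/assistant chat messages.
messages = convert_to_list_of_dict(top5)
# e.g. [{'role': 'user', 'content': 'How do I apply?'},
#       {'role': 'assistant', 'content': 'You can apply online.'}, ...]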
 
@@ -123,3 +178,24 @@ def llama2_7b_ysa(prompt: str) -> str:
     response: str = output[0]["generated_text"]
 
     return response
+
+
+def quantize_to_4bit(arr: Union[np.ndarray, Any]) -> np.ndarray:
+    """Converts an array to a 4-bit representation by normalizing and scaling its values."""
+    if not isinstance(arr, np.ndarray):  # Ensure input is a numpy array
+        arr = np.array(arr)
+    arr_min = arr.min()  # Find minimum value
+    arr_max = arr.max()  # Find maximum value
+    normalized_arr = (arr - arr_min) / (arr_max - arr_min)  # Normalize values to [0, 1]
+    return np.round(normalized_arr * 15).astype(int)  # Scale to 0-15 and round
+
+
+def quantized_influence(arr1: np.ndarray, arr2: np.ndarray) -> float:
+    """Calculates a weighted measure of influence based on quantized versions of the input arrays."""
+    arr1_4bit = quantize_to_4bit(arr1)  # Quantize arr1 to 4-bit
+    arr2_4bit = quantize_to_4bit(arr2)  # Quantize arr2 to 4-bit
+    unique_values = np.unique(arr1_4bit)  # Find unique values in arr1_4bit
+    y_bar_global = np.mean(arr2_4bit)  # Compute global average of arr2_4bit
+    # Compute weighted local averages and normalize
+    weighted_local_averages = [(np.mean(arr2_4bit[arr1_4bit == val]) - y_bar_global) ** 2 * len(arr2_4bit[arr1_4bit == val]) ** 2 for val in unique_values]
+    return np.mean(weighted_local_averages) / np.std(arr2_4bit)  # Return normalized weighted average
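
To make the new metric concrete, a small worked example on toy vectors (arbitrary numbers; real inputs would be OpenAI embedding vectors). quantized_influence buckets the second vector by the 4-bit levels of the first, averages the squared deviations of each bucket mean from the global mean weighted by the squared bucket sizes, and divides by the standard deviation of the quantized second vector:

import numpy as np

a = np.array([0.10, 0.80, 0.80, 0.10, 0.50])
b = np.array([0.20, 0.90, 0.85, 0.15, 0.55])

print(quantize_to_4bit(a))  # [ 0 15 15  0  9] -- each vector rescaled to integer levels 0-15
print(quantize_to_4bit(b))  # [ 1 15 14  0  8]

score = quantized_influence(a, b)  # larger when b varies systematically with a's levels
print(score)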