eagle0504 committed on
Commit
651347a
1 Parent(s): 65ce9a0

Update utils/helper_functions.py

Browse files
Files changed (1) hide show
  1. utils/helper_functions.py +52 -64
utils/helper_functions.py CHANGED
@@ -238,76 +238,64 @@ def llama2_7b_ysa(prompt: str) -> str:
238
  return response
239
 
240
 
241
def quantize_to_4bit(arr: Union[np.ndarray, Any]) -> np.ndarray:
    """
    Quantize an array to 4-bit integer levels (0-15) by min-max scaling.

    The input is converted to a numpy ndarray if it is not one already,
    normalized to [0, 1] using its own minimum and maximum, scaled to the
    range 0-15 and rounded to the nearest integer.

    Parameters:
    arr (Union[np.ndarray, Any]): An array or any type that can be converted to a numpy ndarray.

    Returns:
    np.ndarray: Integer array with the same shape as the input and values in
    0-15. An empty input yields an empty array; an input whose values are all
    equal (zero range) yields all zeros.

    Examples:
    >>> quantize_to_4bit([0, 128, 255])
    array([ 0,  8, 15])
    """
    values = np.asarray(arr)  # no copy when the input is already an ndarray

    # min()/max() raise on empty input, and there is nothing to quantize anyway.
    if values.size == 0:
        return np.zeros(values.shape, dtype=int)

    # Boolean arrays do not support subtraction and narrow integer dtypes can
    # wrap around in `max - min`, so do the arithmetic in float64 for those.
    if values.dtype.kind in "biu":
        values = values.astype(float)

    lowest = values.min()
    span = values.max() - lowest

    # A constant array has zero range; dividing by it would give NaN, which
    # casts to meaningless integers. Map every element to level 0 instead.
    if span == 0:
        return np.zeros(values.shape, dtype=int)

    # Normalize to [0, 1], then scale to the 16 levels of a 4-bit integer.
    return np.round((values - lowest) / span * 15).astype(int)
272
 
273
-
274
def quantized_influence(arr1: np.ndarray, arr2: np.ndarray) -> float:
    """
    Measure how strongly the 4-bit levels of `arr1` separate the values of `arr2`.

    Both arrays are quantized with `quantize_to_4bit`. For every distinct
    level `v` found in the quantized `arr1`, the mean of the quantized `arr2`
    entries at the positions where `arr1` equals `v` is compared with the
    global mean of the quantized `arr2`. The result is

        mean_v[(local_mean_v - global_mean) ** 2 * n_v ** 2] / std(arr2_4bit)

    where `n_v` is the number of positions holding level `v`.

    Parameters:
    arr1 (np.ndarray): The first input array (defines the groups).
    arr2 (np.ndarray): The second input array (values averaged per group); must have the same shape as `arr1`.

    Returns:
    float: The influence value. When the quantized `arr2` has zero standard
    deviation (nothing to explain), 0.0 is returned instead of NaN.

    Raises:
    ValueError: If either input is empty.
    """
    arr1_4bit = quantize_to_4bit(arr1)
    arr2_4bit = quantize_to_4bit(arr2)

    if arr1_4bit.size == 0 or arr2_4bit.size == 0:
        raise ValueError("quantized_influence requires non-empty input arrays")

    unique_values = np.unique(arr1_4bit)
    y_bar_global = np.mean(arr2_4bit)

    # Build each group's mask once and keep both statistics derived from it.
    local_means = np.empty(unique_values.size, dtype=float)
    group_sizes = np.empty(unique_values.size, dtype=float)
    for idx, val in enumerate(unique_values):
        members = arr2_4bit[arr1_4bit == val]
        local_means[idx] = members.mean()
        group_sizes[idx] = members.size

    spread = np.std(arr2_4bit)
    # With zero spread every local mean equals the global mean, so the
    # ratio would be 0 / 0; report "no influence" instead.
    if spread == 0:
        return 0.0

    weighted_local_averages = (local_means - y_bar_global) ** 2 * group_sizes ** 2
    return float(np.mean(weighted_local_averages) / spread)
 
 
 
 
 
 
 
 
 
 
 
238
  return response
239
 
240
 
241
def quantize_to_kbit(arr: Union[np.ndarray, Any], k: int = 16) -> np.ndarray:
    """Quantize an array to `k` integer levels (0 to k-1) by min-max scaling.

    Despite the function name, `k` is the number of quantization *levels*,
    not the number of bits: the default of 16 levels corresponds to 4 bits.

    Args:
        arr (Union[np.ndarray, Any]): The input array, or anything that can be
            converted to a numpy ndarray.
        k (int): The number of levels to quantize to. Defaults to 16 for
            4-bit quantization. Must be at least 1.

    Returns:
        np.ndarray: Integer array with the same shape as the input and values
        in 0 to k-1. An empty input yields an empty array; an input whose
        values are all equal (zero range) yields all zeros.

    Raises:
        ValueError: If `k` is smaller than 1.

    Examples:
        >>> quantize_to_kbit([0, 128, 255])
        array([ 0,  8, 15])
    """
    if k < 1:
        raise ValueError("k must be a positive number of quantization levels")

    values = np.asarray(arr)  # no copy when the input is already an ndarray

    # min()/max() raise on empty input, and there is nothing to quantize anyway.
    if values.size == 0:
        return np.zeros(values.shape, dtype=int)

    # Boolean arrays do not support subtraction and narrow integer dtypes can
    # wrap around in `max - min`, so do the arithmetic in float64 for those.
    if values.dtype.kind in "biu":
        values = values.astype(float)

    lowest = values.min()
    span = values.max() - lowest

    # A constant array has zero range; dividing by it would give NaN, which
    # casts to meaningless integers. Map every element to level 0 instead.
    if span == 0:
        return np.zeros(values.shape, dtype=int)

    # Normalize to [0, 1], then scale to the k available levels.
    return np.round((values - lowest) / span * (k - 1)).astype(int)
 
 
257
 
 
 
258
 
259
def quantized_influence(arr1: np.ndarray, arr2: np.ndarray, k: int = 16, use_dagger: bool = False) -> Tuple[float, List[float]]:
    """
    Measure how strongly the quantized levels of `arr1` separate the values of `arr2`.

    Both arrays are quantized to `k` levels with `quantize_to_kbit`. For every
    distinct level `v` found in the quantized `arr1`, the mean of the
    quantized `arr2` entries at the positions where `arr1` equals `v` is
    compared with the global mean of the quantized `arr2`. The measure is

        mean_v[(local_mean_v - global_mean) ** 2 * n_v ** 2] / std(arr2_quantized)

    where `n_v` is the number of positions holding level `v`.

    Args:
        arr1 (np.ndarray): First input array; its quantized levels define the groups.
        arr2 (np.ndarray): Second input array; its quantized values are averaged
            per group. Must have the same shape as `arr1`.
        k (int): The number of quantization levels, defaults to 16 (4-bit).
        use_dagger (bool): When True, each element of the quantized `arr1` is
            replaced by the local mean of its group. Defaults to False.

    Returns:
        Tuple[float, List[float]]: The quantized influence measure, followed by
        either the per-element local means (`use_dagger=True`) or the
        quantized `arr1` values as a list (`use_dagger=False`). When the
        quantized `arr2` has zero standard deviation the measure is 0.0
        instead of NaN.

    Raises:
        ValueError: If either input is empty.
    """
    # Quantize both arrays to k levels
    arr1_quantized = quantize_to_kbit(arr1, k)
    arr2_quantized = quantize_to_kbit(arr2, k)

    if arr1_quantized.size == 0 or arr2_quantized.size == 0:
        raise ValueError("quantized_influence requires non-empty input arrays")

    # Distinct levels of arr1 (sorted) and the global average of quantized arr2
    unique_values = np.unique(arr1_quantized)
    y_bar_global = np.mean(arr2_quantized)

    # Build each group's mask once and keep both statistics derived from it;
    # the local means are reused for the dagger transformation below.
    local_means = np.empty(unique_values.size, dtype=float)
    group_sizes = np.empty(unique_values.size, dtype=float)
    for idx, val in enumerate(unique_values):
        members = arr2_quantized[arr1_quantized == val]
        local_means[idx] = members.mean()
        group_sizes[idx] = members.size

    spread = np.std(arr2_quantized)
    if spread == 0:
        # With zero spread every local mean equals the global mean, so the
        # ratio would be 0 / 0; report "no influence" instead.
        qim = 0.0
    else:
        weighted_local_averages = (local_means - y_bar_global) ** 2 * group_sizes ** 2
        qim = float(np.mean(weighted_local_averages) / spread)

    if use_dagger:
        # unique_values is sorted and contains every element of arr1_quantized,
        # so searchsorted returns the exact index of each element's group.
        positions = np.searchsorted(unique_values, arr1_quantized)
        daggered_values = local_means[positions].tolist()
    else:
        # Without the transformation, return the quantized arr1 values as-is
        daggered_values = arr1_quantized.tolist()

    return qim, daggered_values