Spaces:
Sleeping
Sleeping
import time | |
from functools import wraps | |
def reconstruct_dataframe(alpha_low, alpha_med, alpha_high, original_df):
    """Return a copy of ``original_df`` with the placeholder alphas substituted.

    Cells are matched by exact equality against three hard-coded placeholder
    values and replaced as follows:

    * ``0.00191`` -> ``alpha_low``
    * ``0.0038``  -> ``alpha_med``
    * ``0.00767`` -> ``alpha_high``

    Args:
        alpha_low: Replacement for every cell equal to ``0.00191``.
        alpha_med: Replacement for every cell equal to ``0.0038``.
        alpha_high: Replacement for every cell equal to ``0.00767``.
        original_df: DataFrame containing the placeholder values. It is not
            modified.

    Returns:
        A new DataFrame with the substitutions applied; all other cells are
        left as they were.

    Raises:
        ValueError: If any of the three placeholder values does not occur at
            least once in ``original_df``.
    """
    # Mapping from the placeholder values to the new alpha parameters.
    value_to_alpha = {
        0.00191: alpha_low,
        0.00767: alpha_high,
        0.0038: alpha_med,
    }

    # Every placeholder must be present at least once, otherwise the input is
    # not the template this function expects.
    for original_value in value_to_alpha:
        if not (original_df == original_value).any().any():
            raise ValueError(f"Value {original_value} not found in the input DataFrame.")

    # Replace all placeholders in a single call. Matching is done against the
    # original cell values, so a new alpha that happens to equal another
    # placeholder (e.g. alpha_low == 0.00767) is not replaced a second time,
    # which is what a sequence of one-at-a-time replace() calls would do.
    return original_df.replace(value_to_alpha)
def preprocess_dataFrame(df, headerRow_idx=0, numRowsStart_idx=None, numRowsEnd_idx=None, numColsStart_idx=None, numColsEnd_idx=None, rowNames_idx=None):
    """Promote a row/column of ``df`` to header/row labels and slice out a block.

    Args:
        df: Input DataFrame. It is not modified.
        headerRow_idx: Positional index of the row whose values become the
            column labels.
        numRowsStart_idx: Positional start (inclusive) of the row slice;
            ``None`` means from the first row.
        numRowsEnd_idx: Positional end (exclusive) of the row slice; ``None``
            means through the last row.
        numColsStart_idx: Positional start (inclusive) of the column slice.
        numColsEnd_idx: Positional end (exclusive) of the column slice.
        rowNames_idx: Positional index of the column whose values become the
            row labels, or ``None`` to keep the existing index.

    Returns:
        The positional slice ``[numRowsStart_idx:numRowsEnd_idx,
        numColsStart_idx:numColsEnd_idx]`` of the relabelled frame.
    """
    # Work on a shallow copy: it has its own axes, so relabelling below does
    # not leak into the caller's DataFrame, while the cell data is not
    # duplicated.
    df = df.copy(deep=False)

    df.columns = df.iloc[headerRow_idx]  # Set the header
    if rowNames_idx is not None:
        df.index = df.iloc[:, rowNames_idx]  # Set the row names

    # Slice the dataset down to the requested block (positional indexing).
    return df.iloc[numRowsStart_idx:numRowsEnd_idx, numColsStart_idx:numColsEnd_idx]
def timeit(f):
    """Decorator that prints the wall-clock duration of each call to ``f``.

    Args:
        f: The callable to time. Its ``__name__`` is used in the report.

    Returns:
        A wrapper that forwards all arguments to ``f``, prints
        ``func:<name> took: <seconds> sec`` and returns ``f``'s result.
    """
    @wraps(f)  # keep the decorated function's __name__ / __doc__
    def timed(*args, **kw):
        ts = time.time()
        result = f(*args, **kw)
        te = time.time()
        # The format string must have exactly one placeholder per supplied
        # value; only the function name and elapsed time are reported (the
        # call arguments are intentionally left out of the message).
        print('func:%r took: %2.4f sec' % (f.__name__, te - ts))
        return result
    return timed
def timing_decorator(func):
    """Decorator that prints how long ``func`` took and when it finished.

    Args:
        func: The callable to time. Its ``__name__`` is used in the report.

    Returns:
        A wrapper that forwards all arguments to ``func``, prints
        ``<name> took <seconds> seconds. Finished at <local timestamp>`` and
        returns ``func``'s result.
    """
    @wraps(func)  # keep the decorated function's __name__ / __doc__
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        duration = end_time - start_time
        # Timestamp is the completion time, rendered in the local timezone.
        timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
        print(f"{func.__name__} took {duration:.4f} seconds. Finished at {timestamp}")
        return result
    return wrapper
# Function to compare two dataframes after converting and rounding
def compare_dataframes(df1, df2, decimals=8):
    """Compare two DataFrames after casting to float and rounding, and print a report.

    Both frames are converted with ``astype(float)`` and rounded to
    ``decimals`` places, then compared with ``DataFrame.equals``. The result,
    both shapes and a short preview of each input are printed.

    Args:
        df1: The reference ("original") DataFrame.
        df2: The DataFrame produced by the new method (labelled "Numba" in
            the shape report).
        decimals: Number of decimal places to round to before comparing.

    Returns:
        ``True`` if the rounded frames are equal according to
        ``DataFrame.equals``, otherwise ``False``.
    """
    def convert_and_round_dataframe(df, decimals):
        # Cast every column to float, then round to the requested precision.
        return df.astype(float).round(decimals)

    rounded_df1 = convert_and_round_dataframe(df1, decimals)
    rounded_df2 = convert_and_round_dataframe(df2, decimals)
    are_equal = rounded_df1.equals(rounded_df2)

    print("Both methods are equal:", are_equal)
    print("Numba shape:", df2.shape)
    print("Original shape:", df1.shape)
    # The preview shows the first two rows of (at most) the first five
    # columns, matching what the banner text announces.
    print("======== ORIGINAL OUTPUT (first item in output list, head() for the first 5 columns)")
    print(df1.iloc[:, 0:5].head(2))
    print("======== New method OUTPUT (first item in output list, head() for the first 5 columns)")
    print(df2.iloc[:, 0:5].head(2))

    # Hand the verdict back so callers can act on it instead of parsing stdout.
    return are_equal
def align_dataframes(df1, df2, key):
    """
    Restrict two dataframes to the rows whose keys occur in both.

    The keys of ``df1`` are its index labels; the keys of ``df2`` are the
    values in its ``key`` column.

    Parameters:
    - df1: First dataframe, keyed by its index.
    - df2: Second dataframe, keyed by the column named ``key``.
    - key: Name of the column in ``df2`` holding the keys.

    Returns:
    - df1_aligned, df2_aligned: ``df1`` selected by the shared keys, and the
      matching rows of ``df2`` re-indexed by ``key`` (the column is kept).
    """
    key_column = df2[key]
    shared_keys = df1.index.intersection(key_column)

    # Boolean row selector for df2: True where the row's key is shared.
    keep_rows = key_column.isin(shared_keys)

    left = df1.loc[shared_keys]
    right = df2[keep_rows].set_index(key, drop=False)
    return left, right