Spaces:

cs70
/

test-excel

Sleeping

App Files Community

Chiragkumar Savani committed on Aug 31, 2024

Commit

42c790d

1 Parent(s): 7155478

Fixes for column-name changes across various Excel files

Browse files

Files changed (1) hide show

app.py +22 -30

app.py CHANGED Viewed

@@ -67,41 +67,34 @@ def get_output_value(value1, value2, is_high=False):
 def process_csv(file):
     df = pd.read_csv(file)
     df.columns = df.columns.str.strip()  # Remove trailing spaces from column names
     # Add three empty columns between LOW PRICE and CLOSE PRICE
-    low_price_index = df.columns.get_loc('LOW PRICE')
     df.insert(low_price_index + 1, 'HIGH Result', '')
     df.insert(low_price_index + 2, 'LOW Result', '')
     df.insert(low_price_index + 3, 'Empty Column', '')
     # Convert DATE to datetime
-    df['DATE'] = pd.to_datetime(df['DATE'], format='%d-%b-%Y')
     # Detect the last Thursday of each month and insert an empty row after it
-    df['Last_Thursday'] = df['DATE'].apply(last_thursday)
-    # is_last_thursday = df['DATE'] == df['Last_Thursday']
-    # rows = []
-    # for i, row in df.iterrows():
-    #     rows.append(row)
-    #     if is_last_thursday[i]:
-    #         # Append an empty row (NaN values) after the last Thursday
-    #         empty_row = pd.Series([None] * len(df.columns), index=df.columns)
-    #         rows.append(empty_row)
-    # df = pd.DataFrame(rows)
-    # print(df)
-    # df = df.drop(columns=['Last_Thursday'])
     indices_to_insert = []
     for i in range(len(df)):
-        if df.loc[i, 'DATE'] == df.loc[i, 'Last_Thursday']:
             indices_to_insert.append(i)
-    # Insert empty rows
-    # for idx in reversed(indices_to_insert):
-    #     df = pd.concat([df.iloc[:idx], pd.DataFrame([{}]), df.iloc[idx:]]).reset_index(drop=True)
     df['Separator'] = ''
     # Insert empty rows and update the Last_Thursday column
@@ -109,13 +102,12 @@ def process_csv(file):
         # Insert an empty row
         df = pd.concat([df.iloc[:idx], pd.DataFrame([{'Separator': 'Separator'}]), df.iloc[idx:]]).reset_index(drop=True)
-    # df['HIGH PRICE'] = df['HIGH PRICE'].str.replace(',', '')
-    price_columns = ['HIGH PRICE', 'LOW PRICE']
     df[price_columns] = df[price_columns].replace({',': ''}, regex=True).apply(pd.to_numeric, errors='coerce')
     # Calculate global thresholds for HIGH PRICE and LOW PRICE columns
-    high_price_threshold = calculate_threshold(df['HIGH PRICE'].max(), is_high_price=True)
-    low_price_threshold = calculate_threshold(df['LOW PRICE'].min(), is_high_price=False)
     # Process HIGH PRICE and LOW PRICE columns
     def process_column(df, style_df, column_name, result_column_name, threshold):
@@ -132,7 +124,7 @@ def process_csv(file):
                     for j in range(i - 1, -1, -1):
                         diff = abs(df.loc[rows[i], column_name] - df.loc[rows[j], column_name])
                         if diff < threshold and not element_used[rows[j]]:
-                            output_value = get_output_value(df.loc[rows[i], column_name], df.loc[rows[j], column_name], 'HIGH' in column_name)
                             # print(f"i {rows[i]} j {rows[j]} {column_name}")
                             # print(f"{df.loc[rows[i], column_name]} {df.loc[rows[j], column_name]} diff {diff}, threshold: {threshold}, output value {output_value}")
                             df.at[rows[j], result_column_name] = output_value
@@ -144,7 +136,7 @@ def process_csv(file):
                             style_df = set_cell_color(style_df, index=rows[j], column=column_name, hex_color=color)
                             # check if there is higher or lower value, if yes, then colorize it
-                            response = check_condition_passed(df, column_name, rows[j], output_value, 'HIGH' in column_name)
                             if response:
                                 style_df = set_cell_color(style_df, index=rows[j], column=result_column_name, hex_color=color)
                             break
@@ -156,11 +148,11 @@ def process_csv(file):
     style_df = pd.DataFrame('', index=df.index, columns=df.columns)
     output_file = file.replace(".csv", "_processed.xlsx")
-    process_column(df, style_df, 'HIGH PRICE', 'HIGH Result', high_price_threshold)
-    process_column(df, style_df, 'LOW PRICE', 'LOW Result', low_price_threshold)
     # add an empty row before the new month
-    df['DATE'] = df['DATE'].dt.strftime('%d-%b-%Y')
     # df['Last_Thursday'] = df['Last_Thursday'].dt.strftime('%d-%b-%Y')
     styled_df = df.style.apply(lambda _: style_df, axis=None)

 def process_csv(file):
     df = pd.read_csv(file)
     df.columns = df.columns.str.strip()  # Remove trailing spaces from column names
+    HIGH_NAME = "HIGH PRICE"
+    if HIGH_NAME not in df.columns:
+        HIGH_NAME = "HIGH"
+    LOW_NAME = "LOW PRICE"
+    if LOW_NAME not in df.columns:
+        LOW_NAME = "LOW"
+    DATE_NAME = "DATE"
+    if DATE_NAME not in df.columns:
+        DATE_NAME = "Date"
     # Add three empty columns between LOW PRICE and CLOSE PRICE
+    low_price_index = df.columns.get_loc(LOW_NAME)
     df.insert(low_price_index + 1, 'HIGH Result', '')
     df.insert(low_price_index + 2, 'LOW Result', '')
     df.insert(low_price_index + 3, 'Empty Column', '')
     # Convert DATE to datetime
+    df[DATE_NAME] = pd.to_datetime(df[DATE_NAME], format='%d-%b-%Y')
     # Detect the last Thursday of each month and insert an empty row after it
+    df['Last_Thursday'] = df[DATE_NAME].apply(last_thursday)
     indices_to_insert = []
     for i in range(len(df)):
+        if df.loc[i, DATE_NAME] == df.loc[i, 'Last_Thursday']:
             indices_to_insert.append(i)
     df['Separator'] = ''
     # Insert empty rows and update the Last_Thursday column
         # Insert an empty row
         df = pd.concat([df.iloc[:idx], pd.DataFrame([{'Separator': 'Separator'}]), df.iloc[idx:]]).reset_index(drop=True)
+    price_columns = [HIGH_NAME, LOW_NAME]
     df[price_columns] = df[price_columns].replace({',': ''}, regex=True).apply(pd.to_numeric, errors='coerce')
     # Calculate global thresholds for HIGH PRICE and LOW PRICE columns
+    high_price_threshold = calculate_threshold(df[HIGH_NAME].max(), is_high_price=True)
+    low_price_threshold = calculate_threshold(df[LOW_NAME].min(), is_high_price=False)
     # Process HIGH PRICE and LOW PRICE columns
     def process_column(df, style_df, column_name, result_column_name, threshold):
                     for j in range(i - 1, -1, -1):
                         diff = abs(df.loc[rows[i], column_name] - df.loc[rows[j], column_name])
                         if diff < threshold and not element_used[rows[j]]:
+                            output_value = get_output_value(df.loc[rows[i], column_name], df.loc[rows[j], column_name], 'high' in column_name.lower())
                             # print(f"i {rows[i]} j {rows[j]} {column_name}")
                             # print(f"{df.loc[rows[i], column_name]} {df.loc[rows[j], column_name]} diff {diff}, threshold: {threshold}, output value {output_value}")
                             df.at[rows[j], result_column_name] = output_value
                             style_df = set_cell_color(style_df, index=rows[j], column=column_name, hex_color=color)
                             # check if there is higher or lower value, if yes, then colorize it
+                            response = check_condition_passed(df, column_name, rows[j], output_value, 'high' in column_name.lower())
                             if response:
                                 style_df = set_cell_color(style_df, index=rows[j], column=result_column_name, hex_color=color)
                             break
     style_df = pd.DataFrame('', index=df.index, columns=df.columns)
     output_file = file.replace(".csv", "_processed.xlsx")
+    process_column(df, style_df, HIGH_NAME, 'HIGH Result', high_price_threshold)
+    process_column(df, style_df, LOW_NAME, 'LOW Result', low_price_threshold)
     # add an empty row before the new month
+    df[DATE_NAME] = df[DATE_NAME].dt.strftime('%d-%b-%Y')
     # df['Last_Thursday'] = df['Last_Thursday'].dt.strftime('%d-%b-%Y')
     styled_df = df.style.apply(lambda _: style_df, axis=None)