Spaces:
Sleeping
Sleeping
Chiragkumar Savani
committed on
Commit
•
42c790d
1
Parent(s):
7155478
Fixes for name changes in columns for various excel files
Browse files
app.py
CHANGED
@@ -67,41 +67,34 @@ def get_output_value(value1, value2, is_high=False):
|
|
67 |
def process_csv(file):
|
68 |
df = pd.read_csv(file)
|
69 |
df.columns = df.columns.str.strip() # Remove leading and trailing whitespace from column names
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
# Add three empty columns between LOW PRICE and CLOSE PRICE
|
72 |
-
low_price_index = df.columns.get_loc(
|
73 |
df.insert(low_price_index + 1, 'HIGH Result', '')
|
74 |
df.insert(low_price_index + 2, 'LOW Result', '')
|
75 |
df.insert(low_price_index + 3, 'Empty Column', '')
|
76 |
|
77 |
# Convert DATE to datetime
|
78 |
-
df[
|
79 |
|
80 |
# Detect the last Thursday of each month and insert an empty row after it
|
81 |
-
df['Last_Thursday'] = df[
|
82 |
-
# is_last_thursday = df['DATE'] == df['Last_Thursday']
|
83 |
-
|
84 |
-
# rows = []
|
85 |
-
# for i, row in df.iterrows():
|
86 |
-
# rows.append(row)
|
87 |
-
# if is_last_thursday[i]:
|
88 |
-
# # Append an empty row (NaN values) after the last Thursday
|
89 |
-
# empty_row = pd.Series([None] * len(df.columns), index=df.columns)
|
90 |
-
# rows.append(empty_row)
|
91 |
-
|
92 |
-
# df = pd.DataFrame(rows)
|
93 |
-
# print(df)
|
94 |
-
# df = df.drop(columns=['Last_Thursday'])
|
95 |
|
96 |
indices_to_insert = []
|
97 |
|
98 |
for i in range(len(df)):
|
99 |
-
if df.loc[i,
|
100 |
indices_to_insert.append(i)
|
101 |
-
|
102 |
-
# Insert empty rows
|
103 |
-
# for idx in reversed(indices_to_insert):
|
104 |
-
# df = pd.concat([df.iloc[:idx], pd.DataFrame([{}]), df.iloc[idx:]]).reset_index(drop=True)
|
105 |
df['Separator'] = ''
|
106 |
|
107 |
# Insert empty rows and update the Last_Thursday column
|
@@ -109,13 +102,12 @@ def process_csv(file):
|
|
109 |
# Insert an empty row
|
110 |
df = pd.concat([df.iloc[:idx], pd.DataFrame([{'Separator': 'Separator'}]), df.iloc[idx:]]).reset_index(drop=True)
|
111 |
|
112 |
-
|
113 |
-
price_columns = ['HIGH PRICE', 'LOW PRICE']
|
114 |
df[price_columns] = df[price_columns].replace({',': ''}, regex=True).apply(pd.to_numeric, errors='coerce')
|
115 |
|
116 |
# Calculate global thresholds for HIGH PRICE and LOW PRICE columns
|
117 |
-
high_price_threshold = calculate_threshold(df[
|
118 |
-
low_price_threshold = calculate_threshold(df[
|
119 |
|
120 |
# Process HIGH PRICE and LOW PRICE columns
|
121 |
def process_column(df, style_df, column_name, result_column_name, threshold):
|
@@ -132,7 +124,7 @@ def process_csv(file):
|
|
132 |
for j in range(i - 1, -1, -1):
|
133 |
diff = abs(df.loc[rows[i], column_name] - df.loc[rows[j], column_name])
|
134 |
if diff < threshold and not element_used[rows[j]]:
|
135 |
-
output_value = get_output_value(df.loc[rows[i], column_name], df.loc[rows[j], column_name], '
|
136 |
# print(f"i {rows[i]} j {rows[j]} {column_name}")
|
137 |
# print(f"{df.loc[rows[i], column_name]} {df.loc[rows[j], column_name]} diff {diff}, threshold: {threshold}, output value {output_value}")
|
138 |
df.at[rows[j], result_column_name] = output_value
|
@@ -144,7 +136,7 @@ def process_csv(file):
|
|
144 |
style_df = set_cell_color(style_df, index=rows[j], column=column_name, hex_color=color)
|
145 |
|
146 |
# check if there is higher or lower value, if yes, then colorize it
|
147 |
-
response = check_condition_passed(df, column_name, rows[j], output_value, '
|
148 |
if response:
|
149 |
style_df = set_cell_color(style_df, index=rows[j], column=result_column_name, hex_color=color)
|
150 |
break
|
@@ -156,11 +148,11 @@ def process_csv(file):
|
|
156 |
style_df = pd.DataFrame('', index=df.index, columns=df.columns)
|
157 |
output_file = file.replace(".csv", "_processed.xlsx")
|
158 |
|
159 |
-
process_column(df, style_df,
|
160 |
-
process_column(df, style_df,
|
161 |
|
162 |
# add an empty row before the new month
|
163 |
-
df[
|
164 |
# df['Last_Thursday'] = df['Last_Thursday'].dt.strftime('%d-%b-%Y')
|
165 |
|
166 |
styled_df = df.style.apply(lambda _: style_df, axis=None)
|
|
|
67 |
def process_csv(file):
|
68 |
df = pd.read_csv(file)
|
69 |
df.columns = df.columns.str.strip() # Remove leading and trailing whitespace from column names
|
70 |
+
HIGH_NAME = "HIGH PRICE"
|
71 |
+
if HIGH_NAME not in df.columns:
|
72 |
+
HIGH_NAME = "HIGH"
|
73 |
+
LOW_NAME = "LOW PRICE"
|
74 |
+
if LOW_NAME not in df.columns:
|
75 |
+
LOW_NAME = "LOW"
|
76 |
+
|
77 |
+
DATE_NAME = "DATE"
|
78 |
+
if DATE_NAME not in df.columns:
|
79 |
+
DATE_NAME = "Date"
|
80 |
|
81 |
# Add three empty columns immediately after the low-price column (LOW PRICE or LOW)
|
82 |
+
low_price_index = df.columns.get_loc(LOW_NAME)
|
83 |
df.insert(low_price_index + 1, 'HIGH Result', '')
|
84 |
df.insert(low_price_index + 2, 'LOW Result', '')
|
85 |
df.insert(low_price_index + 3, 'Empty Column', '')
|
86 |
|
87 |
# Convert DATE to datetime
|
88 |
+
df[DATE_NAME] = pd.to_datetime(df[DATE_NAME], format='%d-%b-%Y')
|
89 |
|
90 |
# Detect the last Thursday of each month and insert an empty row after it
|
91 |
+
df['Last_Thursday'] = df[DATE_NAME].apply(last_thursday)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
|
93 |
indices_to_insert = []
|
94 |
|
95 |
for i in range(len(df)):
|
96 |
+
if df.loc[i, DATE_NAME] == df.loc[i, 'Last_Thursday']:
|
97 |
indices_to_insert.append(i)
|
|
|
|
|
|
|
|
|
98 |
df['Separator'] = ''
|
99 |
|
100 |
# Insert empty rows and update the Last_Thursday column
|
|
|
102 |
# Insert an empty row
|
103 |
df = pd.concat([df.iloc[:idx], pd.DataFrame([{'Separator': 'Separator'}]), df.iloc[idx:]]).reset_index(drop=True)
|
104 |
|
105 |
+
price_columns = [HIGH_NAME, LOW_NAME]
|
|
|
106 |
df[price_columns] = df[price_columns].replace({',': ''}, regex=True).apply(pd.to_numeric, errors='coerce')
|
107 |
|
108 |
# Calculate global thresholds for HIGH PRICE and LOW PRICE columns
|
109 |
+
high_price_threshold = calculate_threshold(df[HIGH_NAME].max(), is_high_price=True)
|
110 |
+
low_price_threshold = calculate_threshold(df[LOW_NAME].min(), is_high_price=False)
|
111 |
|
112 |
# Process HIGH PRICE and LOW PRICE columns
|
113 |
def process_column(df, style_df, column_name, result_column_name, threshold):
|
|
|
124 |
for j in range(i - 1, -1, -1):
|
125 |
diff = abs(df.loc[rows[i], column_name] - df.loc[rows[j], column_name])
|
126 |
if diff < threshold and not element_used[rows[j]]:
|
127 |
+
output_value = get_output_value(df.loc[rows[i], column_name], df.loc[rows[j], column_name], 'high' in column_name.lower())
|
128 |
# print(f"i {rows[i]} j {rows[j]} {column_name}")
|
129 |
# print(f"{df.loc[rows[i], column_name]} {df.loc[rows[j], column_name]} diff {diff}, threshold: {threshold}, output value {output_value}")
|
130 |
df.at[rows[j], result_column_name] = output_value
|
|
|
136 |
style_df = set_cell_color(style_df, index=rows[j], column=column_name, hex_color=color)
|
137 |
|
138 |
# check if there is higher or lower value, if yes, then colorize it
|
139 |
+
response = check_condition_passed(df, column_name, rows[j], output_value, 'high' in column_name.lower())
|
140 |
if response:
|
141 |
style_df = set_cell_color(style_df, index=rows[j], column=result_column_name, hex_color=color)
|
142 |
break
|
|
|
148 |
style_df = pd.DataFrame('', index=df.index, columns=df.columns)
|
149 |
output_file = file.replace(".csv", "_processed.xlsx")
|
150 |
|
151 |
+
process_column(df, style_df, HIGH_NAME, 'HIGH Result', high_price_threshold)
|
152 |
+
process_column(df, style_df, LOW_NAME, 'LOW Result', low_price_threshold)
|
153 |
|
154 |
# Format the date column back to '%d-%b-%Y' strings
|
155 |
+
df[DATE_NAME] = df[DATE_NAME].dt.strftime('%d-%b-%Y')
|
156 |
# df['Last_Thursday'] = df['Last_Thursday'].dt.strftime('%d-%b-%Y')
|
157 |
|
158 |
styled_df = df.style.apply(lambda _: style_df, axis=None)
|