Zack committed on
Commit
1dcf872
1 Parent(s): 775614e

fix: Update clean method to work with both csv input types

Browse files
Files changed (1) hide show
  1. app.py +16 -9
app.py CHANGED
@@ -60,20 +60,27 @@ def plot_anomalies(df_test_value, data, anomalies):
60
  return fig
61
 
62
  def clean_data(df):
63
- # Convert "Date" and "Hour" columns into datetime format
64
- df["timestamp"] = pd.to_datetime(df["Date"] + ' ' + df["Hour"].astype(str) + ":00:00")
65
-
66
- # Keep only necessary columns
67
- df = df[["timestamp", "Hourly_Labor_Hours_Total"]]
68
-
69
- # Rename column
70
- df.rename(columns={"Hourly_Labor_Hours_Total": "value"}, inplace=True)
71
-
 
 
 
 
 
 
72
  return df
73
 
74
  def master(file):
75
  # read file
76
  data = pd.read_csv(file.name)
 
77
 
78
  # clean data
79
  data = clean_data(data)
 
60
  return fig
61
 
62
  def clean_data(df):
63
+ # Check if 'Date' and 'Hour' columns exist in the dataframe
64
+ if "Date" in df.columns and "Hour" in df.columns:
65
+ # Convert "Date" and "Hour" columns into datetime format
66
+ df["timestamp"] = pd.to_datetime(df["Date"] + ' ' + df["Hour"].astype(str) + ":00:00")
67
+ # Keep only necessary columns
68
+ df = df[["timestamp", "Hourly_Labor_Hours_Total"]]
69
+ # Rename column
70
+ df.rename(columns={"Hourly_Labor_Hours_Total": "value"}, inplace=True)
71
+ elif "timestamp" in df.columns:
72
+ # If 'timestamp' column exists, rename the value column if necessary
73
+ if "Hourly_Labor_Hours_Total" in df.columns:
74
+ df.rename(columns={"Hourly_Labor_Hours_Total": "value"}, inplace=True)
75
+ df = df[["timestamp", "value"]]
76
+ else:
77
+ raise ValueError("Input data must have either 'Date' and 'Hour' columns, or a 'timestamp' column.")
78
  return df
79
 
80
  def master(file):
81
  # read file
82
  data = pd.read_csv(file.name)
83
+ data['timestamp'] = pd.to_datetime(data['timestamp'])
84
 
85
  # clean data
86
  data = clean_data(data)