derek-thomas HF staff committed on
Commit
1d46c26
1 Parent(s): 1b56724

Initializing cols

Browse files
Files changed (1) hide show
  1. utilities/pushshift_data.py +12 -0
utilities/pushshift_data.py CHANGED
@@ -146,9 +146,21 @@ def submissions_to_dataframe(submissions: List[Dict[str, Any]]) -> pd.DataFrame:
146
  'downs', 'ups']
147
  df = pd.DataFrame(submissions)
148
  df = df.convert_dtypes()
 
 
 
 
 
 
 
 
 
149
  df = df[cols]
 
150
  # Convert the "created_utc" column to a datetime column with timezone information
151
  df['created_utc'] = pd.to_datetime(df['created_utc'], unit='s').dt.tz_localize('UTC')
 
 
152
  df['date'] = df['created_utc'].dt.date.astype(str)
153
  df['time'] = df['created_utc'].dt.time.astype(str)
154
  return df
 
146
  'downs', 'ups']
147
  df = pd.DataFrame(submissions)
148
  df = df.convert_dtypes()
149
+
150
+ # As of Jan 2017 Im getting an error:
151
+ # KeyError: "['downs', 'ups'] not in index"
152
+ # To maintain backwards compatibility I will initialize these cols
153
+ for col in cols:
154
+ if col not in df.columns:
155
+ df[col] = None
156
+
157
+ # Take the subset of columns
158
  df = df[cols]
159
+
160
  # Convert the "created_utc" column to a datetime column with timezone information
161
  df['created_utc'] = pd.to_datetime(df['created_utc'], unit='s').dt.tz_localize('UTC')
162
+
163
+ # Using native type date and time had some incompatibility with the datasets visualization widget
164
  df['date'] = df['created_utc'].dt.date.astype(str)
165
  df['time'] = df['created_utc'].dt.time.astype(str)
166
  return df