Shrikrishna committed on
Commit
acfb3f7
1 Parent(s): f71f53c

Upload preprocessor.py

Browse files
Files changed (1) hide show
  1. preprocessor.py +5 -3
preprocessor.py CHANGED
@@ -2,14 +2,16 @@ import re
2
  import pandas as pd
3
 
4
  def preprocess(data):
5
- pattern = '\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}\s-\s'
 
6
 
7
  messages = re.split(pattern, data)[1:]
8
  dates = re.findall(pattern, data)
9
 
10
  df = pd.DataFrame({'user_message': messages, 'message_date': dates})
11
  # convert message_date type
12
- df['message_date'] = pd.to_datetime(df['message_date'], format='%d/%m/%Y, %H:%M - ')
 
13
 
14
  df.rename(columns={'message_date': 'date'}, inplace=True)
15
 
@@ -40,7 +42,7 @@ def preprocess(data):
40
  period = []
41
  for hour in df[['day_name', 'hour']]['hour']:
42
  if hour == 23:
43
- period.append(str(hour) + "-" + str('00'))
44
  elif hour == 0:
45
  period.append(str('00') + "-" + str(hour + 1))
46
  else:
 
2
  import pandas as pd
3
 
4
  def preprocess(data):
5
+ #pattern = '\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}\s-\s'
6
+ pattern = '\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}\s\S{2}\s-\s'
7
 
8
  messages = re.split(pattern, data)[1:]
9
  dates = re.findall(pattern, data)
10
 
11
  df = pd.DataFrame({'user_message': messages, 'message_date': dates})
12
  # convert message_date type
13
+ #df['message_date'] = pd.to_datetime(df['message_date'], format='%d/%m/%Y, %H:%M - ')
14
+ df['message_date'] = pd.to_datetime(df['message_date'], format='%d/%m/%Y, %H:%M %p - ')
15
 
16
  df.rename(columns={'message_date': 'date'}, inplace=True)
17
 
 
42
  period = []
43
  for hour in df[['day_name', 'hour']]['hour']:
44
  if hour == 23:
45
+ period.append(str(hour) + "-" + str('1'))
46
  elif hour == 0:
47
  period.append(str('00') + "-" + str(hour + 1))
48
  else: