TimeParser / app.py
cools's picture
Update app.py
36b4b49
import streamlit as st
import re
import datetime
import copy
# Regular expressions used to pull date/time snippets out of free text.
# Every pattern is compiled case-insensitively.  Raw string literals are used
# so that regex escapes such as "\/" and "\." are not read as (invalid) Python
# string escapes.

# Month alternation shared by the absolute-date patterns.  The three-letter
# forms are listed first, so a bare search (m_abs_1) reports "jan" for
# "January"; patterns that require more text after the month still reach the
# long forms through backtracking.
_MONTH_NAMES = r'(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|january|february|march|april|may|june|july|august|sept|september|october|november|december)'

# Absolute Date [or pieces of it]
# Numeric date such as 12-13-2023 (year optional).  The match includes one
# leading non-digit character and, unless at end of input, one trailing
# character that is not a letter.
mdy_abs_1 = re.compile(r'[^0-9][0-9]{1,2}[-\/.][0-9]{1,2}([-\/.][0-9]{4})?([^a-z]|$)', re.IGNORECASE)
# Month name followed by the day, e.g. "July 3", "Jul. 3rd, 2024".
mdy_abs_3 = re.compile(_MONTH_NAMES + r'\.? ?[0-9]{1,2}(th|st|nd|rd)*,? ?([0-9]{4})?', re.IGNORECASE)
# Day followed by the month name, e.g. "3rd July 2024".
mdy_abs_2 = re.compile(r'[0-9]{1,2}(th|st|nd|rd)* ?' + _MONTH_NAMES + r',? ?([0-9]{4})?', re.IGNORECASE)
# Just a month name.
m_abs_1 = re.compile(_MONTH_NAMES, re.IGNORECASE)
# Just a 1-2 digit number with an optional ordinal suffix.
d_abs_1 = re.compile(r'[0-9]{1,2}(th|st|nd|rd)*', re.IGNORECASE)

# Relative Date
# Days of the week (full name or three-letter abbreviation).
mdy_rel_1 = re.compile(r'((monday|mon)|(tuesday|tue)|(wednesday|wed)|(thursday|thu)|(friday|fri)|(saturday|sat)|(sunday|sun))([^a-z]|)', re.IGNORECASE)
# "tom"/"tomorrow", "today", "tonight", "now", "week"/"weekend".
mdy_rel_2 = re.compile(r'((tom)(orrow)?.?)|(today)|(tonight)|(now)|((week)(end)?)', re.IGNORECASE)
# "X days".  The count accepts one or two digits (like del_hours_rel_1) so that
# "10 days" is not truncated to "0 days".
del_days_rel_1 = re.compile(r'([0-9]{1,2}|( |a|the|one|two|three|four|five|six|seven|eight|nine)) ?days? ?', re.IGNORECASE)

# Absolute Time
# Clock time with a meridiem marker, e.g. "5pm", "5:30 p.m.", "530 a".
hhmm_abs_1 = re.compile(r'[0-9]{1,2}:?([^-\/][0-9]{0,2})? ?(am|pm|a\.m\.|p\.m\.|a m|p m|a([^a-z]|)|p([^a-z]|))', re.IGNORECASE)
# " noon" / " midnight" (the leading space is part of the pattern).
hhmm_abs_2 = re.compile(r' (noon|midnight)', re.IGNORECASE)

# Relative Time
# "X hours" / "X hrs" / "X min(ute)s", with the count as digits or a word.
del_hours_rel_1 = re.compile(r'([0-9]{1,2}|(|a|the|one|two|three|few|four|five|six|seven|eight|nine|ten)) ?((hour|hrs.?)|(min(ute)?))s?', re.IGNORECASE)

# Adverbs used to resolve how several matched patterns combine.  This leads to
# the "addition" problem (described on notion).  The trailing space is part of
# the pattern.
adverbs = re.compile(r'(from|after|before|next|end|this|past) ', re.IGNORECASE)
def abs_date_parse(snip):
    """Resolve an absolute-date snippet to a ``datetime.datetime`` at midnight.

    Two spellings are understood:

    * a named month plus a day and optional year ("July 3", "3rd jul 2025",
      "jan. 5", "july5");
    * an all-numeric date with any separators ("12-25-2025", "3/4").

    Numbers greater than 31 are taken as the year, numbers from 13 to 31 as
    the day.  When a numeric date leaves two values that could each be a
    month or a day, the reading that yields the nearest upcoming date wins.

    If no year is given, the current year is used unless that date is
    already earlier than the current moment, in which case next year is used.
    NOTE: midnight of today's own date is earlier than "now", so today's date
    without a year resolves to next year.

    Args:
        snip: Text of the date snippet.

    Returns:
        ``datetime.datetime`` for the resolved year, month and day.

    Raises:
        ValueError: if a month or day cannot be identified, if too many
            numeric fields are present, or if the values do not form a valid
            calendar date.
    """
    month_dict = {'jan': 1, 'january': 1, 'feb': 2, 'february': 2, 'mar': 3, 'march': 3, 'apr': 4, 'april': 4, 'may': 5, 'jun': 6, 'june': 6, 'jul': 7, 'july': 7, 'aug': 8, 'august': 8, 'sep': 9, 'sept': 9, 'september': 9, 'oct': 10, 'october': 10, 'nov': 11, 'november': 11, 'dec': 12, 'december': 12}
    month, date, year = None, None, None
    snip = snip.replace(',', '').lower()
    now = datetime.datetime.now()

    if re.search('[a-z]{3}', snip) is not None:
        # Named-month form.  Drop ordinal suffixes glued to a number first so
        # that "5thjuly" still yields the word "july", then tokenise by
        # character class instead of splitting on spaces: punctuation
        # ("jan.") and missing spaces ("july5") must not break parsing.
        cleaned = re.sub('(?<=[0-9])(th|st|nd|rd)+', ' ', snip)
        for word in re.findall('[a-z]+', cleaned):
            if word in month_dict:
                month = month_dict[word]
        for token in re.findall('[0-9]+', cleaned):
            value = int(token)
            if value > 31:
                year = value
            else:
                date = value
        if month is None:
            raise ValueError("There should have been a month, but could not identify")
        if date is None:
            raise ValueError("There should have been a date, but could not identify")
    else:
        # Numeric form.  Taking the digit runs makes the separator irrelevant
        # and ignores stray leading/trailing characters kept by the regex.
        tokens = re.findall('[0-9]+', snip)
        remainder = list(tokens)
        for token in tokens:
            value = int(token)
            if value > 31 and year is None:
                year = value
                remainder.remove(token)
            elif 12 < value <= 31 and date is None:
                date = value
                remainder.remove(token)
        if len(remainder) >= 3:
            raise ValueError("Something OOFED")
        if len(remainder) == 1:  # Just fit it where it belongs
            month = int(remainder[0])
            if date is None:
                date = int(remainder[0])
        elif len(remainder) == 2:  # Probably lack of clarity on month and date
            print("There are multiple options for what this could mean. Going to select shortest one.")
            first, second = int(remainder[0]), int(remainder[1])
            # An explicit year is honoured; otherwise try this year, then next.
            candidate_years = [year] if year is not None else [now.year, now.year + 1]
            chosen = None
            for candidate_year in candidate_years:
                month_first = datetime.datetime(candidate_year, first, second)
                day_first = datetime.datetime(candidate_year, second, first)
                upcoming = [c for c in (month_first, day_first) if c > now]
                if upcoming:
                    # min() also covers first == second, where both readings
                    # are the same date.
                    chosen = min(upcoming)
                    break
            if chosen is None:
                # Explicit year that lies wholly in the past: there is no
                # "nearest upcoming" reading, so fall back to month-first.
                chosen = month_first
            year, month, date = chosen.year, chosen.month, chosen.day

    if month is None or date is None:
        raise ValueError("Could not identify a month and a date in '" + snip + "'")
    if year is None:  # May need to check around year-changes if this gets funky
        year = now.year
        if datetime.datetime(year, month, date) < now:
            year = now.year + 1
    return datetime.datetime(int(year), int(month), int(date))
def rel_date_parse(snip):
    """Convert a relative-date snippet into a ``datetime.timedelta`` from today.

    Checks are made in this order, and the first one that applies wins:

    1. a weekday name or abbreviation -> days until that weekday (0 when it
       is today's weekday; an earlier weekday rolls over to next week);
    2. "tom" / "tomorrow" -> 1 day;
    3. "week" -> 7 days;
    4. "now" / "today" / "tonight" -> 0 days;
    5. "<count> day(s)" where the count is digits or a word from "one" to
       "ten"; a singular "day" with no count ("a day") is 1 day.

    Args:
        snip: Text of the relative-date snippet.

    Returns:
        The offset as ``datetime.timedelta``, or ``None`` when nothing in the
        snippet is recognised.
    """
    snip = snip.lower()
    dow_dict = {'monday':0, 'mon':0, 'tue':1, 'tues':1, 'tuesday':1, 'wed':2, 'weds':2, 'wednesday':2, 'thu':3, 'thurs':3, 'fri':4, 'friday':4, 'sat':5, 'saturday':5, 'sun':6, 'sunday':6}
    for name, target_dow in dow_dict.items():
        if name in snip:
            current_dow = datetime.datetime.now().weekday()
            if target_dow < current_dow:  # If they say same day, should that be next week? Or this week? Need to fix eventually
                print("--------------THIS DAY-OF-WEEK HAS PASSED----ADDING +7 DAYS------------------")
                target_dow += 7
            return datetime.timedelta(days=target_dow - current_dow)

    # The extraction regex also accepts the abbreviation "tom", so test for
    # that prefix rather than the full word.
    if "tom" in snip:
        return datetime.timedelta(days=1)
    if "week" in snip:
        return datetime.timedelta(days=7)
    if "now" in snip or "today" in snip or "tonight" in snip:
        return datetime.timedelta(days=0)

    # "<count> day(s)".  The unit must not be part of a longer word.
    unit = re.search('(?<![a-z])days?(?![a-z])', snip)
    if unit is None:
        return None
    digits = re.search('[0-9]+', snip)
    if digits is not None:
        return datetime.timedelta(days=int(digits.group(0)))
    number_words = {'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10}
    for word in re.findall('[a-z]+', snip):
        if word in number_words:
            return datetime.timedelta(days=number_words[word])
    if unit.group(0) == 'day':
        # Singular with no explicit count: "a day", "the day".
        return datetime.timedelta(days=1)
    return None
def abs_time_parse(snip, inp):
    """Convert a clock-time snippet into a ``datetime.time``.

    Accepted spellings: "H:MM" with an optional meridiem ("5:30pm"), the
    words "noon" and "midnight", and bare digits where the last two of three
    or more digits are the minutes ("530 pm" -> 17:30, "5pm" -> 17:00).

    Any "p" in the snippet moves an hour below 12 into the afternoon; any "a"
    turns an hour of 12 or more into the morning (so "12am" is 00:00).

    Args:
        snip: Text of the time snippet.
        inp: The full user input.  Unused; kept for call compatibility.

    Returns:
        ``datetime.time`` with the parsed hour and minute.

    Raises:
        ValueError: if no digits can be found, or the hour/minute values are
            outside the range ``datetime.time`` accepts.
    """
    snip = snip.lower()
    if ':' in snip:
        # Read the digits on each side of the colon directly, so a
        # single-digit minute ("5:3pm") or a space after the colon
        # ("5: 30pm") is not mixed up with the meridiem letters.
        clock = re.search('([0-9]{1,2}) *: *([0-9]{0,2})', snip)
        if clock is None:
            raise ValueError("Could not read a clock time from '" + snip + "'")
        hours = int(clock.group(1))
        minutes = int(clock.group(2) or 0)
    elif 'noon' in snip:
        # Named times are unambiguous: no meridiem handling or warning needed.
        return datetime.time(hour=12, minute=0)
    elif 'midnight' in snip:
        return datetime.time(hour=0, minute=0)
    else:
        digits = "".join(c for c in snip if c.isdigit())
        if not digits:
            raise ValueError("Could not read a clock time from '" + snip + "'")
        if len(digits) >= 3:
            hours, minutes = int(digits[:-2]), int(digits[-2:])
        else:
            hours, minutes = int(digits), 0

    if 'p' in snip and hours < 12:
        hours += 12
    if 'a' in snip and hours >= 12:
        hours -= 12
    if 'p' not in snip and 'a' not in snip:
        print("\nNo 'AM' or 'PM' provided. Making assumptions.")
    return datetime.time(hour=hours, minute=minutes)
def rel_time_parse(snip):
    """Convert a relative-time snippet ("2 hours", "30 min") into a timedelta.

    The unit is hours when the snippet contains "hour", "hr" or "hrs", and
    minutes when it contains "min" (covering "min", "mins", "minute(s)").
    The count is taken from the digits in the snippet when there are any;
    otherwise from a number word ("one" to "ten", with "few" counting as 3);
    otherwise it is 1 for a singular unit and 0 for a bare plural unit.

    Args:
        snip: Text of the relative-time snippet.

    Returns:
        The offset as ``datetime.timedelta``, or ``None`` when the snippet
        names neither hours nor minutes.

    Raises:
        Exception: if the snippet names both hours and minutes.
    """
    snip = snip.lower()
    # "hr" must stand alone: the letters also occur inside "three".
    has_hours = re.search('hour|(?<![a-z])hrs?(?![a-z])', snip) is not None
    has_minutes = 'min' in snip
    if has_hours and has_minutes:
        raise Exception("Does not support a relative time input with both (i) hours and (ii) minutes")
    if not has_hours and not has_minutes:
        return None

    digits = "".join(c for c in snip if c.isdigit())
    if digits:
        val = int(digits)
    else:
        is_plural = re.search('hours|hrs|minutes|mins', snip) is not None
        val = 0 if is_plural else 1
        number_words = (("one", 1), ("two", 2), ("three", 3), ("few", 3), ("four", 4), ("five", 5), ("six", 6), ("seven", 7), ("eight", 8), ("nine", 9), ("ten", 10))
        for word, number in number_words:
            # Later entries override earlier ones.
            if word in snip:
                val = number

    if has_hours:
        return datetime.timedelta(hours=val)
    return datetime.timedelta(minutes=val)
def get_relevant_adverb(inp, term_1, term_2, adverbs, adv_inds):
    """Find the first adverb that lies between two terms of the input.

    Both terms are located in ``inp`` as literal, case-insensitive text
    (first occurrence each).  The gap between them runs from the end of
    whichever comes first to the start of the other.

    Args:
        inp: The full user input.
        term_1: Text of the first term.
        term_2: Text of the second term.
        adverbs: Adverb strings, parallel to ``adv_inds``.
        adv_inds: ``(start, end)`` index pairs into ``inp``, one per adverb.

    Returns:
        ``(index, adverb)`` for the first adverb whose span lies entirely
        inside the gap, or ``(None, None)`` when there is none, when either
        term is not found, or when the two terms overlap.
    """
    # The terms are text lifted from the input, not patterns: escape them so
    # characters like "." or "(" are matched literally.
    m_1 = re.search(re.escape(term_1), inp, re.IGNORECASE)
    m_2 = re.search(re.escape(term_2), inp, re.IGNORECASE)
    if m_1 is None or m_2 is None:
        return None, None

    if m_1.end() <= m_2.start():
        start, end = m_1.end(), m_2.start()
    elif m_2.end() <= m_1.start():
        start, end = m_2.end(), m_1.start()
    else:
        # Overlapping terms leave no gap for an adverb.
        return None, None

    for i, (s, e) in enumerate(adv_inds):
        if s >= start and e <= end:
            return i, adverbs[i]
    return None, None
def rel_rel_date_resolver(inp, term_1, term_2, td_1, td_2, adverbs, adv_inds):
    """Combine two relative-date offsets using the adverb between them.

    * "after" / "from" / "past": the offsets are added.
    * "before": the offset of the term that appears first in ``inp`` is
      subtracted from the other one ("3 days before friday").
    * no adverb: the two offsets must be equal and that value is returned.

    Args:
        inp: The full user input.
        term_1: Text of the first relative-date term.
        term_2: Text of the second relative-date term.
        td_1: ``datetime.timedelta`` resolved for ``term_1``.
        td_2: ``datetime.timedelta`` resolved for ``term_2``.
        adverbs: Adverb strings found in ``inp``.
        adv_inds: ``(start, end)`` index pairs into ``inp``, one per adverb.

    Returns:
        ``(combined_timedelta, adverb_index)``; the index is ``None`` when no
        adverb was involved.

    Raises:
        Exception: if the offsets differ with no adverb to relate them, or
            the adverb between the terms is one this resolver does not handle.
    """
    adv_ind, adv = get_relevant_adverb(inp, term_1, term_2, adverbs, adv_inds)
    # Adverbs are sliced from the raw input, so normalise case before comparing.
    adv_key = adv.lower() if adv is not None else None

    if adv_key in ("after", "from", "past"):
        return td_1 + td_2, adv_ind
    if adv_key == "before":
        # Only the relative order of the terms matters; the terms are text
        # lifted from the input, so locate them as plain text.
        lowered = inp.lower()
        term_1_start = lowered.find(term_1.lower())
        term_2_start = lowered.find(term_2.lower())
        if term_1_start < term_2_start:
            return td_2 - td_1, adv_ind
        return td_1 - td_2, adv_ind
    if adv is None:
        if td_1 == td_2:  # Assume that they are the same
            print("-------------------ASSUMING CONSISTENCY BETWEEN RELATIVES-------------------------")
            return td_1, adv_ind
        raise Exception("There is a conflict with the datetimes provided. '" + term_1.upper() + "' != '" + term_2.upper() + "'")
    # Other adverbs (e.g. "next", "end", "this") have no combination rule;
    # fail explicitly instead of falling off the end and returning None.
    raise Exception("The adverb '" + adv + "' between '" + term_1 + "' and '" + term_2 + "' is not supported")
def abs_rel_date_resolver(inp, term_abs, term_rel, abs_dt, rel_td, adverbs, adv_inds):
    """Work out the offset to apply to an absolute date given a relative one.

    * "after" / "from" / "past" between the terms: the relative offset is
      returned unchanged.
    * "before": the negated relative offset is returned.
    * no adverb: the relative term must describe the same month and day as
      the absolute date (measured from the current time), in which case a
      zero offset is returned.

    Args:
        inp: The full user input.
        term_abs: Text of the absolute-date term.
        term_rel: Text of the relative-date term.
        abs_dt: ``datetime.datetime`` resolved for ``term_abs``.
        rel_td: ``datetime.timedelta`` resolved for ``term_rel``.
        adverbs: Adverb strings found in ``inp``.
        adv_inds: ``(start, end)`` index pairs into ``inp``, one per adverb.

    Returns:
        ``datetime.timedelta`` to add to ``abs_dt``.

    Raises:
        Exception: if the two terms disagree with no adverb to relate them,
            or the adverb between them is one this resolver does not handle.
    """
    adv_ind, adv = get_relevant_adverb(inp, term_abs, term_rel, adverbs, adv_inds)
    # Adverbs are sliced from the raw input, so normalise case before comparing.
    adv_key = adv.lower() if adv is not None else None

    if adv_key in ("after", "from", "past"):
        return rel_td
    if adv_key == "before":
        return -rel_td
    if adv is None:  # Check consistency
        target = datetime.datetime.now() + rel_td
        if abs_dt.month == target.month and abs_dt.day == target.day:  # The same
            print("-------------------------REDUNDANCY DETECTED--------------------------------------")
            return datetime.timedelta(days=0, hours=0, minutes=0)
        raise Exception("There is a conflict with the datetimes provided. '" + str(abs_dt).upper() + "' != '" + term_rel.upper() + "'")
    # Other adverbs (e.g. "next", "end", "this") have no combination rule;
    # fail explicitly instead of falling off the end and returning None.
    raise Exception("The adverb '" + adv + "' between '" + term_abs + "' and '" + term_rel + "' is not supported")
def time_parse(inp, debug=False):
    """Turn a free-text request such as "tomorrow at 5pm" into a datetime.

    Steps:

    1. Extract at most one absolute-date snippet and up to three
       relative-date snippets (weekday, tomorrow/today/week, "X days"), plus
       every adverb, from ``inp``.  Only the first match of each pattern is
       used.
    2. Remove the date snippets and extract time snippets (clock time,
       noon/midnight, "X hours/minutes") from what is left.
    3. Parse each snippet and combine the results: a lone relative time is
       added to now; relative and absolute dates are reconciled through the
       resolver functions; with no date at all, today is assumed.
    4. Apply a single absolute time, if that is the only time given, and push
       the result forward one day if it is already in the past.

    Args:
        inp: The user's request text.
        debug: When true, print the extracted snippets and parsed values.

    Returns:
        ``datetime.datetime`` for the requested moment.

    Raises:
        Exception: if no time can be identified, if a relative-date snippet
            cannot be resolved, or if three relative dates are present.
            Errors raised by the individual parsers and resolvers propagate.
    """
    # ---- 1. Date snippets ----
    match_mdy_abs_1 = re.search(mdy_abs_1, inp)
    match_mdy_abs_2 = re.search(mdy_abs_2, inp)
    match_mdy_abs_3 = re.search(mdy_abs_3, inp)
    match_m_abs_1 = re.search(m_abs_1, inp)
    match_d_abs_1 = re.search(d_abs_1, inp)

    abs_date_extraction = ""
    if match_mdy_abs_1 is not None:  # Numeric; month/day order is settled by abs_date_parse
        abs_date_extraction = match_mdy_abs_1.group(0).strip()
    elif match_mdy_abs_2 is not None:  # Day first ("3rd July")
        abs_date_extraction = match_mdy_abs_2.group(0).strip()
    elif match_mdy_abs_3 is not None:  # Month first ("July 3")
        abs_date_extraction = match_mdy_abs_3.group(0).strip()
    elif match_m_abs_1 is not None and match_d_abs_1 is not None:
        # Month and number found separately.  Fix this to be in order? Or make this a new regex?
        # NOTE(review): the number is simply the first 1-2 digit run anywhere
        # in the input (it may belong to a time), and the two pieces are
        # joined without a space, so the result is usually not a substring
        # of the input.
        abs_date_extraction = match_m_abs_1.group(0).strip() + match_d_abs_1.group(0).strip()

    # Should be re.finditer in case a pattern appears multiple times.
    rel_date_extraction = [
        match.group(0).strip()
        for match in (re.search(mdy_rel_1, inp), re.search(mdy_rel_2, inp), re.search(del_days_rel_1, inp))
        if match is not None
    ]

    # Adverbs and their positions; remember, these are used for addition.
    adv_inds = [(m.start(0), m.end(0)) for m in re.finditer(adverbs, inp)]
    advs = [inp[s:e].strip() for (s, e) in adv_inds]

    # ---- 2. Time snippets, searched in the input with the dates removed ----
    purged = inp.replace(abs_date_extraction, '')
    for rde in rel_date_extraction:
        purged = purged.replace(rde, '')

    abs_time_extraction = [
        match.group(0).strip()
        for match in (re.search(hhmm_abs_1, purged), re.search(hhmm_abs_2, purged))
        if match is not None
    ]
    rel_time_extraction = []
    match_del_hours_rel_1 = re.search(del_hours_rel_1, purged)
    if match_del_hours_rel_1 is not None:
        rel_time_extraction.append(match_del_hours_rel_1.group(0).strip())

    if debug:
        print("Abs Date: \t\t" + abs_date_extraction)
        print("Rel Date: \t\t" + str(rel_date_extraction))
        print("Abs Time: \t\t" + str(abs_time_extraction))
        print("Rel Time: \t\t" + str(rel_time_extraction))
        print("Adverbs: \t\t" + str(advs))

    # ---- 3. Parse the snippets ----
    d = datetime.datetime.now()
    abs_date = None
    if abs_date_extraction != "":
        abs_date = abs_date_parse(abs_date_extraction)
    rel_date_deltas = [rel_date_parse(rde) for rde in rel_date_extraction]
    abs_times = [abs_time_parse(ate, inp) for ate in abs_time_extraction]
    rel_time_deltas = [rel_time_parse(rte) for rte in rel_time_extraction]

    if debug:
        print('\n\n')
        print('Current Date: \t\t' + str(d))
        print('Abs Date: \t\t' + str(abs_date))
        print('Rel Date Deltas: \t' + str(rel_date_deltas))
        print('Abs Time: \t\t' + str(abs_times))
        print('Rel Time Deltas: \t' + str(rel_time_deltas))

    if len(abs_times) == 0 and len(rel_time_deltas) == 0:
        raise Exception("Could not identify a time. Be sure to use 'AM/PM' if you specify an absolute time")

    # rel_date_parse returns None for text it cannot resolve; report that
    # here instead of letting the None fail inside the arithmetic below.
    for rde, delta in zip(rel_date_extraction, rel_date_deltas):
        if delta is None:
            raise Exception("Could not resolve the relative date '" + rde + "'")

    # ---- 4. Combine ----
    datetime_request = None

    # A lone relative time resolves everything else.
    if len(rel_time_deltas) == 1 and abs_date is None and len(abs_times) == 0:
        if len(rel_date_deltas) == 0:  # Straight up "in X hours"
            return d + rel_time_deltas[0]
        if len(rel_date_deltas) == 1:
            return d + rel_time_deltas[0] + rel_date_deltas[0]

    if abs_date is None:
        if len(rel_date_deltas) == 1:
            datetime_request = d + rel_date_deltas[0]
        elif len(rel_date_deltas) == 2:  # Two conflicting relative
            datetime_request = d + rel_rel_date_resolver(inp, rel_date_extraction[0], rel_date_extraction[1], rel_date_deltas[0], rel_date_deltas[1], advs, adv_inds)[0]
    else:
        if len(rel_date_deltas) == 0:  # Regular abs date
            datetime_request = abs_date
        elif len(rel_date_deltas) == 1:  # Abs date conflicting with rel
            datetime_request = abs_date + abs_rel_date_resolver(inp, abs_date_extraction, rel_date_extraction[0], abs_date, rel_date_deltas[0], advs, adv_inds)
        elif len(rel_date_deltas) == 2:
            ###### EXPERIMENTAL ###########
            print("--------------------------------TRYING EXPERIMENTAL CODE (METHOD 1)---------------------------")
            # Method 1: Resolve rel-rel, then resolve that with abs
            # Method 2: Resolve rel-abs, then resolve that with additional rel? For now, I only do method 1
            rel_td_resolved, adv_ind = rel_rel_date_resolver(inp, rel_date_extraction[0], rel_date_extraction[1], rel_date_deltas[0], rel_date_deltas[1], advs, adv_inds)
            print(rel_td_resolved)
            if adv_ind is not None:
                # The adverb consumed by the rel-rel step must not be reused
                # when relating the result to the absolute date.
                adv_inds.pop(adv_ind)
                advs.pop(adv_ind)
            datetime_request = abs_date + abs_rel_date_resolver(inp, abs_date_extraction, rel_date_extraction[0], abs_date, rel_td_resolved, advs, adv_inds)

    # Additional Rules: (1) 3+ Rel-rel date conflicts
    if len(rel_date_deltas) == 3:
        raise Exception("Have not yet implemented Resolver for 3 rel-rel dates")

    # If no date, then assume today
    if datetime_request is None:
        print("------------------Assuming datetime--------------")
        datetime_request = datetime.datetime.now()

    if len(abs_times) == 1 and len(rel_time_deltas) == 0:
        # A stated clock time is exact to the minute: do not carry over the
        # seconds/microseconds of the moment the request was parsed.
        datetime_request = datetime_request.replace(hour=abs_times[0].hour, minute=abs_times[0].minute, second=0, microsecond=0)

    # Final check: if the resulting time has already passed, add a day.
    if datetime_request < datetime.datetime.now():
        print("Requested datetime is less than current one. Adding a day")
        datetime_request += datetime.timedelta(days=1)
    return datetime_request
# Streamlit entry point: read a request, parse it and show the resolved time.
x = st.text_input("Request", value="")
d = None
# The text box starts empty; parsing an empty request would always raise, so
# wait until the user has typed something.
if x.strip():
    try:
        d = time_parse(x, debug=True)
    except Exception as exc:  # UI boundary: show parser errors instead of a traceback
        st.error(str(exc))
    else:
        st.code('Request Time: \t\t' + d.strftime("%m/%d @ %I:%M %p"))