import copy
import datetime
import re

import streamlit as st

# All patterns below are compiled case-insensitively and are used with
# ``re.search`` on free-form user text.  They are raw strings so that
# sequences such as ``\/`` and ``\.`` reach the regex engine untouched instead
# of being (invalid) Python string escapes.

# Every month name and abbreviation that the date patterns recognise.
_MONTH_PATTERN = (
    r'(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|january|february|march'
    r'|april|may|june|july|august|sept|september|october|november|december)'
)

# Absolute Date [or pieces of it]
# Numeric date such as 12-13-2023 (year optional), with no digit directly
# before it.  NOTE: the leading ``[^0-9]`` consumes one character, so a
# numeric date at the very start of the input is not matched.
mdy_abs_1 = re.compile(
    r'[^0-9][0-9]{1,2}[-\/.][0-9]{1,2}([-\/.][0-9]{4})?([^a-z]|$)',
    re.IGNORECASE,
)
# Month name first, e.g. "July 3", "Jul. 3rd, 2024".
mdy_abs_3 = re.compile(
    _MONTH_PATTERN + r'\.? ?[0-9]{1,2}(th|st|nd|rd)*,? ?([0-9]{4})?',
    re.IGNORECASE,
)
# Day number first, e.g. "3 July", "3rd Jul, 2024".
mdy_abs_2 = re.compile(
    r'[0-9]{1,2}(th|st|nd|rd)* ?' + _MONTH_PATTERN + r',? ?([0-9]{4})?',
    re.IGNORECASE,
)
m_abs_1 = re.compile(_MONTH_PATTERN, re.IGNORECASE)  # Just month
d_abs_1 = re.compile(r'[0-9]{1,2}(th|st|nd|rd)*', re.IGNORECASE)  # Just date

# Relative Date
# Days of week. Fully resolves.
mdy_rel_1 = re.compile(
    r'((monday|mon)|(tuesday|tue)|(wednesday|wed)|(thursday|thu)|(friday|fri)'
    r'|(saturday|sat)|(sunday|sun))([^a-z]|)',
    re.IGNORECASE,
)
# Tomorrow / today / tonight / now / week(end). Fully resolves.
mdy_rel_2 = re.compile(
    r'((tom)(orrow)?.?)|(today)|(tonight)|(now)|((week)(end)?)',
    re.IGNORECASE,
)
# "X days". Fully resolves.  The count accepts several digits (a single
# ``[0-9]`` would capture "12 days" as "2 days") and the word "ten".
del_days_rel_1 = re.compile(
    r'([0-9]+|( |a|the|one|two|three|four|five|six|seven|eight|nine|ten))'
    r' ?days? ?',
    re.IGNORECASE,
)

# Absolute Time
hhmm_abs_1 = re.compile(
    r'[0-9]{1,2}:?([^-\/][0-9]{0,2})? ?'
    r'(am|pm|a\.m\.|p\.m\.|a m|p m|a([^a-z]|)|p([^a-z]|))',
    re.IGNORECASE,
)
hhmm_abs_2 = re.compile(r' (noon|midnight)', re.IGNORECASE)

# Relative Time
del_hours_rel_1 = re.compile(
    r'([0-9]{1,2}|(|a|the|one|two|three|few|four|five|six|seven|eight|nine|ten))'
    r' ?((hour|hrs.?)|(min(ute)?))s?',
    re.IGNORECASE,
)

# Adverbs used to resolve if multiple patterns are "hit". This leads to the
# "addition" problem (described on notion).
adverbs = re.compile(r'(from|after|before|next|end|this|past) ', re.IGNORECASE)
# Month names and abbreviations mapped to their calendar number.
_MONTH_NUMBERS = {
    'jan': 1, 'january': 1, 'feb': 2, 'february': 2, 'mar': 3, 'march': 3,
    'apr': 4, 'april': 4, 'may': 5, 'jun': 6, 'june': 6, 'jul': 7, 'july': 7,
    'aug': 8, 'august': 8, 'sep': 9, 'sept': 9, 'september': 9,
    'oct': 10, 'october': 10, 'nov': 11, 'november': 11,
    'dec': 12, 'december': 12,
}

# Three-letter weekday prefixes; the index equals ``datetime.weekday()``.
_WEEKDAY_PREFIXES = ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun')

# Spelled-out counts understood by the relative date/time parsers.
_NUMBER_WORDS = {
    'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5,
    'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10,
}

# Adverbs meaning "add the relative offset to the anchor".
_ADDITIVE_ADVERBS = ('after', 'from', 'past')


def _next_occurrence(month, day, today):
    """Return the first valid ``date(year, month, day)`` on or after *today*.

    Nine consecutive years are tried so that Feb 29 still resolves when the
    next leap year is eight years away.  Raises ``ValueError`` when the
    month/day pair is never a valid calendar date.
    """
    for year in range(today.year, today.year + 9):
        try:
            candidate = datetime.date(year, month, day)
        except ValueError:
            continue  # e.g. Feb 29 in a non-leap year
        if candidate >= today:
            return candidate
    raise ValueError(f"'{month}/{day}' is not a valid month/day combination")


def _resolve_ambiguous_month_day(first, second, today):
    """Pick ``(month, day)`` for two numbers that could be either order.

    The reading whose next occurrence is soonest wins; on a tie the
    month-first reading is used.
    """
    month_first = _next_occurrence(first, second, today)
    day_first = _next_occurrence(second, first, today)
    if day_first < month_first:
        return second, first
    return first, second


def abs_date_parse(snip):
    """Parse an absolute date snippet into a ``datetime`` at midnight.

    Two shapes are understood:

    * month-name dates in any token order and spacing ("July 3", "3rd jul
      2024", "jan. 5", "jan5th");
    * numeric dates with any separator ("12-13-2023", "7/4", "12.5").

    Numbers above 31 are read as the year and numbers from 13 to 31 as the
    day.  When two numbers could each be month or day, the reading that comes
    up soonest is chosen.  Without an explicit year the next occurrence on or
    after today is returned; an explicit year is always honoured.

    Args:
        snip: Text of the matched date.

    Returns:
        A naive ``datetime.datetime`` with the time set to 00:00.

    Raises:
        ValueError: If no month or day can be identified, or the result is
            not a valid calendar date.
    """
    today = datetime.date.today()
    text = snip.replace(',', '').lower()
    month = day = year = None

    if re.search(r'[a-z]{3}', text):
        # Month-name form.  Tokenising on letter/digit runs makes the parse
        # independent of spacing, trailing dots and ordinal suffixes.
        for token in re.findall(r'[a-z]+|[0-9]+', text):
            if token in _MONTH_NUMBERS:
                month = _MONTH_NUMBERS[token]
            elif token.isdigit():
                if int(token) > 31:
                    year = int(token)
                else:
                    day = int(token)
        if month is None:
            raise ValueError("There should have been a month, but could not identify")
        if day is None:
            raise ValueError("There should have been a date, but could not identify")
    else:
        # Numeric form.  Only the digit runs matter; stray punctuation that
        # the surrounding regex match may have captured is ignored.
        unassigned = []
        for number in (int(t) for t in re.findall(r'[0-9]+', text)):
            if number > 31 and year is None:
                year = number
            elif 12 < number <= 31 and day is None:
                day = number
            else:
                unassigned.append(number)
        if day is not None and len(unassigned) == 1:
            month = unassigned[0]
        elif day is None and len(unassigned) == 2:
            # Probably lack of clarity on month and date
            print("There are multiple options for what this could mean. Going to select shortest one.")
            month, day = _resolve_ambiguous_month_day(unassigned[0], unassigned[1], today)
        else:
            raise ValueError(f"Could not interpret '{snip}' as a date")

    if year is None:
        # Compare calendar dates (not midnight against "now") so that today's
        # own date resolves to today instead of rolling over to next year.
        return datetime.datetime.combine(_next_occurrence(month, day, today), datetime.time())
    return datetime.datetime(year, month, day)


def _day_count(words):
    """Return the first numeric or spelled-out count in *words*, else None."""
    for word in words:
        if word.isdigit():
            return int(word)
        if word in _NUMBER_WORDS:
            return _NUMBER_WORDS[word]
    return None


def rel_date_parse(snip):
    """Convert a relative date snippet into a ``timedelta`` from today.

    Handles weekday names (the next such weekday, today included), "tomorrow"
    and its abbreviation "tom", "now"/"today"/"tonight", and "<count> day(s)"
    where the count is digits or a word from one to ten.

    Args:
        snip: Text of the matched relative date.

    Returns:
        A ``datetime.timedelta`` measured in whole days.

    Raises:
        ValueError: If the snippet is not one of the supported forms (for
            example "week" or "weekend") or gives no usable day count.
    """
    text = snip.lower()
    words = re.findall(r'[a-z]+|[0-9]+', text)

    current_dow = datetime.datetime.now().weekday()
    for target_dow, prefix in enumerate(_WEEKDAY_PREFIXES):
        if prefix in text:
            if target_dow < current_dow:
                # If they say same day, should that be next week? Or this
                # week? Currently the same weekday resolves to today.
                print("--------------THIS DAY-OF-WEEK HAS PASSED----ADDING +7 DAYS------------------")
                target_dow += 7
            return datetime.timedelta(days=target_dow - current_dow)

    if "tomorrow" in text or "tom" in words:
        return datetime.timedelta(days=1)
    if "now" in text or "today" in text or "tonight" in text:
        return datetime.timedelta(days=0)

    if "day" in words or "days" in words:
        count = _day_count(words)
        if count is None and "days" not in words:
            count = 1  # "a day", "the day", or a bare "day"
        if count:
            return datetime.timedelta(days=count)

    raise ValueError(f"Could not interpret '{snip}' as a relative date")


def abs_time_parse(snip, inp):
    """Parse a clock-time snippet such as "5:30pm", "530 pm" or "noon".

    Args:
        snip: Text of the matched time.
        inp: Full user input (accepted for interface compatibility; unused).

    Returns:
        A ``datetime.time`` with hour and minute set.

    Raises:
        ValueError: If the snippet has no digits or the values are out of
            range for a clock time.
    """
    text = snip.lower()
    if ':' in text:
        hour_part, _, minute_part = text.partition(':')
        hours = int(hour_part.strip())
        # Take the leading digits after the colon, skipping any whitespace;
        # a missing minute field ("5:pm") means on the hour.
        minute_match = re.match(r'\s*([0-9]{1,2})', minute_part)
        minutes = int(minute_match.group(1)) if minute_match else 0
    elif 'noon' in text:
        return datetime.time(hour=12, minute=0)
    elif 'midnight' in text:
        return datetime.time(hour=0, minute=0)
    else:
        digits = "".join(re.findall(r'[0-9]', text))
        if not digits:
            raise ValueError(f"Could not find a time in '{snip}'")
        if len(digits) >= 3:
            # "530" -> 5:30, "1230" -> 12:30
            hours, minutes = int(digits[:-2]), int(digits[-2:])
        else:
            hours, minutes = int(digits), 0

    if 'p' in text and hours < 12:
        hours += 12
    if 'a' in text and hours >= 12:
        hours -= 12
    if 'p' not in text and 'a' not in text:
        print("\nNo 'AM' or 'PM' provided. Making assumptions.")
    return datetime.time(hour=hours, minute=minutes)


def rel_time_parse(snip):
    """Convert a relative time snippet ("10 minutes", "2 hrs") to a timedelta.

    The count may be digits, a word from one to ten, or "few" (taken as 3).
    A singular unit with no count ("an hour", "min") means 1; a plural unit
    with no count means 0.

    Args:
        snip: Text of the matched relative time.

    Returns:
        A ``datetime.timedelta`` in hours or minutes.

    Raises:
        ValueError: If the snippet names both hours and minutes, or neither.
    """
    text = snip.lower()
    # The look-behind stops the "hr" inside "three" from counting as hours.
    hour_unit = re.search(r'(?:hour|(?<![a-z])hr)(s?)', text)
    minute_unit = re.search(r'min(?:ute)?(s?)', text)
    if hour_unit and minute_unit:
        raise ValueError("Does not support a relative time input with both (i) hours and (ii) minutes")
    unit = hour_unit or minute_unit
    if unit is None:
        raise ValueError(f"Could not find hours or minutes in '{snip}'")

    digits = "".join(re.findall(r'[0-9]', text))
    if digits:
        val = int(digits)
    else:
        val = 1 if unit.group(1) == '' else 0
        for word, number in _NUMBER_WORDS.items():
            if word in text:
                val = number
        if "few" in text:
            val = 3

    if hour_unit:
        return datetime.timedelta(hours=val)
    return datetime.timedelta(minutes=val)


def _find_term(inp, term):
    """Locate *term* in *inp* as literal text, ignoring case."""
    return re.search(re.escape(term), inp, re.IGNORECASE)


def get_relevant_adverb(inp, term_1, term_2, adverbs, adv_inds):
    """Find the first adverb lying between two extracted terms.

    Args:
        inp: Full user input.
        term_1: Text of the first term, matched literally.
        term_2: Text of the second term, matched literally.
        adverbs: Adverb strings, parallel to *adv_inds*.
        adv_inds: ``(start, end)`` spans of each adverb within *inp*.

    Returns:
        ``(index, adverb)`` for the first adverb whose span lies entirely
        between the two terms, or ``(None, None)`` when there is none, when a
        term is not present in *inp*, or when the terms overlap.
    """
    m_1 = _find_term(inp, term_1)
    m_2 = _find_term(inp, term_2)
    if m_1 is None or m_2 is None:
        return None, None
    first, second = sorted((m_1, m_2), key=lambda m: m.start())
    start, end = first.end(), second.start()
    for i, (s, e) in enumerate(adv_inds):
        if s >= start and e <= end:
            return i, adverbs[i]
    return None, None


def rel_rel_date_resolver(inp, term_1, term_2, td_1, td_2, adverbs, adv_inds):
    """Combine two relative date offsets according to the adverb between them.

    "after"/"from"/"past" add the offsets; "before" subtracts the offset of
    the term that appears first from the other.  With no adverb the two
    offsets must already agree.

    Returns:
        ``(timedelta, adverb_index)``; the index is None when no adverb was
        used.

    Raises:
        ValueError: If the offsets conflict, or the adverb between the terms
            is not one this resolver supports.
    """
    adv_ind, adv = get_relevant_adverb(inp, term_1, term_2, adverbs, adv_inds)
    # Adverbs are matched case-insensitively, so compare them the same way.
    relation = adv.lower() if adv is not None else None

    if relation in _ADDITIVE_ADVERBS:
        return td_1 + td_2, adv_ind
    if relation == "before":
        # An adverb is only reported when both terms were found in ``inp``.
        if _find_term(inp, term_1).start() < _find_term(inp, term_2).start():
            return td_2 - td_1, adv_ind
        return td_1 - td_2, adv_ind
    if relation is None:
        if td_1 == td_2:  # Assume that they are the same
            print("-------------------ASSUMING CONSISTENCY BETWEEN RELATIVES-------------------------")
            return td_1, adv_ind
        raise ValueError("There is a conflict with the datetimes provided. '" + term_1.upper() + "' != '" + term_2.upper() + "'")
    raise ValueError(f"Cannot combine '{term_1}' and '{term_2}' using the adverb '{adv}'")


def abs_rel_date_resolver(inp, term_abs, term_rel, abs_dt, rel_td, adverbs, adv_inds):
    """Return the offset to add to an absolute date given a relative term.

    "after"/"from"/"past" give ``rel_td``; "before" gives ``-rel_td``.  With
    no adverb the relative term must point at the same month and day as
    ``abs_dt``, in which case the offset is zero.

    Raises:
        ValueError: If the two dates conflict, or the adverb between the
            terms is not one this resolver supports.
    """
    _, adv = get_relevant_adverb(inp, term_abs, term_rel, adverbs, adv_inds)
    relation = adv.lower() if adv is not None else None

    if relation in _ADDITIVE_ADVERBS:
        return rel_td
    if relation == "before":
        return -rel_td
    if relation is None:
        # Check consistency
        implied = datetime.datetime.now() + rel_td
        if abs_dt.month == implied.month and abs_dt.day == implied.day:
            print("-------------------------REDUNDANCY DETECTED--------------------------------------")
            return datetime.timedelta(days=0, hours=0, minutes=0)
        raise ValueError("There is a conflict with the datetimes provided. '" + str(abs_dt).upper() + "' != '" + term_rel.upper() + "'")
    raise ValueError(f"Cannot combine '{term_abs}' and '{term_rel}' using the adverb '{adv}'")


def _loose_month_day(inp):
    """Find a whole-word month and an ordinal day anywhere in *inp*.

    Covers phrasings such as "the 5th of March" where month and day are not
    adjacent.  The day must carry an ordinal suffix so that numbers belonging
    to a time ("5pm", "20 minutes") are not mistaken for a day.

    Returns:
        ``(month_text, day_text)``, or ``("", "")`` when either is missing.
    """
    month = re.search(r'\b' + m_abs_1.pattern + r'\b', inp, re.IGNORECASE)
    day = re.search(r'\b[0-9]{1,2}(?:th|st|nd|rd)\b', inp, re.IGNORECASE)
    if month is None or day is None:
        return "", ""
    return month.group(0), day.group(0)


def time_parse(inp, debug=False):
    """Resolve a free-form request such as "tomorrow at 5pm" to a datetime.

    Steps: (1) extract date snippets with the module-level patterns,
    (2) remove them from the text and extract time snippets from what is
    left, (3) parse each snippet, (4) combine them, using the adverbs found
    in the text when several date expressions are present.

    Args:
        inp: The user's request text.
        debug: When true, print the extracted snippets and parsed values.

    Returns:
        A naive ``datetime.datetime``.  If the combined result lies in the
        past, one day is added.

    Raises:
        ValueError: If no time can be identified, a snippet cannot be parsed,
            or the date expressions conflict.
        NotImplementedError: If three relative date expressions are present.
    """
    # ---- 1. Date snippets -------------------------------------------------
    abs_date_extraction = ""
    for pattern in (mdy_abs_1, mdy_abs_2, mdy_abs_3):
        match = pattern.search(inp)
        if match is not None:
            abs_date_extraction = match.group(0).strip()
            break

    date_fragments = [abs_date_extraction] if abs_date_extraction else []
    if not abs_date_extraction:
        month_text, day_text = _loose_month_day(inp)
        if month_text:
            # Joined with a space so abs_date_parse can tell the parts apart.
            abs_date_extraction = month_text + " " + day_text
            date_fragments = [month_text, day_text]

    # Technically, should do re.finditer in case a pattern appears multiple times
    rel_date_extraction = [
        match.group(0).strip()
        for match in (p.search(inp) for p in (mdy_rel_1, mdy_rel_2, del_days_rel_1))
        if match is not None
    ]

    # Remember, this is used for addition
    adv_inds = [(m.start(0), m.end(0)) for m in adverbs.finditer(inp)]
    advs = [inp[s:e].strip() for (s, e) in adv_inds]

    # ---- 2. Time snippets (searched with the date text removed) ------------
    purged = inp
    for fragment in date_fragments + rel_date_extraction:
        if fragment:
            purged = purged.replace(fragment, '')

    abs_time_extraction = [
        match.group(0).strip()
        for match in (hhmm_abs_1.search(purged), hhmm_abs_2.search(purged))
        if match is not None
    ]
    rel_time_match = del_hours_rel_1.search(purged)
    rel_time_extraction = [rel_time_match.group(0).strip()] if rel_time_match is not None else []

    if debug:
        print("Abs Date: \t\t" + abs_date_extraction)
        print("Rel Date: \t\t" + str(rel_date_extraction))
        print("Abs Time: \t\t" + str(abs_time_extraction))
        print("Rel Time: \t\t" + str(rel_time_extraction))
        print("Adverbs: \t\t" + str(advs))

    # ---- 3. Parse each snippet ---------------------------------------------
    d = datetime.datetime.now()
    abs_date = abs_date_parse(abs_date_extraction) if abs_date_extraction else None
    rel_date_deltas = [rel_date_parse(rde) for rde in rel_date_extraction]
    abs_times = [abs_time_parse(ate, inp) for ate in abs_time_extraction]
    rel_time_deltas = [rel_time_parse(rte) for rte in rel_time_extraction]

    if debug:
        print('\n\n')
        print('Current Date: \t\t' + str(d))
        print('Abs Date: \t\t' + str(abs_date))
        print('Rel Date Deltas: \t' + str(rel_date_deltas))
        print('Abs Time: \t\t' + str(abs_times))
        print('Rel Time Deltas: \t' + str(rel_time_deltas))

    if not abs_times and not rel_time_deltas:
        raise ValueError("Could not identify a time. Be sure to use 'AM/PM' if you specify an absolute time")

    # ---- 4. Combine ----------------------------------------------------------
    # A lone relative time ("in X hours"), optionally with one relative date,
    # is fully resolved from the current moment.
    only_rel_time = len(rel_time_deltas) == 1 and abs_date is None and not abs_times
    if only_rel_time and not rel_date_deltas:
        return d + rel_time_deltas[0]
    if only_rel_time and len(rel_date_deltas) == 1:
        return d + rel_time_deltas[0] + rel_date_deltas[0]

    if len(rel_date_deltas) == 3:
        raise NotImplementedError("Have not yet implemented Resolver for 3 rel-rel dates")

    datetime_request = None
    if abs_date is None:
        if len(rel_date_deltas) == 1:
            datetime_request = d + rel_date_deltas[0]
        elif len(rel_date_deltas) == 2:  # Two conflicting relative
            datetime_request = d + rel_rel_date_resolver(
                inp, rel_date_extraction[0], rel_date_extraction[1],
                rel_date_deltas[0], rel_date_deltas[1], advs, adv_inds)[0]
    elif not rel_date_deltas:  # Regular abs date
        datetime_request = abs_date
    elif len(rel_date_deltas) == 1:  # Abs date conflicting with rel
        datetime_request = abs_date + abs_rel_date_resolver(
            inp, abs_date_extraction, rel_date_extraction[0],
            abs_date, rel_date_deltas[0], advs, adv_inds)
    else:
        ###### EXPERIMENTAL###########
        print("--------------------------------TRYING EXPERIMENTAL CODE (METHOD 1)---------------------------")
        # Method 1: Resolve rel-rel, then resolve that with abs
        # Method 2: Resolve rel-abs, then resolve that with additional rel?
        # For now, only method 1 is implemented.
        rel_td_resolved, adv_ind = rel_rel_date_resolver(
            inp, rel_date_extraction[0], rel_date_extraction[1],
            rel_date_deltas[0], rel_date_deltas[1], advs, adv_inds)
        print(rel_td_resolved)
        if adv_ind is not None:
            # The adverb that joined the two relative terms is used up; drop
            # it so it is not applied again against the absolute date.
            adv_inds.pop(adv_ind)
            advs.pop(adv_ind)
        datetime_request = abs_date + abs_rel_date_resolver(
            inp, abs_date_extraction, rel_date_extraction[0],
            abs_date, rel_td_resolved, advs, adv_inds)

    # If no date, then assume today
    if datetime_request is None:
        print("------------------Assuming datetime--------------")
        datetime_request = datetime.datetime.now()

    if len(abs_times) == 1 and not rel_time_deltas:
        # Clear seconds too, so "5pm" means exactly 17:00:00.
        datetime_request = datetime_request.replace(
            hour=abs_times[0].hour, minute=abs_times[0].minute,
            second=0, microsecond=0)

    # Final check: if the requested moment has already passed, add a day.
    if datetime_request < datetime.datetime.now():
        print("Requested datetime is less than current one. Adding a day")
        datetime_request += datetime.timedelta(days=1)
    return datetime_request


def main():
    """Streamlit page: read a request from a text box and show its time."""
    x = st.text_input("Request", value="")
    if not x.strip():
        # Nothing typed yet (the initial page load) - there is nothing to parse.
        return
    d = time_parse(x, debug=True)
    st.code('Request Time: \t\t' + d.strftime("%m/%d @ %I:%M %p"))


if __name__ == "__main__":
    main()