#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 28 18:21:59 2024

@author: selbl
"""

import streamlit as st
from script import WhatsappAnalysis
import re
import pandas as pd

def main():
    """Render the Streamlit page and run the analysis on an uploaded chat.

    Configures the page, shows the introductory text and a ``.txt`` file
    uploader. Once a file is provided it is parsed with ``buildDF`` and the
    resulting dataframe and group name are handed to ``WhatsappAnalysis``.
    """
    st.set_page_config(
        page_title="Whatsapp Conversation Analysis",
        page_icon=":bar_chart:",  # You can use an emoji or a URL to an image
        #layout="wide",
        initial_sidebar_state="auto",
    )
    st.header("Whatsapp Chat Analysis",divider="blue")
    st.text("Always wanted to know who talks the most in your Whatsapp groups?")
    st.text("Want to check how much more active your group has been in the past years?")
    st.text("This script will take care of doing the hard work for you!")
    st.text("Just upload your conversation as a .txt file below")
    st.text("The file you upload IS NOT SAVED")
    st.text("If you are unsure on how to get a txt file refer to the sidebar")
    st.text("Please only upload .txt files")
    uploaded_file = st.file_uploader("Upload your conversation here",type="txt")
    # Nothing to do until the user has actually uploaded something
    if uploaded_file is None:
        return
    # Compare the extension case-insensitively so "CHAT.TXT" is not rejected
    if not uploaded_file.name.lower().endswith(".txt"):
        st.error("Please upload a valid .txt file!")
        return
    df,nombregrupo = buildDF(uploaded_file)
    WhatsappAnalysis(df, nombregrupo)

#Auxiliary function for converting from 12 hour to 24 hour format
def convert_to_24hour(time_str):
    """Convert a 12-hour clock string to 24-hour form.

    Accepts ``H:MM:SS`` or ``H:MM`` followed by an optional ``AM``/``PM``
    marker in any letter case, separated by any (or no) whitespace.  The hour
    is zero-padded to two digits; minutes and seconds are passed through
    unchanged, and seconds are only emitted when the input had them.  A time
    without a marker is treated as already being in 24-hour form.

    Parameters
    ----------
    time_str : str
        Text containing the time, e.g. ``"1:02:03 PM"`` or ``"10:15 pm"``.

    Returns
    -------
    str
        ``"HH:MM:SS"`` or ``"HH:MM"``.

    Raises
    ------
    ValueError
        If no ``H:MM`` pattern can be found in ``time_str``.
    """
    # \s also matches the narrow no-break space some exports put before AM/PM
    match = re.search(r'(\d+):(\d+)(?::(\d+))?\s*([AaPp][Mm])?', time_str)
    if match is None:
        raise ValueError(f"Unrecognised time format: {time_str!r}")
    hour_text, minute, second, marker = match.groups()
    hour = int(hour_text)
    marker = (marker or '').upper()
    if marker == 'PM' and hour != 12:
        hour += 12
    elif marker == 'AM' and hour == 12:
        # 12 AM is midnight
        hour = 0
    pieces = [f'{hour:02d}', minute]
    if second is not None:
        pieces.append(second)
    return ':'.join(pieces)

def buildDF(file):
    """Parse an exported WhatsApp chat into a dataframe of messages.

    The first line is only used to detect the export layout: a leading ``[``
    means the bracketed layout ``[Date, Time AM/PM] User: Message``, otherwise
    the dash layout ``Date, Time - User: Message`` is assumed; the presence of
    ``AM``/``PM`` on that line switches on 12-hour to 24-hour conversion.
    The second line is only used to extract the group name.  Every following
    line that matches the detected layout becomes one row; lines that do not
    match (system notices, continuation lines of multi-line messages) are
    skipped, as are rows whose sender equals the group name.

    Parameters
    ----------
    file : iterable of bytes
        Yields the UTF-8 encoded lines of the export (e.g. an uploaded file).

    Returns
    -------
    tuple
        ``(df, nombregrupo)`` where ``df`` has the columns ``Date``, ``Time``,
        ``Sender`` and ``Message`` and ``nombregrupo`` is the group name
        (an empty string when the file has fewer than two lines).
    """
    column_names = ["Date", "Time", "Sender","Message"]
    # Patterns are compiled once instead of on every line
    bracket_pattern = re.compile(r'(\d+/\d+/\d+), (\d+:\d+:\d+\s[APMapm]+)\] (.*?):\s(.*)')
    # Only the first ", ", " - " and ": " act as delimiters, so message text
    # may freely contain them; the date must start with a digit so that
    # continuation lines are not mistaken for new messages
    dash_pattern = re.compile(r'(\d[^,]*), (.+?) - (.+?): (.*)')
    # Quoted text at the end of a line, with straight or curly double quotes
    quoted_pattern = re.compile(r'["\u201c](.*)["\u201d]\s*$')
    # Invisible marks (BOM, left-to-right mark) that can precede the text
    invisible_marks = '\ufeff\u200e'

    # Rows are collected in a list and turned into a dataframe once at the
    # end; concatenating a dataframe per line is quadratic in the chat length
    rows = []
    # Defined up front so short files do not leave it unbound
    nombregrupo = ""
    bracket_form = False
    twentyfour_hr = True
    for cont, raw_line in enumerate(file):
        line = raw_line.decode('utf-8')
        if cont == 0:
            # Layout detection only; this line never becomes a row
            if line.lstrip(invisible_marks).startswith('['):
                bracket_form = True
            if ('AM' in line) or ('PM' in line):
                twentyfour_hr = False
            continue
        if cont == 1:
            # Group name: prefer the quoted text at the end of the line so
            # names containing spaces survive; otherwise fall back to the
            # last word with its first and last character removed
            quoted = quoted_pattern.search(line)
            if quoted:
                nombregrupo = quoted.group(1)
            else:
                words = line.split()
                if words:
                    nombregrupo = words[-1][1:-1]
            continue
        if bracket_form:
            match = bracket_pattern.search(line)
        else:
            match = dash_pattern.match(line.lstrip(invisible_marks))
        # Lines that do not follow the layout are skipped
        if match is None:
            continue
        date, time_text, sender, message = match.groups()
        if not twentyfour_hr:
            time_text = convert_to_24hour(time_text)
        # Entries "sent" by the group itself are ignored
        if sender != nombregrupo:
            rows.append({'Date' : date, 'Time' : time_text, 'Sender' : sender,"Message":message})
    df = pd.DataFrame(rows, columns = column_names)
    return df, nombregrupo

# Run the app only when this file is executed as the main script, not on import
if __name__ == "__main__":
    main()