import pandas as pd
import numpy as np


# Load the confusion-set table. Only the 'Word' and 'Confusions' columns
# (expected among the first two columns) are used below.
confusion_set_df = pd.read_csv('./datasets/final_confusion_set.csv')

# Maps each word to the list of words it can be confused with. Every pair is
# recorded in both directions.
word_dict = {}

for _, row in confusion_set_df.iloc[:, :2].iterrows():
    word = row['Word']
    raw_confusions = row['Confusions']

    # pandas yields non-string NaN values for missing cells; such rows carry
    # no usable mapping and would otherwise crash on `.split`.
    if not isinstance(word, str) or not isinstance(raw_confusions, str):
        continue

    # Merge into any entry that already exists: an earlier row may have
    # registered reverse mappings for this word, and plain assignment would
    # silently discard them.
    forward = word_dict.setdefault(word, [])

    # 'Confusions' may hold several comma-separated words.
    for confusion_word in (part.strip() for part in raw_confusions.split(',')):
        if not confusion_word:
            continue  # stray/trailing comma
        if confusion_word not in forward:
            forward.append(confusion_word)

        # Reverse mapping: confusion_word -> word.
        backward = word_dict.setdefault(confusion_word, [])
        if word not in backward:
            backward.append(word)

confusion_set = word_dict

# Flat list of the dictionary keys that contain no space character.
# All keys are strings at this point because non-string rows were skipped.
confusion_words = [key.strip() for key in word_dict if " " not in key]
        
# Postposition suffixes that may be attached to the end of a word.
# Order is significant: the helpers in this file scan the list front to back,
# so earlier entries take precedence when several suffixes match.
# Each suffix is listed exactly once; a repeated entry would make
# `reconstruct_sentence` re-attach the same suffix more than once.
post_positions = [
    "ले", "लागि", "निम्ति", "लाई", "देखि", "बाट", "बाटै", "प्रति", "द्वारा", "को", "का", "की", "मा",
    "मै", "कै", "हरु", "संग", "संगै", "लगायत", "माथि", "अनुसार", "रहे", "बिना", "तुल्य", "झैँ",
    "समेत", "चाहिँ", "तर्फ", "तिर", "जस्तो", "जस्ता", "जस्तै", "बीच", "सँग", "सम्म", "वाला", "पट्टि",
    "बारे", "नै", "भित्र", "मुनि", "पछि", "पछाडि", "अगाडि", "अघि", "अनुरूप", "जत्रो", "वाद",
    "वटा", "मध्ये", "मार्फत", "साथ", "बमोजिम", "खेरि", "निर", "वारि", "पारि",
]

# Words that are dropped from a sentence before it is handed to the model.
# Some entries appear more than once in the literal below (the pronoun rows
# near the end repeat earlier words); duplicates are removed right after it.
stop_words = [
    "अनि", "अब", "अरू", "आदि", "आफू", "उ", "उन", "उनी", "ऊ",
    "कसरी", "कस्तो", "कि", "किन", "किनकि", "किनभने",
    "के", "केहि", "केही", "को", "चाहीं", "छ", "जता", "जताततै",
    "जब", "जबकि", "जस्ता", "जस्तै", "जस्तो", "जहाँ",
    "जुन", "जुनै", "जे", "जो", "जोपनि", "जोपनी", "जस",
    "झैं", "त", "तत्काल", "तथा", "तपाईं", "तब",
    "तर", "तल", "तापनि", "तिनी", "तिनै", "तिमी",
    "ती", "त्यस", "त्यसै", "त्यसकारण", "त्यसो", "त्यस्तै", "त्यस्तो", "त्यहाँ", "त्यहीँ",
    "त्यो", "थिए", "थिएँ", "थियो",
    "देखि", "द्वारा", "न", "नि", "नै", "नौ",
    "पछि", "पछी", "पनि", "पनी",
    "बरु", "बाट",
    "मा", "मेरो", "मै", "यति", "यदि", "यद्यपि", "यसरी", "यसओ", "यस्तै", "यस्तो", "यहाँ",
    "यही", "या", "यी", "यो", "र", "यहि",
    "रे", "लाई", "लाख", "लागि", "ले",
    "वा", "वाट",
    "सँग", "सँगै",
    "सय", "सहित",
    "सहितै",
    "सो",
    "हामी", "हाम्रा", "हाम्रो", "हुँ",
    "म", "तँ", "तिमी",
    "ऊ", "त्यो", "उ", "ती", "उनी", "उहाँ",
    "तिम्रो", "उस",
    "कुन", "कहाँ", "कसै", "सबै", "आफ्नै",
    "हजुर", "वहाँ",
    "हो", "अहो",
    "च", "है", "ल", "लौं",
    "ला", "अथवा", "नत्र",
    "हाइत", "छि", "वाह", "अरे",
    "कुनै",
]
# NOTE: merging `post_positions` into `stop_words` is currently disabled.
# Drop duplicates while keeping first-occurrence order. `list(set(...))` would
# also deduplicate, but its order changes from run to run because string
# hashing is randomised.
stop_words = list(dict.fromkeys(stop_words))


################################## 
 # Reconstruction


def remove_post_positions_stop_words(sentence, confusion_set_words, stop_words, post_positions):
    """Drop stop words and strip postpositions from confusion-set words.

    The sentence is split on whitespace. For each token the first entry of
    ``post_positions`` that the token ends with is stripped, giving a
    candidate root. Then:

    * if the root is empty (the token was itself a postposition), the token
      is left untouched;
    * if the root, or the untouched token, is a stop word, the token is
      removed and its position recorded;
    * otherwise, if the root is a confusion-set word, the token is replaced
      by the root and its position recorded;
    * any other token is kept unchanged.

    Args:
        sentence: Input sentence.
        confusion_set_words: Collection of confusion-set words.
        stop_words: Collection of stop words.
        post_positions: Ordered sequence of postposition suffixes; the first
            matching entry wins.

    Returns:
        A 3-tuple ``(filtered_sentence, stop_words_index,
        confusion_words_index)``. Both index lists refer to token positions
        in the original sentence and are in ascending order.
    """
    # Build hash-based lookups once per call so each per-token membership
    # test is O(1) instead of a scan over a list.
    if isinstance(stop_words, (set, frozenset, dict)):
        stop_lookup = stop_words
    else:
        stop_lookup = set(stop_words)
    if isinstance(confusion_set_words, (set, frozenset, dict)):
        confusion_lookup = confusion_set_words
    else:
        confusion_lookup = set(confusion_set_words)

    words = sentence.split()

    # Step 1: strip the first matching postposition from every token.
    stripped_words = []
    for word in words:
        for pos in post_positions:
            # An empty suffix matches everything and would blank the token.
            if pos and word.endswith(pos):
                word = word[:-len(pos)]
                break
        stripped_words.append(word)

    # Step 2: classify each token by its stripped form.
    confusion_words_index = []
    stop_words_index = []
    for index, stripped in enumerate(stripped_words):
        if not stripped:
            # The whole token was a postposition: keep it as it is.
            continue
        # The untouched token is checked as well, because a stop word may
        # itself end with a postposition-like suffix and would not be
        # recognised once that suffix is stripped.
        if stripped in stop_lookup or words[index] in stop_lookup:
            words[index] = ''
            stop_words_index.append(index)
        elif stripped in confusion_lookup:
            words[index] = stripped
            confusion_words_index.append(index)

    filtered_sentence = " ".join(word for word in words if word)
    return filtered_sentence, stop_words_index, confusion_words_index


def reconstruct_sentence(input_sentence, model_output, stop_words_index,confusion_words_index, post_positions):
    """Rebuild a full sentence from the model output for a filtered sentence.

    Tokens of ``input_sentence`` at the positions in ``stop_words_index`` are
    inserted back into the model output. Then, for every position in
    ``confusion_words_index``, the first entry of ``post_positions`` that the
    original token ends with is appended to the model's token at that
    position.

    Both sentences are printed to stdout before processing.

    Args:
        input_sentence: The original, unfiltered sentence.
        model_output: The model's output for the filtered sentence.
        stop_words_index: Ascending positions (in ``input_sentence``) of the
            tokens that were removed as stop words.
        confusion_words_index: Positions (in ``input_sentence``) of the
            tokens whose postposition was stripped.
        post_positions: Ordered sequence of postposition suffixes; the first
            matching entry wins.

    Returns:
        The reconstructed sentence as a single space-joined string.
    """
    print(input_sentence)
    print(model_output)
    input_words = input_sentence.split()
    model_output_words = model_output.split()

    # Re-insert removed tokens in ascending order so each insertion lands on
    # its original position.
    for index in stop_words_index:
        model_output_words.insert(index, input_words[index])

    for index in confusion_words_index:
        if index >= len(model_output_words):
            # The model returned fewer tokens than expected; there is no
            # token to attach the suffix to.
            continue
        original_word = input_words[index]
        for pos in post_positions:
            if pos and original_word.endswith(pos):
                model_output_words[index] += pos
                # Only one suffix is ever stripped per token (first match),
                # so only one is restored. Without this a token matching
                # several entries would receive several suffixes.
                break

    return " ".join(model_output_words)