import pandas as pd
from sentence_transformers import SentenceTransformer, util
import re

# Load the pre-trained SBERT model once at import time; it is shared by every
# call that computes embeddings.
model = SentenceTransformer('all-mpnet-base-v2')

# Load the candidate descriptions from the CSV file.
csv_file_path = './dictionary/dictionary.csv'
df = pd.read_csv(csv_file_path)
# Empty cells are parsed as NaN (a float). Drop them so every dictionary
# entry is real text: match_word lower-cases and embeds each entry, which
# fails on a float.
dictionary = df['description'].dropna().tolist()

def match_word(input_word, dictionary, threshold=0.7):
    """Return the dictionary entry most similar to ``input_word``.

    The dictionary is first narrowed to entries that contain at least one of
    the input's words as a case-insensitive substring. The surviving entries
    are then ranked by the cosine similarity between their SBERT embedding
    and the embedding of the full input string.

    Args:
        input_word: Free-text name to look up.
        dictionary: Iterable of candidate descriptions. Items that are not
            strings (for example NaN coming from empty CSV cells) are
            skipped instead of raising.
        threshold: Score the best candidate must strictly exceed in order to
            be returned. Defaults to 0.7.

    Returns:
        An ``(entry, similarity_score)`` tuple for the highest-scoring
        candidate, or ``None`` when no candidate passes the word filter or
        the best score does not exceed ``threshold``.
    """
    # Extract words from the input
    words = re.findall(r'\w+', input_word.lower())

    # Cheap pre-filter: keep only entries sharing at least one word
    # (substring match) with the input, so fewer entries need embedding.
    filtered_dictionary = []
    for desc in dictionary:
        if not isinstance(desc, str):
            continue
        lowered = desc.lower()
        if any(word in lowered for word in words):
            filtered_dictionary.append(desc)

    print(f"Filtered dictionary size: {len(filtered_dictionary)}")

    # Nothing to compare against: skip the embedding work entirely.
    if not filtered_dictionary:
        return None

    # Encode all candidates in one batch rather than one model call per
    # entry, then score them against the input in a single operation.
    input_embedding = model.encode(input_word, convert_to_tensor=True)
    entry_embeddings = model.encode(filtered_dictionary, convert_to_tensor=True)
    scores = util.pytorch_cos_sim(input_embedding, entry_embeddings)[0].tolist()

    # max() keeps the first entry on ties, matching dictionary order.
    best_match = max(zip(filtered_dictionary, scores), key=lambda pair: pair[1])
    return best_match if best_match[1] > threshold else None

# Demo: look up a batch of sample produce names and report the best match
# (or the absence of one) for each.
input_words = [
    "Pepper - Habanero Pepper",
    "Bananas (12 lbs)",
    "Squash - Yellow Squash",
    "Cauliflower",
    "Squash mix italian/yellow (30 lbs)",
    "Tomato - Roma Tomato",
    "Tomato - Grape Tomato",
    "Squash - Mexican Squash",
    "Pepper - Bell Pepper",
    "Squash - Italian Squash",
    "Pepper - Red Fresno Pepper",
    "Tomato - Cherry Tomato",
    "Pepper - Serrano Pepper",
    "Kale ( 5 lbs)",
    "Tomato - Beefsteak Tomato",
    "Pepper - Anaheim Pepper",
    "Banana - Burro Banana",
    "Squash - Butternut Squash",
    "Apricot ( 10 lbs)",
    "Squash - Acorn Squash",
    "Tomato - Heirloom Tomato",
    "Pepper - Pasilla Pepper",
    "Pepper - Jalapeno Pepper",
    "carrot (10 lbs )",
]

for input_word in input_words:
    print("Input word:", input_word)
    matched_entry = match_word(input_word, dictionary)
    if not matched_entry:
        # No candidate was returned for this input.
        print("Matched entry: None")
    else:
        print("Matched entry:", matched_entry[0])
        print("Similarity score:", matched_entry[1])
    # Blank line separates the report for each input.
    print()