import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle
import json

class WordDescriptionModel:
    """Look up word descriptions, with a TF-IDF similarity fallback.

    Words are stored lower-cased in ``word_descriptions``. The description
    texts are vectorised with a ``TfidfVectorizer``; row *i* of
    ``word_vectors`` is built from the *i*-th value of ``word_descriptions``
    (dict insertion order), which is how the best-matching row is mapped
    back to a word in :meth:`get_description`.
    """

    def __init__(self):
        self.vectorizer = TfidfVectorizer()
        # Maps lower-cased word -> description text.
        self.word_descriptions = {}
        # TF-IDF matrix of the descriptions; None until the model is fitted.
        self.word_vectors = None

    def _refit(self):
        """Rebuild ``word_vectors`` from the current descriptions."""
        descriptions = list(self.word_descriptions.values())
        if not descriptions:
            # Fitting the vectorizer on an empty corpus raises, so an empty
            # model is represented as "not fitted" instead.
            self.word_vectors = None
            return
        self.word_vectors = self.vectorizer.fit_transform(descriptions)

    def train(self, word_desc_pairs):
        """Add ``(word, description)`` pairs and refit the vectorizer.

        Args:
            word_desc_pairs: Iterable of ``(word, description)`` string
                pairs. Words are lower-cased; a word that is already known
                has its description replaced.
        """
        for word, desc in word_desc_pairs:
            self.word_descriptions[word.lower()] = desc
        self._refit()

    def get_description(self, word, similarity_threshold=0.3):
        """Return the description for *word*, or that of the closest entry.

        Args:
            word: Word to look up (matched case-insensitively).
            similarity_threshold: Minimum cosine similarity between the
                query and a stored description for the fallback match to
                be accepted.

        Returns:
            A ``(found, message)`` tuple. ``found`` is True for an exact
            match (``message`` is the stored description) or an accepted
            similarity match (``message`` names the similar word). It is
            False otherwise, including when the model has not been fitted.
        """
        word = word.lower()

        if word in self.word_descriptions:
            return True, self.word_descriptions[word]

        not_found = (
            False,
            f"No description available for '{word}'. Please provide one for training.",
        )

        # Without a fitted matrix there is nothing to compare against, and
        # calling transform() on an unfitted vectorizer would raise.
        if self.word_vectors is None:
            return not_found

        query_vector = self.vectorizer.transform([word])
        similarities = cosine_similarity(query_vector, self.word_vectors).flatten()
        best_idx = int(np.argmax(similarities))

        if similarities[best_idx] >= similarity_threshold:
            # Rows of word_vectors follow dict insertion order, so the row
            # index doubles as an index into the key sequence.
            similar_word = list(self.word_descriptions)[best_idx]
            return True, f"Similar to '{similar_word}': {self.word_descriptions[similar_word]}"
        return not_found

    def add_new_word(self, word, description):
        """Store *description* for *word* (lower-cased) and refit the vectors."""
        self.word_descriptions[word.lower()] = description
        self._refit()

    def save_model(self, filename):
        """Pickle the descriptions, vectorizer and TF-IDF matrix to *filename*."""
        model_data = {
            'word_descriptions': self.word_descriptions,
            'vectorizer': self.vectorizer,
            'word_vectors': self.word_vectors,
        }
        with open(filename, 'wb') as f:
            pickle.dump(model_data, f)

    def load_model(self, filename):
        """Restore a model previously written by :meth:`save_model`.

        Args:
            filename: Path of the pickle file to read.

        Returns:
            True when the model was loaded, False when *filename* does not
            exist. Any other error (corrupt file, missing key) propagates.
        """
        try:
            with open(filename, 'rb') as f:
                # SECURITY: pickle.load can execute arbitrary code; only
                # load model files that come from a trusted source.
                model_data = pickle.load(f)
        except FileNotFoundError:
            return False

        # Read every field before touching self so a malformed payload
        # cannot leave the model half-updated.
        word_descriptions = model_data['word_descriptions']
        vectorizer = model_data['vectorizer']
        word_vectors = model_data['word_vectors']

        self.word_descriptions = word_descriptions
        self.vectorizer = vectorizer
        self.word_vectors = word_vectors
        return True

def main():
    """Run the interactive word-description prompt.

    Loads the model from ``word_description_model.pkl`` or, when that file
    does not exist, trains a new one on a small built-in vocabulary. It then
    loops: read a word, print its description, and when none is found ask
    the user for one, add it to the model and save the model to disk.
    The loop ends on ``quit`` or when standard input is closed.
    """
    model = WordDescriptionModel()
    model_file = 'word_description_model.pkl'

    if not model.load_model(model_file):
        print("Training new model with initial data...")
        initial_data = [
            ('software', 'Computer programs and associated documentation and data that provide instructions for computers to perform specific tasks.'),
            ('hardware', 'Physical components that make up a computer system or electronic device.'),
            ('programming', 'Process of creating sets of instructions that tell a computer how to perform tasks.'),
            ('database', 'Organized collection of structured information or data stored electronically in a computer system.'),
            ('algorithm', 'Step-by-step procedure or formula for solving a problem or accomplishing a task.'),
        ]
        model.train(initial_data)

    while True:
        print("\n=== Word Description System ===")
        try:
            word = input("Enter a word to get its description (or 'quit' to exit): ").strip()
        except EOFError:
            # stdin was closed (Ctrl-D or exhausted piped input): leave the
            # loop the same way 'quit' does instead of crashing.
            print()
            break

        if not word:
            # A blank line is not a word; do not look it up or offer to add it.
            continue

        if word.lower() == 'quit':
            break

        found, description = model.get_description(word)
        print(f"\nResult: {description}")

        if found:
            continue

        print("\nLet's add this word to our database!")
        try:
            new_description = input("Please provide a description for this word: ").strip()
        except EOFError:
            print()
            break

        if not new_description:
            # Storing an empty description would make the word "known" while
            # giving the user nothing useful back on the next lookup.
            print("\nNo description entered; nothing was added.")
            continue

        model.add_new_word(word, new_description)
        print(f"\nThank you! '{word}' has been added to the database.")

        model.save_model(model_file)
        print("Model has been updated and saved.")

# Start the interactive prompt only when the file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()