"""Interactive word-description lookup backed by TF-IDF similarity.

Words are stored with a free-text description. A lookup first tries an exact
(case-insensitive) match; otherwise the query is compared against the TF-IDF
vectors of the stored descriptions and the closest one is returned when its
cosine similarity reaches a threshold.
"""

import json  # not used in this file's visible code; kept so the import set is unchanged
import pickle
from typing import Iterable, Tuple

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


class WordDescriptionModel:
    """Map words to descriptions, with a TF-IDF fallback for unknown words."""

    def __init__(self):
        self.vectorizer = TfidfVectorizer()
        # Lower-cased word -> description. Insertion order matters: row i of
        # ``word_vectors`` belongs to the i-th key of this dict.
        self.word_descriptions = {}
        # TF-IDF matrix of the descriptions, or None while nothing is fitted.
        self.word_vectors = None

    def _refit(self) -> None:
        """Rebuild the TF-IDF matrix from the current descriptions."""
        descriptions = list(self.word_descriptions.values())
        if not descriptions:
            # fit_transform raises ValueError on an empty corpus; an empty
            # model simply has no vectors to search.
            self.word_vectors = None
            return
        self.word_vectors = self.vectorizer.fit_transform(descriptions)

    def train(self, word_desc_pairs: Iterable[Tuple[str, str]]) -> None:
        """Add ``(word, description)`` pairs and refit the vectorizer.

        Words are lower-cased; a repeated word overwrites its earlier
        description. Pairs accumulate on top of anything already stored.
        """
        for word, desc in word_desc_pairs:
            self.word_descriptions[word.lower()] = desc
        self._refit()

    def get_description(self, word: str, similarity_threshold: float = 0.3):
        """Look up the description for ``word``.

        Args:
            word: The word to look up (matched case-insensitively).
            similarity_threshold: Minimum cosine similarity for the fallback
                match to be accepted.

        Returns:
            A ``(found, message)`` tuple. ``found`` is True for an exact match
            (``message`` is the stored description) or for a sufficiently
            similar entry (``message`` names the similar word). Otherwise
            ``found`` is False and ``message`` asks for a description.
        """
        word = word.lower()
        not_found = (
            False,
            f"No description available for '{word}'. "
            "Please provide one for training.",
        )

        if word in self.word_descriptions:
            return True, self.word_descriptions[word]

        if self.word_vectors is None:
            # Nothing has been fitted yet, so there is nothing to compare to.
            return not_found

        query_vector = self.vectorizer.transform([word])
        similarities = cosine_similarity(
            query_vector, self.word_vectors
        ).flatten()
        best_idx = int(np.argmax(similarities))
        if similarities[best_idx] < similarity_threshold:
            return not_found

        # Row order of word_vectors follows the dict's insertion order.
        similar_word = list(self.word_descriptions)[best_idx]
        return (
            True,
            f"Similar to '{similar_word}': "
            f"{self.word_descriptions[similar_word]}",
        )

    def add_new_word(self, word: str, description: str) -> None:
        """Store (or overwrite) one word and refit on the updated dataset."""
        self.word_descriptions[word.lower()] = description
        self._refit()

    def save_model(self, filename: str) -> None:
        """Pickle the descriptions, vectorizer and vectors to ``filename``."""
        model_data = {
            'word_descriptions': self.word_descriptions,
            'vectorizer': self.vectorizer,
            'word_vectors': self.word_vectors,
        }
        with open(filename, 'wb') as f:
            pickle.dump(model_data, f)

    def load_model(self, filename: str) -> bool:
        """Restore state written by :meth:`save_model`.

        Returns:
            True when the model was loaded, False when ``filename`` does not
            exist. Other errors (corrupt file, missing keys) propagate.
        """
        # SECURITY: pickle.load can execute arbitrary code. Only load model
        # files that this program wrote itself or that come from a trusted
        # source.
        try:
            with open(filename, 'rb') as f:
                model_data = pickle.load(f)
        except FileNotFoundError:
            return False

        # Read every key before assigning so that a malformed file cannot
        # leave the instance half-updated.
        word_descriptions = model_data['word_descriptions']
        vectorizer = model_data['vectorizer']
        word_vectors = model_data['word_vectors']

        self.word_descriptions = word_descriptions
        self.vectorizer = vectorizer
        self.word_vectors = word_vectors
        return True


def main():
    """Run the interactive lookup loop, persisting newly taught words."""
    model = WordDescriptionModel()
    model_file = 'word_description_model.pkl'

    if not model.load_model(model_file):
        print("Training new model with initial data...")
        initial_data = [
            ('software', 'Computer programs and associated documentation and data that provide instructions for computers to perform specific tasks.'),
            ('hardware', 'Physical components that make up a computer system or electronic device.'),
            ('programming', 'Process of creating sets of instructions that tell a computer how to perform tasks.'),
            ('database', 'Organized collection of structured information or data stored electronically in a computer system.'),
            ('algorithm', 'Step-by-step procedure or formula for solving a problem or accomplishing a task.'),
        ]
        model.train(initial_data)

    while True:
        print("\n=== Word Description System ===")
        try:
            word = input("Enter a word to get its description (or 'quit' to exit): ").strip()
        except EOFError:
            # Input stream closed (piped input / Ctrl-D): treat like 'quit'.
            break

        if word.lower() == 'quit':
            break
        if not word:
            # An empty query would otherwise be offered as a new "word".
            print("\nPlease enter a non-empty word.")
            continue

        found, description = model.get_description(word)
        print(f"\nResult: {description}")
        if found:
            continue

        print("\nLet's add this word to our database!")
        try:
            new_description = input("Please provide a description for this word: ").strip()
        except EOFError:
            break
        if not new_description:
            # Do not store blank descriptions; they would later be returned
            # as a successful but empty lookup.
            print("\nNo description provided; the word was not added.")
            continue

        model.add_new_word(word, new_description)
        print(f"\nThank you! '{word}' has been added to the database.")
        model.save_model(model_file)
        print("Model has been updated and saved.")


if __name__ == "__main__":
    main()