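"""Interactive word-description lookup backed by TF-IDF cosine similarity.

Known words return their stored description; unknown words fall back to the
closest stored description, or prompt the user to supply a new one.
"""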
import pickle

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


class WordDescriptionModel:
    """Maps words to descriptions, using TF-IDF similarity as a fallback for unknown words."""

    def __init__(self):
        self.vectorizer = TfidfVectorizer()
        self.word_descriptions = {}
        self.word_vectors = None

    def train(self, word_desc_pairs):
        """Store (word, description) pairs and fit the TF-IDF matrix over all descriptions."""
        for word, desc in word_desc_pairs:
            self.word_descriptions[word.lower()] = desc

        descriptions = list(self.word_descriptions.values())
        self.word_vectors = self.vectorizer.fit_transform(descriptions)

    def get_description(self, word, similarity_threshold=0.3):
        """Return (found, description); unknown words fall back to the most similar known description."""
        word = word.lower()

        # Exact match: return the stored description directly.
        if word in self.word_descriptions:
            return True, self.word_descriptions[word]

        # Without a fitted TF-IDF matrix there is nothing to compare against.
        if self.word_vectors is None:
            return False, f"No description available for '{word}'. Please provide one for training."

        # Compare the query word against every stored description in TF-IDF space.
        word_vector = self.vectorizer.transform([word])
        similarities = cosine_similarity(word_vector, self.word_vectors).flatten()
        max_sim_idx = int(np.argmax(similarities))

        if similarities[max_sim_idx] >= similarity_threshold:
            # Dict insertion order matches the row order of word_vectors.
            similar_word = list(self.word_descriptions.keys())[max_sim_idx]
            return True, f"Similar to '{similar_word}': {self.word_descriptions[similar_word]}"

        return False, f"No description available for '{word}'. Please provide one for training."

    def add_new_word(self, word, description):
        """Add a single word/description pair and refit the TF-IDF matrix."""
        word = word.lower()
        self.word_descriptions[word] = description

        # Refit so the new description participates in similarity lookups.
        descriptions = list(self.word_descriptions.values())
        self.word_vectors = self.vectorizer.fit_transform(descriptions)

    def save_model(self, filename):
        model_data = {
            'word_descriptions': self.word_descriptions,
            'vectorizer': self.vectorizer,
            'word_vectors': self.word_vectors,
        }
        with open(filename, 'wb') as f:
            pickle.dump(model_data, f)

    def load_model(self, filename):
        """Load a previously saved model; return True on success, False if the file is missing."""
        try:
            with open(filename, 'rb') as f:
                model_data = pickle.load(f)
            self.word_descriptions = model_data['word_descriptions']
            self.vectorizer = model_data['vectorizer']
            self.word_vectors = model_data['word_vectors']
            return True
        except FileNotFoundError:
            return False


def main():
    model = WordDescriptionModel()
    model_file = 'word_description_model.pkl'

    # Train a fresh model if no saved one is found on disk.
    if not model.load_model(model_file):
        print("Training new model with initial data...")
        initial_data = [
            ('software', 'Computer programs and associated documentation and data that provide instructions for computers to perform specific tasks.'),
            ('hardware', 'Physical components that make up a computer system or electronic device.'),
            ('programming', 'Process of creating sets of instructions that tell a computer how to perform tasks.'),
            ('database', 'Organized collection of structured information or data stored electronically in a computer system.'),
            ('algorithm', 'Step-by-step procedure or formula for solving a problem or accomplishing a task.'),
        ]
        model.train(initial_data)

    while True:
        print("\n=== Word Description System ===")
        word = input("Enter a word to get its description (or 'quit' to exit): ").strip()

        if word.lower() == 'quit':
            break

        found, description = model.get_description(word)
        print(f"\nResult: {description}")

        # Unknown words are added interactively, then the updated model is saved.
        if not found:
            print("\nLet's add this word to our database!")
            new_description = input("Please provide a description for this word: ").strip()
            model.add_new_word(word, new_description)
            print(f"\nThank you! '{word}' has been added to the database.")

            model.save_model(model_file)
            print("Model has been updated and saved.")


if __name__ == "__main__":
    main()