Description_Maker / desription.py
Thanoss's picture
Upload desription.py
184c72a verified
raw
history blame
4.24 kB
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle
import json
class WordDescriptionModel:
    """Word-to-description lookup with a TF-IDF nearest-match fallback.

    Descriptions are kept in a dict keyed by the lower-cased word. All stored
    descriptions are vectorised with a ``TfidfVectorizer``; a query for an
    unknown word is transformed with the same vectorizer and compared with
    the description vectors by cosine similarity.
    """

    def __init__(self):
        self.vectorizer = TfidfVectorizer()
        # lower-cased word -> description. Insertion order matters: row i of
        # word_vectors belongs to the i-th key of this dict.
        self.word_descriptions = {}
        # TF-IDF matrix with one row per stored description; None until the
        # vectorizer has been fitted.
        self.word_vectors = None

    def _refit(self):
        """Rebuild ``word_vectors`` from the descriptions currently stored."""
        descriptions = list(self.word_descriptions.values())
        if not descriptions:
            # There is nothing to fit on, so stay in the "untrained" state
            # rather than handing the vectorizer an empty corpus.
            self.word_vectors = None
            return
        self.word_vectors = self.vectorizer.fit_transform(descriptions)

    def train(self, word_desc_pairs):
        """Store the given pairs and refit the vectorizer on all descriptions.

        Args:
            word_desc_pairs: Iterable of ``(word, description)`` pairs. Words
                are lower-cased; a later pair overwrites an earlier one with
                the same word. An empty iterable on an empty model is a no-op.
        """
        for word, desc in word_desc_pairs:
            self.word_descriptions[word.lower()] = desc
        self._refit()

    def get_description(self, word, similarity_threshold=0.3):
        """Look up ``word``, falling back to the most similar description.

        Args:
            word: Word to look up (matched case-insensitively).
            similarity_threshold: Minimum cosine similarity for the fallback
                match to be accepted.

        Returns:
            A ``(found, text)`` tuple. ``found`` is True for an exact match
            or an accepted fallback match; otherwise it is False and ``text``
            asks for a description to be provided.
        """
        word = word.lower()
        if word in self.word_descriptions:
            return True, self.word_descriptions[word]

        not_found = (
            False,
            f"No description available for '{word}'. Please provide one for training.",
        )
        if self.word_vectors is None:
            # The vectorizer has not been fitted yet (no training data, or an
            # untrained model was loaded), so no similarity search is possible.
            return not_found

        word_vector = self.vectorizer.transform([word])
        similarities = cosine_similarity(word_vector, self.word_vectors).flatten()
        max_sim_idx = int(np.argmax(similarities))
        if similarities[max_sim_idx] >= similarity_threshold:
            # Row order of word_vectors follows the dict's insertion order.
            similar_word = list(self.word_descriptions)[max_sim_idx]
            return True, f"Similar to '{similar_word}': {self.word_descriptions[similar_word]}"
        return not_found

    def add_new_word(self, word, description):
        """Add (or replace) one word's description and refit the vectors.

        Args:
            word: Word to store; it is lower-cased first.
            description: Description text for the word.
        """
        self.word_descriptions[word.lower()] = description
        # Retrain vectors with the updated dataset.
        self._refit()

    def save_model(self, filename):
        """Pickle the descriptions, vectorizer and vectors to ``filename``."""
        model_data = {
            'word_descriptions': self.word_descriptions,
            'vectorizer': self.vectorizer,
            'word_vectors': self.word_vectors,
        }
        with open(filename, 'wb') as f:
            pickle.dump(model_data, f)

    def load_model(self, filename):
        """Restore a model written by ``save_model``.

        Args:
            filename: Path of the pickle file to read.

        Returns:
            True when the file was loaded, False when it does not exist.
            Other errors (unreadable pickle, missing keys) propagate.
        """
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only load model files that come from a trusted source.
        try:
            with open(filename, 'rb') as f:
                model_data = pickle.load(f)
        except FileNotFoundError:
            return False

        # Read every field before assigning so that a file with a missing key
        # cannot leave this instance half-updated.
        word_descriptions = model_data['word_descriptions']
        vectorizer = model_data['vectorizer']
        word_vectors = model_data['word_vectors']
        self.word_descriptions = word_descriptions
        self.vectorizer = vectorizer
        self.word_vectors = word_vectors
        return True
def main():
    """Run the interactive word-description prompt loop.

    Loads the model from ``word_description_model.pkl`` if that file exists;
    otherwise trains a new model on a small built-in data set. Then it
    repeatedly asks for a word and prints its description. When no
    description is found, the user is asked to supply one, which is added to
    the model and saved back to the same file. The loop ends when the user
    types ``quit`` or when standard input reaches end-of-file.
    """
    model = WordDescriptionModel()
    model_file = 'word_description_model.pkl'

    if not model.load_model(model_file):
        print("Training new model with initial data...")
        initial_data = [
            ('software', 'Computer programs and associated documentation and data that provide instructions for computers to perform specific tasks.'),
            ('hardware', 'Physical components that make up a computer system or electronic device.'),
            ('programming', 'Process of creating sets of instructions that tell a computer how to perform tasks.'),
            ('database', 'Organized collection of structured information or data stored electronically in a computer system.'),
            ('algorithm', 'Step-by-step procedure or formula for solving a problem or accomplishing a task.'),
        ]
        model.train(initial_data)

    while True:
        print("\n=== Word Description System ===")
        try:
            word = input("Enter a word to get its description (or 'quit' to exit): ").strip()
        except EOFError:
            # Input was closed (Ctrl-D or exhausted piped input): exit
            # cleanly instead of crashing with a traceback.
            break

        if word.lower() == 'quit':
            break
        if not word:
            # A blank line is not a word; do not look it up or offer to
            # store it under an empty key.
            continue

        found, description = model.get_description(word)
        print(f"\nResult: {description}")
        if found:
            continue

        print("\nLet's add this word to our database!")
        try:
            new_description = input("Please provide a description for this word: ").strip()
        except EOFError:
            break
        if not new_description:
            # Storing an empty description would make the word "known" while
            # giving the user nothing useful on the next lookup.
            print("\nNo description entered; nothing was added.")
            continue

        model.add_new_word(word, new_description)
        print(f"\nThank you! '{word}' has been added to the database.")
        model.save_model(model_file)
        print("Model has been updated and saved.")


if __name__ == "__main__":
    main()