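"""Interactive word-description lookup backed by TF-IDF cosine similarity.

Known words return their stored description; unknown words fall back to the
closest stored description, or prompt the user to supply a new one.
"""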
import pickle

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


class WordDescriptionModel:
    """Maps words to descriptions, using TF-IDF similarity as a fallback for unknown words."""

    def __init__(self):
        self.vectorizer = TfidfVectorizer()
        self.word_descriptions = {}
        self.word_vectors = None

    def train(self, word_desc_pairs):
        """Store (word, description) pairs and fit the TF-IDF matrix over all descriptions."""
        for word, desc in word_desc_pairs:
            self.word_descriptions[word.lower()] = desc

        descriptions = list(self.word_descriptions.values())
        self.word_vectors = self.vectorizer.fit_transform(descriptions)

    def get_description(self, word, similarity_threshold=0.3):
        """Return (found, description); unknown words fall back to the most similar known description."""
        word = word.lower()

        # Exact match: return the stored description directly.
        if word in self.word_descriptions:
            return True, self.word_descriptions[word]

        # Without a fitted TF-IDF matrix there is nothing to compare against.
        if self.word_vectors is None:
            return False, f"No description available for '{word}'. Please provide one for training."

        # Compare the query word against every stored description in TF-IDF space.
        word_vector = self.vectorizer.transform([word])
        similarities = cosine_similarity(word_vector, self.word_vectors).flatten()
        max_sim_idx = int(np.argmax(similarities))

        if similarities[max_sim_idx] >= similarity_threshold:
            # Dict insertion order matches the row order of word_vectors.
            similar_word = list(self.word_descriptions.keys())[max_sim_idx]
            return True, f"Similar to '{similar_word}': {self.word_descriptions[similar_word]}"

        return False, f"No description available for '{word}'. Please provide one for training."

    def add_new_word(self, word, description):
        """Add a single word/description pair and refit the TF-IDF matrix."""
        word = word.lower()
        self.word_descriptions[word] = description

        # Refit so the new description participates in similarity lookups.
        descriptions = list(self.word_descriptions.values())
        self.word_vectors = self.vectorizer.fit_transform(descriptions)

    def save_model(self, filename):
        model_data = {
            'word_descriptions': self.word_descriptions,
            'vectorizer': self.vectorizer,
            'word_vectors': self.word_vectors,
        }
        with open(filename, 'wb') as f:
            pickle.dump(model_data, f)

    def load_model(self, filename):
        """Load a previously saved model; return True on success, False if the file is missing."""
        try:
            with open(filename, 'rb') as f:
                model_data = pickle.load(f)
            self.word_descriptions = model_data['word_descriptions']
            self.vectorizer = model_data['vectorizer']
            self.word_vectors = model_data['word_vectors']
            return True
        except FileNotFoundError:
            return False


def main():
    model = WordDescriptionModel()
    model_file = 'word_description_model.pkl'

    # Train a fresh model if no saved one is found on disk.
    if not model.load_model(model_file):
        print("Training new model with initial data...")
        initial_data = [
            ('software', 'Computer programs and associated documentation and data that provide instructions for computers to perform specific tasks.'),
            ('hardware', 'Physical components that make up a computer system or electronic device.'),
            ('programming', 'Process of creating sets of instructions that tell a computer how to perform tasks.'),
            ('database', 'Organized collection of structured information or data stored electronically in a computer system.'),
            ('algorithm', 'Step-by-step procedure or formula for solving a problem or accomplishing a task.'),
        ]
        model.train(initial_data)

    while True:
        print("\n=== Word Description System ===")
        word = input("Enter a word to get its description (or 'quit' to exit): ").strip()

        if word.lower() == 'quit':
            break

        found, description = model.get_description(word)
        print(f"\nResult: {description}")

        # Unknown words are added interactively, then the updated model is saved.
        if not found:
            print("\nLet's add this word to our database!")
            new_description = input("Please provide a description for this word: ").strip()
            model.add_new_word(word, new_description)
            print(f"\nThank you! '{word}' has been added to the database.")

            model.save_model(model_file)
            print("Model has been updated and saved.")


if __name__ == "__main__":
    main()