Thanoss committed on
Commit
184c72a
·
verified ·
1 Parent(s): 04c4f07

Upload desription.py

Browse files
Files changed (1) hide show
  1. desription.py +103 -0
desription.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sklearn.feature_extraction.text import TfidfVectorizer
3
+ from sklearn.metrics.pairwise import cosine_similarity
4
+ import pickle
5
+ import json
6
+
7
class WordDescriptionModel:
    """Store word descriptions and look them up, with a TF-IDF fallback.

    Known words are answered directly from ``word_descriptions``.  For an
    unknown word, the word itself is vectorized with the TF-IDF vocabulary
    learned from the stored descriptions and compared against every
    description by cosine similarity; the best match is returned when it
    reaches the caller's threshold.
    """

    def __init__(self):
        """Create an empty, unfitted model."""
        self.vectorizer = TfidfVectorizer()
        # Lower-cased word -> description.  Insertion order is relied upon:
        # row i of ``word_vectors`` belongs to the i-th key of this dict.
        self.word_descriptions = {}
        # TF-IDF matrix of the stored descriptions, or None while unfitted.
        self.word_vectors = None

    def _refit(self):
        """Rebuild ``word_vectors`` from the current descriptions."""
        descriptions = list(self.word_descriptions.values())
        if not descriptions:
            # Fitting a vectorizer on zero documents raises; an empty model
            # is simply left unfitted instead.
            self.word_vectors = None
            return
        self.word_vectors = self.vectorizer.fit_transform(descriptions)

    def train(self, word_desc_pairs):
        """Add ``(word, description)`` pairs and refit the TF-IDF vectors.

        Words are stored lower-cased; a repeated word overwrites its earlier
        description.  Passing no pairs to an empty model leaves it unfitted
        rather than raising.
        """
        for word, desc in word_desc_pairs:
            self.word_descriptions[word.lower()] = desc

        self._refit()

    def get_description(self, word, similarity_threshold=0.3):
        """Return ``(found, text)`` for ``word``.

        Args:
            word: The word to look up (matched case-insensitively).
            similarity_threshold: Minimum cosine similarity for the fallback
                match to be accepted.

        Returns:
            ``(True, description)`` for a stored word,
            ``(True, "Similar to '<w>': <description>")`` when the closest
            stored description reaches the threshold, and otherwise
            ``(False, message)`` asking for a description.
        """
        word = word.lower()

        if word in self.word_descriptions:
            return True, self.word_descriptions[word]

        not_found = (
            False,
            f"No description available for '{word}'. Please provide one for training.",
        )

        # Nothing has been fitted yet, so there is nothing to compare with.
        if self.word_vectors is None:
            return not_found

        word_vector = self.vectorizer.transform([word])
        similarities = cosine_similarity(word_vector, self.word_vectors).flatten()
        max_sim_idx = np.argmax(similarities)

        if similarities[max_sim_idx] >= similarity_threshold:
            # Row index maps back to a word through the dict's key order.
            similar_word = list(self.word_descriptions)[max_sim_idx]
            return True, f"Similar to '{similar_word}': {self.word_descriptions[similar_word]}"
        return not_found

    def add_new_word(self, word, description):
        """Store ``description`` for ``word`` (lower-cased) and refit."""
        self.word_descriptions[word.lower()] = description
        # The vocabulary depends on every description, so refit from scratch.
        self._refit()

    def save_model(self, filename):
        """Pickle the descriptions, vectorizer and vectors to ``filename``."""
        model_data = {
            'word_descriptions': self.word_descriptions,
            'vectorizer': self.vectorizer,
            'word_vectors': self.word_vectors
        }
        with open(filename, 'wb') as f:
            pickle.dump(model_data, f)

    def load_model(self, filename):
        """Restore state written by ``save_model``.

        Returns:
            ``True`` when the file was loaded, ``False`` when it does not
            exist.  Other errors (unreadable pickle, missing keys) propagate.

        SECURITY: ``pickle.load`` can execute arbitrary code while
        deserializing.  Only load model files from a trusted source.
        """
        try:
            with open(filename, 'rb') as f:
                model_data = pickle.load(f)
        except FileNotFoundError:
            return False

        # Read every key before assigning so that a malformed payload cannot
        # leave the instance half-updated.
        word_descriptions = model_data['word_descriptions']
        vectorizer = model_data['vectorizer']
        word_vectors = model_data['word_vectors']

        self.word_descriptions = word_descriptions
        self.vectorizer = vectorizer
        self.word_vectors = word_vectors
        return True
67
+
68
def main():
    """Run the interactive word-description console.

    Loads the pickled model from ``word_description_model.pkl`` if it
    exists, otherwise trains a new one on a small built-in data set.  Then
    repeatedly prompts for a word, prints its description (or the closest
    match), and, when nothing is found, asks the user for a description,
    adds it to the model and saves the model.  Entering ``quit`` exits.
    """
    model = WordDescriptionModel()
    model_file = 'word_description_model.pkl'

    if not model.load_model(model_file):
        print("Training new model with initial data...")
        initial_data = [
            ('software', 'Computer programs and associated documentation and data that provide instructions for computers to perform specific tasks.'),
            ('hardware', 'Physical components that make up a computer system or electronic device.'),
            ('programming', 'Process of creating sets of instructions that tell a computer how to perform tasks.'),
            ('database', 'Organized collection of structured information or data stored electronically in a computer system.'),
            ('algorithm', 'Step-by-step procedure or formula for solving a problem or accomplishing a task.')
        ]
        model.train(initial_data)

    while True:
        print("\n=== Word Description System ===")
        word = input("Enter a word to get its description (or 'quit' to exit): ").strip()

        if word.lower() == 'quit':
            break

        if not word:
            # Blank input would otherwise be looked up, and then stored, as
            # the word ''.
            print("Please enter a non-empty word.")
            continue

        found, description = model.get_description(word)
        print(f"\nResult: {description}")

        if found:
            continue

        print("\nLet's add this word to our database!")
        new_description = input("Please provide a description for this word: ").strip()

        if not new_description:
            # An empty description carries no terms to match against, so it
            # is not worth storing.
            print("No description entered; nothing was added.")
            continue

        model.add_new_word(word, new_description)
        print(f"\nThank you! '{word}' has been added to the database.")

        # Persist right away so the new word survives an abrupt exit.
        model.save_model(model_file)
        print("Model has been updated and saved.")


if __name__ == "__main__":
    main()