Upload desription.py
Browse files- desription.py +103 -0
desription.py
ADDED
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
3 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
4 |
+
import pickle
|
5 |
+
import json
|
6 |
+
|
7 |
+
class WordDescriptionModel:
    """Word-to-description lookup with a TF-IDF similarity fallback.

    Descriptions are stored per lower-cased word. A word without a stored
    description is vectorized with the TF-IDF vocabulary fitted on the stored
    descriptions and compared against them by cosine similarity.
    """

    def __init__(self):
        self.vectorizer = TfidfVectorizer()
        # Maps lower-cased word -> description. Insertion order matches the
        # row order of ``word_vectors``.
        self.word_descriptions = {}
        # TF-IDF matrix of the stored descriptions; None until first fitted.
        self.word_vectors = None

    def _refit(self):
        """Refit the vectorizer on every stored description."""
        if not self.word_descriptions:
            # Nothing to fit yet: stay in the "untrained" state rather than
            # asking the vectorizer to fit an empty corpus.
            self.word_vectors = None
            return
        descriptions = list(self.word_descriptions.values())
        self.word_vectors = self.vectorizer.fit_transform(descriptions)

    def train(self, word_desc_pairs):
        """Store ``(word, description)`` pairs and refit the TF-IDF vectors.

        Args:
            word_desc_pairs: Iterable of ``(word, description)`` pairs. Words
                are lower-cased; a repeated word overwrites its description.
        """
        for word, desc in word_desc_pairs:
            self.word_descriptions[word.lower()] = desc
        self._refit()

    def get_description(self, word, similarity_threshold=0.3):
        """Look up a description for ``word``.

        Args:
            word: Word to look up; matched case-insensitively.
            similarity_threshold: Minimum cosine similarity for the fallback
                match to be accepted.

        Returns:
            A ``(found, text)`` tuple. ``text`` is the stored description on
            an exact match, a ``"Similar to ..."`` message when the closest
            stored description reaches the threshold, and otherwise a message
            asking for a description (with ``found`` set to False).
        """
        word = word.lower()

        if word in self.word_descriptions:
            return True, self.word_descriptions[word]

        # An unfitted model has nothing to compare against; report "not found"
        # instead of failing inside the vectorizer.
        if self.word_vectors is not None:
            word_vector = self.vectorizer.transform([word])
            similarities = cosine_similarity(word_vector, self.word_vectors).flatten()
            max_sim_idx = int(np.argmax(similarities))

            if similarities[max_sim_idx] >= similarity_threshold:
                # Row i of word_vectors belongs to the i-th inserted word.
                similar_word = list(self.word_descriptions)[max_sim_idx]
                return True, f"Similar to '{similar_word}': {self.word_descriptions[similar_word]}"

        return False, f"No description available for '{word}'. Please provide one for training."

    def add_new_word(self, word, description):
        """Store (or overwrite) one word's description and refit the vectors.

        Args:
            word: Word to add; stored lower-cased.
            description: Description text for the word.
        """
        self.word_descriptions[word.lower()] = description
        # The vocabulary may change with the new text, so refit on everything.
        self._refit()

    def save_model(self, filename):
        """Pickle the descriptions, vectorizer and vectors to ``filename``."""
        model_data = {
            'word_descriptions': self.word_descriptions,
            'vectorizer': self.vectorizer,
            'word_vectors': self.word_vectors,
        }
        with open(filename, 'wb') as f:
            pickle.dump(model_data, f)

    def load_model(self, filename):
        """Restore state written by ``save_model``.

        Args:
            filename: Path of the pickle file to read.

        Returns:
            True when the file was loaded, False when it does not exist.
        """
        try:
            with open(filename, 'rb') as f:
                # SECURITY: unpickling can execute arbitrary code. Only load
                # files that this program wrote itself, never untrusted input.
                model_data = pickle.load(f)
        except FileNotFoundError:
            return False

        # Read every field before assigning so a malformed payload cannot
        # leave the instance half-updated.
        word_descriptions = model_data['word_descriptions']
        vectorizer = model_data['vectorizer']
        word_vectors = model_data['word_vectors']

        self.word_descriptions = word_descriptions
        self.vectorizer = vectorizer
        self.word_vectors = word_vectors
        return True
|
67 |
+
|
68 |
+
def main():
    """Run the interactive word-description prompt.

    Loads the pickled model from ``word_description_model.pkl`` or, when that
    file does not exist, trains a new model on a small built-in data set. It
    then repeatedly asks for a word, prints the lookup result, and for words
    that are not found asks for a description, stores it, and saves the model.
    Entering ``quit`` ends the loop.
    """
    model = WordDescriptionModel()
    model_file = 'word_description_model.pkl'

    if not model.load_model(model_file):
        print("Training new model with initial data...")
        initial_data = [
            ('software', 'Computer programs and associated documentation and data that provide instructions for computers to perform specific tasks.'),
            ('hardware', 'Physical components that make up a computer system or electronic device.'),
            ('programming', 'Process of creating sets of instructions that tell a computer how to perform tasks.'),
            ('database', 'Organized collection of structured information or data stored electronically in a computer system.'),
            ('algorithm', 'Step-by-step procedure or formula for solving a problem or accomplishing a task.'),
        ]
        model.train(initial_data)

    while True:
        print("\n=== Word Description System ===")
        word = input("Enter a word to get its description (or 'quit' to exit): ").strip()

        if word.lower() == 'quit':
            break

        if not word:
            # A blank entry would otherwise be stored and saved as the word ''.
            print("\nPlease enter a word.")
            continue

        found, description = model.get_description(word)
        print(f"\nResult: {description}")

        if not found:
            print("\nLet's add this word to our database!")
            new_description = input("Please provide a description for this word: ").strip()

            if not new_description:
                # Storing an empty description would make later lookups report
                # the word as found while having nothing to show.
                print("\nNo description entered; nothing was added.")
                continue

            model.add_new_word(word, new_description)
            print(f"\nThank you! '{word}' has been added to the database.")

            model.save_model(model_file)
            print("Model has been updated and saved.")


# Start the prompt only when run as a script, not when imported.
if __name__ == "__main__":
    main()
|