blazingbunny committed on
Commit
e5dd147
1 Parent(s): 5cf55d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -11
app.py CHANGED
@@ -15,34 +15,38 @@ def identify_nouns_verbs(text):
15
  # Process the text with spaCy
16
  doc = nlp(text)
17
 
18
- # Extract nouns and verbs
19
- nouns = [token.text for token in doc if token.pos_ == "NOUN"]
20
- verbs = [token.text for token in doc if token.pos_ == "VERB"]
21
 
22
  return {"Nouns": nouns, "Verbs": verbs}
23
 
24
  def calculate_similarity(nouns_verbs, input_list):
25
  similarities = {"Nouns": {}, "Verbs": {}}
26
 
 
 
 
 
 
 
27
  for noun in nouns_verbs["Nouns"]:
28
- noun_token = nlp(noun)
 
29
  for word in input_list["Nouns"]:
30
  word_token = nlp(word)
31
  similarity = noun_token.similarity(word_token)
32
  if similarity > 0.7: # Adjust threshold as needed
33
- if noun not in similarities["Nouns"]:
34
- similarities["Nouns"][noun] = []
35
- similarities["Nouns"][noun].append((word, similarity))
36
 
37
  for verb in nouns_verbs["Verbs"]:
38
- verb_token = nlp(verb)
 
39
  for word in input_list["Verbs"]:
40
  word_token = nlp(word)
41
  similarity = verb_token.similarity(word_token)
42
  if similarity > 0.7: # Adjust threshold as needed
43
- if verb not in similarities["Verbs"]:
44
- similarities["Verbs"][verb] = []
45
- similarities["Verbs"][verb].append((word, similarity))
46
 
47
  return similarities
48
 
 
15
  # Process the text with spaCy
16
  doc = nlp(text)
17
 
18
+ # Extract nouns and verbs with their positions
19
+ nouns = [{"text": token.text, "begin_offset": token.idx} for token in doc if token.pos_ == "NOUN"]
20
+ verbs = [{"text": token.text, "begin_offset": token.idx} for token in doc if token.pos_ == "VERB"]
21
 
22
  return {"Nouns": nouns, "Verbs": verbs}
23
 
24
  def calculate_similarity(nouns_verbs, input_list):
25
  similarities = {"Nouns": {}, "Verbs": {}}
26
 
27
+ def add_similarity(word, similar_word, score, pos):
28
+ if word not in similarities[pos]:
29
+ similarities[pos][word] = []
30
+ if similar_word not in [sim[0] for sim in similarities[pos][word]]:
31
+ similarities[pos][word].append((similar_word, score))
32
+
33
  for noun in nouns_verbs["Nouns"]:
34
+ noun_text = noun["text"]
35
+ noun_token = nlp(noun_text)
36
  for word in input_list["Nouns"]:
37
  word_token = nlp(word)
38
  similarity = noun_token.similarity(word_token)
39
  if similarity > 0.7: # Adjust threshold as needed
40
+ add_similarity(noun_text, word, similarity, "Nouns")
 
 
41
 
42
  for verb in nouns_verbs["Verbs"]:
43
+ verb_text = verb["text"]
44
+ verb_token = nlp(verb_text)
45
  for word in input_list["Verbs"]:
46
  word_token = nlp(word)
47
  similarity = verb_token.similarity(word_token)
48
  if similarity > 0.7: # Adjust threshold as needed
49
+ add_similarity(verb_text, word, similarity, "Verbs")
 
 
50
 
51
  return similarities
52