blazingbunny committed on
Commit
a789703
1 Parent(s): 83cb24a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -57
app.py CHANGED
@@ -1,82 +1,74 @@
 
 
1
  import gradio as gr
2
  import spacy
 
3
  import json
4
- import os
5
- import subprocess # Added import statement for subprocess
6
 
7
  # Download the spaCy model if it is not already downloaded
8
- subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
9
-
10
- # Load the spaCy model for POS tagging
11
- nlp = spacy.load("en_core_web_sm")
12
-
13
- # Load the list of nouns and verbs from the JSON file
14
- json_file_path = "/mnt/data/ED-input_list.json"
15
 
16
- with open(json_file_path, 'r') as json_file:
17
- input_list = json.load(json_file)
18
-
19
- input_nouns = set(input_list["Nouns"])
20
- input_verbs = set(input_list["Verbs"])
21
 
22
  def identify_nouns_verbs(text):
23
  # Process the text with spaCy
24
  doc = nlp(text)
25
 
26
- # Extract nouns and verbs with offsets
27
- nouns = [{"word": token.text, "begin_offset": token.idx} for token in doc if token.pos_ == "NOUN"]
28
- verbs = [{"word": token.text, "begin_offset": token.idx} for token in doc if token.pos_ == "VERB"]
29
 
30
  return {"Nouns": nouns, "Verbs": verbs}
31
 
32
- def calculate_similarity(input_text, json_file):
33
- input_list = json.load(json_file)
34
-
35
- input_nouns = set(input_list["Nouns"])
36
- input_verbs = set(input_list["Verbs"])
37
 
38
- doc = nlp(input_text)
39
-
40
- output = {"Nouns": [], "Verbs": [], "Similarities": {"Nouns": {}, "Verbs": {}}}
41
-
42
- # Find nouns and verbs with offsets
43
- found_nouns = [{"word": token.text, "begin_offset": token.idx} for token in doc if token.pos_ == "NOUN"]
44
- found_verbs = [{"word": token.text, "begin_offset": token.idx} for token in doc if token.pos_ == "VERB"]
 
 
 
 
 
 
 
45
 
46
- output["Nouns"] = [noun for noun in found_nouns if noun["word"] not in input_nouns]
47
- output["Verbs"] = [verb for verb in found_verbs if verb["word"] not in input_verbs]
 
 
 
 
 
 
48
 
49
- # Calculate similarity for nouns
50
- for noun in output["Nouns"]:
51
- token = nlp(noun["word"])
52
- similar_words = []
53
- for input_word in input_nouns:
54
- input_token = nlp(input_word)
55
- similarity = token.similarity(input_token)
56
- if similarity > 0.7:
57
- similar_words.append((input_word, similarity))
58
- output["Similarities"]["Nouns"][noun["word"]] = similar_words
59
 
60
- # Calculate similarity for verbs
61
- for verb in output["Verbs"]:
62
- token = nlp(verb["word"])
63
- similar_words = []
64
- for input_word in input_verbs:
65
- input_token = nlp(input_word)
66
- similarity = token.similarity(input_token)
67
- if similarity > 0.7:
68
- similar_words.append((input_word, similarity))
69
- output["Similarities"]["Verbs"][verb["word"]] = similar_words
70
-
71
- return output
72
 
73
  # Create the Gradio interface
74
  iface = gr.Interface(
75
- fn=calculate_similarity,
76
- inputs=[gr.Textbox(lines=10, placeholder="Enter your text here..."), gr.File(label="Upload JSON List")],
 
 
 
77
  outputs=gr.JSON(),
78
- title="Noun and Verb Similarity Checker",
79
- description="Enter a document and upload a JSON list to identify nouns and verbs and find their similarities."
80
  )
81
 
82
  if __name__ == "__main__":
 
1
+ # app.py
2
+
3
  import gradio as gr
4
  import spacy
5
+ import subprocess
6
  import json
 
 
7
 
8
  # Download the spaCy model if it is not already downloaded
9
+ subprocess.run(["python", "-m", "spacy", "download", "en_core_web_md"])
 
 
 
 
 
 
10
 
11
+ # Load the spaCy model for POS tagging and similarity
12
+ nlp = spacy.load("en_core_web_md")
 
 
 
13
 
14
def identify_nouns_verbs(text):
    """Tag *text* with spaCy and collect its nouns and verbs.

    Returns a dict with keys "Nouns" and "Verbs"; each value is a list of
    {"text": <token text>, "begin_offset": <character offset>} entries.
    """
    doc = nlp(text)

    def _collect(pos_tag):
        # One entry per token whose coarse POS tag matches pos_tag.
        return [
            {"text": tok.text, "begin_offset": tok.idx}
            for tok in doc
            if tok.pos_ == pos_tag
        ]

    return {"Nouns": _collect("NOUN"), "Verbs": _collect("VERB")}
23
 
24
def calculate_similarity(nouns_verbs, input_list):
    """Score identified nouns/verbs against reference word lists.

    Args:
        nouns_verbs: dict with "Nouns"/"Verbs" lists of
            {"text": ..., "begin_offset": ...} entries (as produced by
            identify_nouns_verbs).
        input_list: dict with "Nouns"/"Verbs" lists of reference words.

    Returns:
        {"Nouns": {...}, "Verbs": {...}} mapping each found word to a list
        of (reference_word, similarity) pairs whose similarity exceeds the
        threshold.  Words with no match above the threshold get no entry.
    """
    threshold = 0.7  # Adjust threshold as needed

    similarities = {"Nouns": {}, "Verbs": {}}

    def _match(found, references, pos):
        # Run each reference word through the spaCy pipeline exactly once,
        # instead of once per found token (previously O(found x refs) calls).
        ref_docs = [(word, nlp(word)) for word in references]
        doc_cache = {}  # found word text -> spaCy doc, shared by duplicates
        for item in found:
            text = item["text"]
            doc = doc_cache.get(text)
            if doc is None:
                doc = doc_cache[text] = nlp(text)
            for ref_word, ref_doc in ref_docs:
                score = doc.similarity(ref_doc)
                if score > threshold:
                    # Create the bucket lazily so unmatched words get no key.
                    matches = similarities[pos].setdefault(text, [])
                    # Keep at most one entry per reference word.
                    if ref_word not in (m[0] for m in matches):
                        matches.append((ref_word, score))

    _match(nouns_verbs["Nouns"], input_list["Nouns"], "Nouns")
    _match(nouns_verbs["Verbs"], input_list["Verbs"], "Verbs")

    return similarities
 
 
 
 
 
 
 
 
 
52
 
53
def process_inputs(text, json_file):
    """Gradio handler: identify nouns/verbs in *text* and score them
    against the word lists in the uploaded JSON file.

    Args:
        text: free-form input text to analyze.
        json_file: uploaded file object; its .name attribute is the path
            of the temp file on disk.  The JSON is expected to contain
            "Nouns" and "Verbs" arrays — TODO confirm against callers.

    Returns:
        dict with the identified tokens ("Nouns and Verbs") and their
        similarity matches ("Similarities").
    """
    # Read the JSON from the temp-file path Gradio hands us.  Explicit
    # UTF-8 avoids the platform-default codec mangling non-ASCII words.
    with open(json_file.name, "r", encoding="utf-8") as f:
        input_list = json.load(f)

    nouns_verbs = identify_nouns_verbs(text)
    similarities = calculate_similarity(nouns_verbs, input_list)
    return {"Nouns and Verbs": nouns_verbs, "Similarities": similarities}
 
 
 
 
61
 
62
# Create the Gradio interface: a text box and a JSON upload in, JSON out.
_text_input = gr.Textbox(lines=10, placeholder="Enter your text here...")
_file_input = gr.File(label="Upload JSON File")

iface = gr.Interface(
    fn=process_inputs,
    inputs=[_text_input, _file_input],
    outputs=gr.JSON(),
    title="Noun and Verb Identifier with Similarity Check",
    description="Enter a document or text to identify the nouns and verbs, and check for similarities with a given list of words.",
)
73
 
74
  if __name__ == "__main__":