blazingbunny committed on
Commit
a433d47
1 Parent(s): 25259cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -59
app.py CHANGED
@@ -1,84 +1,81 @@
1
- # app.py
2
-
3
  import gradio as gr
4
  import spacy
5
- import subprocess
6
  import json
 
7
 
8
  # Download the spaCy model if it is not already downloaded
9
- subprocess.run(["python", "-m", "spacy", "download", "en_core_web_md"])
 
 
 
 
 
 
10
 
11
- # Load the spaCy model for POS tagging and similarity
12
- nlp = spacy.load("en_core_web_md")
13
 
14
- def identify_nouns_verbs(text, existing_nouns, existing_verbs):
 
 
 
15
  # Process the text with spaCy
16
  doc = nlp(text)
17
 
18
- # Extract nouns and verbs with their positions, omitting those already in the input list
19
- nouns = [{"text": token.text, "begin_offset": token.idx} for token in doc if token.pos_ == "NOUN" and token.text.lower() not in existing_nouns]
20
- verbs = [{"text": token.text, "begin_offset": token.idx} for token in doc if token.pos_ == "VERB" and token.text.lower() not in existing_verbs]
21
 
22
  return {"Nouns": nouns, "Verbs": verbs}
23
 
24
- def calculate_similarity(nouns_verbs, input_list):
25
- similarities = {"Nouns": {}, "Verbs": {}}
26
-
27
- def add_similarity(word, similar_word, score, pos):
28
- if word not in similarities[pos]:
29
- similarities[pos][word] = []
30
- if similar_word not in [sim[0] for sim in similarities[pos][word]]:
31
- similarities[pos][word].append((similar_word, score))
32
-
33
- for noun in nouns_verbs["Nouns"]:
34
- noun_text = noun["text"]
35
- noun_token = nlp(noun_text)
36
- for word in input_list["Nouns"]:
37
- word_token = nlp(word)
38
- similarity = noun_token.similarity(word_token)
39
- if similarity > 0.8: # Adjust threshold as needed
40
- add_similarity(noun_text, word, similarity, "Nouns")
41
 
42
- for verb in nouns_verbs["Verbs"]:
43
- verb_text = verb["text"]
44
- verb_token = nlp(verb_text)
45
- for word in input_list["Verbs"]:
46
- word_token = nlp(word)
47
- similarity = verb_token.similarity(word_token)
48
- if similarity > 0.8: # Adjust threshold as needed
49
- add_similarity(verb_text, word, similarity, "Verbs")
50
 
51
- return similarities
52
-
53
- def process_inputs(text, json_file):
54
- # Read the content of the uploaded file
55
- with open(json_file.name, 'r') as f:
56
- input_list = json.load(f)
57
 
58
- existing_nouns = [word.lower() for word in input_list["Nouns"]]
59
- existing_verbs = [word.lower() for word in input_list["Verbs"]]
60
-
61
- nouns_verbs = identify_nouns_verbs(text, existing_nouns, existing_verbs)
62
- similarities = calculate_similarity(nouns_verbs, input_list)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
- # Format similarities to match the required output structure
65
- formatted_similarities = {"Nouns": {}, "Verbs": {}}
66
- for pos in ["Nouns", "Verbs"]:
67
- for word, sims in similarities[pos].items():
68
- formatted_similarities[pos][word] = [[sim[0], sim[1]] for sim in sims]
 
 
 
 
 
69
 
70
- return {"Nouns and Verbs": nouns_verbs, "Similarities": formatted_similarities}
71
 
72
  # Create the Gradio interface
73
  iface = gr.Interface(
74
- fn=process_inputs,
75
- inputs=[
76
- gr.Textbox(lines=10, placeholder="Enter your text here..."),
77
- gr.File(label="Upload JSON File")
78
- ],
79
  outputs=gr.JSON(),
80
- title="Noun and Verb Identifier with Similarity Check",
81
- description="Enter a document or text to identify the nouns and verbs, and check for similarities with a given list of words."
82
  )
83
 
84
  if __name__ == "__main__":
 
 
 
1
  import gradio as gr
2
  import spacy
 
3
  import json
4
+ import os
5
 
6
  # Download the spaCy model if it is not already downloaded
7
+ subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
8
+
9
+ # Load the spaCy model for POS tagging
10
+ nlp = spacy.load("en_core_web_sm")
11
+
12
+ # Load the list of nouns and verbs from the JSON file
13
+ json_file_path = "/mnt/data/ED-input_list.json"
14
 
15
+ with open(json_file_path, 'r') as json_file:
16
+ input_list = json.load(json_file)
17
 
18
+ input_nouns = set(input_list["Nouns"])
19
+ input_verbs = set(input_list["Verbs"])
20
+
21
def identify_nouns_verbs(text):
    """Tag *text* with spaCy and list its nouns and verbs.

    Returns a dict ``{"Nouns": [...], "Verbs": [...]}`` where every entry
    is ``{"word": token text, "begin_offset": character offset in text}``.
    """
    parsed = nlp(text)

    def _collect(pos_tag):
        # One entry per token carrying the requested part-of-speech tag.
        return [
            {"word": tok.text, "begin_offset": tok.idx}
            for tok in parsed
            if tok.pos_ == pos_tag
        ]

    return {"Nouns": _collect("NOUN"), "Verbs": _collect("VERB")}
30
 
31
def calculate_similarity(input_text, json_file):
    """Find nouns/verbs in *input_text* missing from the uploaded word list
    and score their similarity against that list.

    Parameters
    ----------
    input_text : str
        Document to analyse.
    json_file : str or file-like
        Value from a Gradio ``File`` component pointing at a JSON file of
        the shape ``{"Nouns": [...], "Verbs": [...]}``. Gradio supplies a
        path string (newer versions) or a tempfile wrapper exposing
        ``.name`` (older versions) — never an open readable handle, so the
        original ``json.load(json_file)`` failed; both forms are handled.

    Returns
    -------
    dict
        ``{"Nouns": [...], "Verbs": [...]}`` for tokens (with
        ``begin_offset``) not already in the uploaded list, plus
        ``"Similarities"`` mapping each such word to
        ``[(list_word, score), ...]`` for scores above 0.7.
    """
    # Resolve the uploaded file to a path and parse it.
    path = json_file if isinstance(json_file, str) else json_file.name
    with open(path, 'r') as f:
        input_list = json.load(f)

    input_nouns = set(input_list["Nouns"])
    input_verbs = set(input_list["Verbs"])

    doc = nlp(input_text)

    output = {"Nouns": [], "Verbs": [], "Similarities": {"Nouns": {}, "Verbs": {}}}

    # Tokens with character offsets, split by part of speech.
    found_nouns = [{"word": t.text, "begin_offset": t.idx} for t in doc if t.pos_ == "NOUN"]
    found_verbs = [{"word": t.text, "begin_offset": t.idx} for t in doc if t.pos_ == "VERB"]

    # Keep only words not already present in the uploaded list
    # (comparison is case-sensitive, as before).
    output["Nouns"] = [n for n in found_nouns if n["word"] not in input_nouns]
    output["Verbs"] = [v for v in found_verbs if v["word"] not in input_verbs]

    # Parse each list word through the pipeline ONCE, instead of once per
    # (found word, list word) pair as the original did.
    noun_refs = {w: nlp(w) for w in input_nouns}
    verb_refs = {w: nlp(w) for w in input_verbs}

    def _score(found, refs, bucket):
        # Record every list word whose similarity to a found word exceeds 0.7.
        for entry in found:
            tok = nlp(entry["word"])
            sims = []
            for ref_word, ref_doc in refs.items():
                score = tok.similarity(ref_doc)
                if score > 0.7:
                    sims.append((ref_word, score))
            output["Similarities"][bucket][entry["word"]] = sims

    _score(output["Nouns"], noun_refs, "Nouns")
    _score(output["Verbs"], verb_refs, "Verbs")

    return output
71
 
72
# Gradio UI: a multiline text box plus a JSON upload, wired to
# calculate_similarity, rendering the result as JSON.
_text_input = gr.Textbox(lines=10, placeholder="Enter your text here...")
_file_input = gr.File(label="Upload JSON List")

iface = gr.Interface(
    fn=calculate_similarity,
    inputs=[_text_input, _file_input],
    outputs=gr.JSON(),
    title="Noun and Verb Similarity Checker",
    description="Enter a document and upload a JSON list to identify nouns and verbs and find their similarities.",
)
80
 
81
  if __name__ == "__main__":