Commit
•
a433d47
1
Parent(s):
25259cb
Update app.py
Browse files
app.py
CHANGED
@@ -1,84 +1,81 @@
|
|
1 |
-
# app.py
|
2 |
-
|
3 |
import gradio as gr
|
4 |
import spacy
|
5 |
-
import subprocess
|
6 |
import json
|
|
|
7 |
|
8 |
# Download the spaCy model if it is not already downloaded
|
9 |
-
subprocess.run(["python", "-m", "spacy", "download", "
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
-
|
12 |
-
|
13 |
|
14 |
-
|
|
|
|
|
|
|
15 |
# Process the text with spaCy
|
16 |
doc = nlp(text)
|
17 |
|
18 |
-
# Extract nouns and verbs with
|
19 |
-
nouns = [{"
|
20 |
-
verbs = [{"
|
21 |
|
22 |
return {"Nouns": nouns, "Verbs": verbs}
|
23 |
|
24 |
-
def calculate_similarity(
|
25 |
-
|
26 |
-
|
27 |
-
def add_similarity(word, similar_word, score, pos):
|
28 |
-
if word not in similarities[pos]:
|
29 |
-
similarities[pos][word] = []
|
30 |
-
if similar_word not in [sim[0] for sim in similarities[pos][word]]:
|
31 |
-
similarities[pos][word].append((similar_word, score))
|
32 |
-
|
33 |
-
for noun in nouns_verbs["Nouns"]:
|
34 |
-
noun_text = noun["text"]
|
35 |
-
noun_token = nlp(noun_text)
|
36 |
-
for word in input_list["Nouns"]:
|
37 |
-
word_token = nlp(word)
|
38 |
-
similarity = noun_token.similarity(word_token)
|
39 |
-
if similarity > 0.8: # Adjust threshold as needed
|
40 |
-
add_similarity(noun_text, word, similarity, "Nouns")
|
41 |
|
42 |
-
|
43 |
-
|
44 |
-
verb_token = nlp(verb_text)
|
45 |
-
for word in input_list["Verbs"]:
|
46 |
-
word_token = nlp(word)
|
47 |
-
similarity = verb_token.similarity(word_token)
|
48 |
-
if similarity > 0.8: # Adjust threshold as needed
|
49 |
-
add_similarity(verb_text, word, similarity, "Verbs")
|
50 |
|
51 |
-
|
52 |
-
|
53 |
-
def process_inputs(text, json_file):
|
54 |
-
# Read the content of the uploaded file
|
55 |
-
with open(json_file.name, 'r') as f:
|
56 |
-
input_list = json.load(f)
|
57 |
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
-
#
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
-
return
|
71 |
|
72 |
# Create the Gradio interface
|
73 |
iface = gr.Interface(
|
74 |
-
fn=
|
75 |
-
inputs=[
|
76 |
-
gr.Textbox(lines=10, placeholder="Enter your text here..."),
|
77 |
-
gr.File(label="Upload JSON File")
|
78 |
-
],
|
79 |
outputs=gr.JSON(),
|
80 |
-
title="Noun and Verb
|
81 |
-
description="Enter a document
|
82 |
)
|
83 |
|
84 |
if __name__ == "__main__":
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import spacy
|
|
|
3 |
import json
|
4 |
+
import os
|
5 |
|
6 |
# Download the spaCy model if it is not already downloaded
|
7 |
+
subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
|
8 |
+
|
9 |
+
# Load the spaCy model for POS tagging
|
10 |
+
nlp = spacy.load("en_core_web_sm")
|
11 |
+
|
12 |
+
# Load the list of nouns and verbs from the JSON file
|
13 |
+
json_file_path = "/mnt/data/ED-input_list.json"
|
14 |
|
15 |
+
with open(json_file_path, 'r') as json_file:
|
16 |
+
input_list = json.load(json_file)
|
17 |
|
18 |
+
input_nouns = set(input_list["Nouns"])
|
19 |
+
input_verbs = set(input_list["Verbs"])
|
20 |
+
|
21 |
+
def identify_nouns_verbs(text):
    """Tag *text* with spaCy and collect its nouns and verbs.

    Returns a dict ``{"Nouns": [...], "Verbs": [...]}`` where each entry
    is ``{"word": token text, "begin_offset": character offset in text}``.
    """
    # Process the text with spaCy
    doc = nlp(text)

    # Single pass over the tokens, dispatching on POS tag.
    found = {"Nouns": [], "Verbs": []}
    bucket_for = {"NOUN": "Nouns", "VERB": "Verbs"}
    for token in doc:
        bucket = bucket_for.get(token.pos_)
        if bucket is not None:
            found[bucket].append({"word": token.text, "begin_offset": token.idx})

    return {"Nouns": found["Nouns"], "Verbs": found["Verbs"]}
|
30 |
|
31 |
+
def calculate_similarity(input_text, json_file):
    """Find nouns/verbs in *input_text* that are absent from the uploaded
    reference list, plus their near-matches in that list.

    Args:
        input_text: Free text to analyze.
        json_file: Gradio File value — a file-like object with a ``.name``
            attribute or a plain filepath string, depending on the Gradio
            version. Must contain JSON with "Nouns" and "Verbs" lists.

    Returns:
        {"Nouns": [...], "Verbs": [...], "Similarities": {"Nouns": {...},
        "Verbs": {...}}} where found words carry character offsets and
        each similarity entry maps a word to (reference word, score) pairs.
    """
    # Gradio may hand us a tempfile wrapper or a path string; resolve to a
    # path and read it ourselves (json.load on the raw value breaks on
    # newer Gradio versions that pass a str).
    path = getattr(json_file, "name", json_file)
    with open(path, 'r') as f:
        input_list = json.load(f)

    ref_nouns = set(input_list["Nouns"])
    ref_verbs = set(input_list["Verbs"])

    doc = nlp(input_text)

    output = {"Nouns": [], "Verbs": [], "Similarities": {"Nouns": {}, "Verbs": {}}}

    # Find nouns and verbs with offsets
    found_nouns = [{"word": token.text, "begin_offset": token.idx} for token in doc if token.pos_ == "NOUN"]
    found_verbs = [{"word": token.text, "begin_offset": token.idx} for token in doc if token.pos_ == "VERB"]

    # Keep only words that are not already in the reference list.
    output["Nouns"] = [noun for noun in found_nouns if noun["word"] not in ref_nouns]
    output["Verbs"] = [verb for verb in found_verbs if verb["word"] not in ref_verbs]

    def _similarities(entries, reference, threshold=0.7):
        # Shared noun/verb similarity pass. Pre-tokenize the reference
        # words once instead of re-running nlp() inside the inner loop.
        ref_docs = [nlp(word) for word in reference]
        result = {}
        for entry in entries:
            token = nlp(entry["word"])
            matches = []
            for ref_doc in ref_docs:
                score = token.similarity(ref_doc)
                if score > threshold:
                    matches.append((ref_doc.text, score))
            result[entry["word"]] = matches
        return result

    # NOTE(review): en_core_web_sm ships no word vectors, so .similarity()
    # falls back to context tensors and emits a warning — consider
    # en_core_web_md if similarity quality matters.
    output["Similarities"]["Nouns"] = _similarities(output["Nouns"], ref_nouns)
    output["Similarities"]["Verbs"] = _similarities(output["Verbs"], ref_verbs)

    return output
|
71 |
|
72 |
# Create the Gradio interface: free text plus an uploaded JSON word list
# in, a JSON report of nouns/verbs and their similarities out.
text_input = gr.Textbox(lines=10, placeholder="Enter your text here...")
list_input = gr.File(label="Upload JSON List")

iface = gr.Interface(
    fn=calculate_similarity,
    inputs=[text_input, list_input],
    outputs=gr.JSON(),
    title="Noun and Verb Similarity Checker",
    description="Enter a document and upload a JSON list to identify nouns and verbs and find their similarities.",
)
|
80 |
|
81 |
if __name__ == "__main__":
|