Spaces:

blazingbunny
/

nouns-verbs-identifier-gradio

Sleeping

App Files Files Community

blazingbunny committed on Jun 7

Commit

0b19d7f

•

1 Parent(s): e5dd147

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -7

app.py CHANGED Viewed

@@ -11,19 +11,19 @@ subprocess.run(["python", "-m", "spacy", "download", "en_core_web_md"])
 # Load the spaCy model for POS tagging and similarity
 nlp = spacy.load("en_core_web_md")
-def identify_nouns_verbs(text):
     # Process the text with spaCy
     doc = nlp(text)
-    # Extract nouns and verbs with their positions
-    nouns = [{"text": token.text, "begin_offset": token.idx} for token in doc if token.pos_ == "NOUN"]
-    verbs = [{"text": token.text, "begin_offset": token.idx} for token in doc if token.pos_ == "VERB"]
     return {"Nouns": nouns, "Verbs": verbs}
 def calculate_similarity(nouns_verbs, input_list):
     similarities = {"Nouns": {}, "Verbs": {}}
     def add_similarity(word, similar_word, score, pos):
         if word not in similarities[pos]:
             similarities[pos][word] = []
@@ -55,9 +55,19 @@ def process_inputs(text, json_file):
     with open(json_file.name, 'r') as f:
         input_list = json.load(f)
-    nouns_verbs = identify_nouns_verbs(text)
     similarities = calculate_similarity(nouns_verbs, input_list)
-    return {"Nouns and Verbs": nouns_verbs, "Similarities": similarities}
 # Create the Gradio interface
 iface = gr.Interface(

 # Load the spaCy model for POS tagging and similarity
 nlp = spacy.load("en_core_web_md")
+def identify_nouns_verbs(text, existing_nouns, existing_verbs):
     # Process the text with spaCy
     doc = nlp(text)
+    # Extract nouns and verbs with their positions, omitting those already in the input list
+    nouns = [{"text": token.text, "begin_offset": token.idx} for token in doc if token.pos_ == "NOUN" and token.text.lower() not in existing_nouns]
+    verbs = [{"text": token.text, "begin_offset": token.idx} for token in doc if token.pos_ == "VERB" and token.text.lower() not in existing_verbs]
     return {"Nouns": nouns, "Verbs": verbs}
 def calculate_similarity(nouns_verbs, input_list):
     similarities = {"Nouns": {}, "Verbs": {}}
     def add_similarity(word, similar_word, score, pos):
         if word not in similarities[pos]:
             similarities[pos][word] = []
     with open(json_file.name, 'r') as f:
         input_list = json.load(f)
+    existing_nouns = [word.lower() for word in input_list["Nouns"]]
+    existing_verbs = [word.lower() for word in input_list["Verbs"]]
+    nouns_verbs = identify_nouns_verbs(text, existing_nouns, existing_verbs)
     similarities = calculate_similarity(nouns_verbs, input_list)
+    # Format similarities to match the required output structure
+    formatted_similarities = {"Nouns": {}, "Verbs": {}}
+    for pos in ["Nouns", "Verbs"]:
+        for word, sims in similarities[pos].items():
+            formatted_similarities[pos][word] = [[sim[0], sim[1]] for sim in sims]
+    return {"Nouns and Verbs": nouns_verbs, "Similarities": formatted_similarities}
 # Create the Gradio interface
 iface = gr.Interface(