# app.py
"""Gradio app: identify nouns and verbs in free text and score their
similarity against a user-supplied JSON word list ({"Nouns": [...], "Verbs": [...]})."""
import json
import subprocess
import sys

import gradio as gr
import spacy

# Name of the spaCy model used for POS tagging and word-vector similarity.
MODEL_NAME = "en_core_web_md"

# Similarity score above which a pair of words is reported. Adjust as needed.
SIMILARITY_THRESHOLD = 0.7


def _load_model(name=MODEL_NAME):
    """Load the spaCy model, downloading it only if it is not installed.

    The original code unconditionally re-downloaded the model on every
    startup; trying the load first avoids that network round-trip.
    `check=True` surfaces download failures instead of silently continuing,
    and `sys.executable` installs into the interpreter actually running us.
    """
    try:
        return spacy.load(name)
    except OSError:
        subprocess.run([sys.executable, "-m", "spacy", "download", name], check=True)
        return spacy.load(name)


# Loaded once at import time; shared by all request handlers.
nlp = _load_model()


def identify_nouns_verbs(text, existing_nouns, existing_verbs):
    """Return nouns and verbs found in *text* with their character offsets.

    Tokens whose lowercased form appears in *existing_nouns* /
    *existing_verbs* are omitted.

    Returns:
        {"Nouns": [{"text": ..., "begin_offset": ...}, ...], "Verbs": [...]}
    """
    doc = nlp(text)
    # Sets give O(1) membership tests on the exclusion lists.
    skip_nouns = set(existing_nouns)
    skip_verbs = set(existing_verbs)
    nouns = [
        {"text": tok.text, "begin_offset": tok.idx}
        for tok in doc
        if tok.pos_ == "NOUN" and tok.text.lower() not in skip_nouns
    ]
    verbs = [
        {"text": tok.text, "begin_offset": tok.idx}
        for tok in doc
        if tok.pos_ == "VERB" and tok.text.lower() not in skip_verbs
    ]
    return {"Nouns": nouns, "Verbs": verbs}


def calculate_similarity(nouns_verbs, input_list):
    """Score each extracted noun/verb against the user's word list.

    Args:
        nouns_verbs: output of identify_nouns_verbs().
        input_list: dict with "Nouns" and "Verbs" word lists.

    Returns:
        {"Nouns": {word: [(similar_word, score), ...]}, "Verbs": {...}}
        containing only pairs scoring above SIMILARITY_THRESHOLD.
    """
    similarities = {"Nouns": {}, "Verbs": {}}

    def _compare(pos):
        # Parse each reference word ONCE. The original re-ran nlp() on the
        # whole input list inside the inner loop (O(tokens * words) parses).
        ref_docs = [(word, nlp(word)) for word in input_list[pos]]
        for entry in nouns_verbs[pos]:
            token_text = entry["text"]
            token_doc = nlp(token_text)
            for word, word_doc in ref_docs:
                score = token_doc.similarity(word_doc)
                if score > SIMILARITY_THRESHOLD:
                    bucket = similarities[pos].setdefault(token_text, [])
                    # Keep at most one entry per matched reference word.
                    if word not in (w for w, _ in bucket):
                        bucket.append((word, score))

    _compare("Nouns")
    _compare("Verbs")
    return similarities


def process_inputs(text, json_file):
    """Gradio callback: extract nouns/verbs from *text* and score them
    against the word list in the uploaded JSON file.

    The upload must be JSON of the form {"Nouns": [...], "Verbs": [...]}.
    """
    # Gradio passes either a tempfile wrapper (with .name) or a plain path
    # string depending on version; accept both.
    path = getattr(json_file, "name", json_file)
    with open(path, "r") as f:
        input_list = json.load(f)

    existing_nouns = [word.lower() for word in input_list["Nouns"]]
    existing_verbs = [word.lower() for word in input_list["Verbs"]]

    nouns_verbs = identify_nouns_verbs(text, existing_nouns, existing_verbs)
    similarities = calculate_similarity(nouns_verbs, input_list)

    # JSON output wants lists, not tuples; both POS keys are always present.
    formatted_similarities = {
        pos: {
            word: [[sim_word, score] for sim_word, score in sims]
            for word, sims in similarities[pos].items()
        }
        for pos in ("Nouns", "Verbs")
    }
    return {"Nouns and Verbs": nouns_verbs, "Similarities": formatted_similarities}


# Build the Gradio UI: free-text input plus a JSON word-list upload.
iface = gr.Interface(
    fn=process_inputs,
    inputs=[
        gr.Textbox(lines=10, placeholder="Enter your text here..."),
        gr.File(label="Upload JSON File"),
    ],
    outputs=gr.JSON(),
    title="Noun and Verb Identifier with Similarity Check",
    description="Enter a document or text to identify the nouns and verbs, and check for similarities with a given list of words.",
)

if __name__ == "__main__":
    iface.launch(share=True)