# blazingbunny's picture
# Update app.py
# a789703 verified
# raw
# history blame contribute delete
# No virus
# 2.69 kB
# app.py
import json
import subprocess
import sys

import gradio as gr
import spacy
# Name of the spaCy pipeline used for POS tagging and similarity.
SPACY_MODEL = "en_core_web_md"


def _load_spacy_model(name):
    """Return the spaCy pipeline *name*, downloading it only when missing.

    ``spacy.load`` raises ``OSError`` when the model package is not
    installed; only in that case is the download command run, so a normal
    start-up does not re-download the model.

    Raises:
        subprocess.CalledProcessError: if the download command exits with a
            non-zero status.
    """
    try:
        return spacy.load(name)
    except OSError:
        # Use the running interpreter so the model is installed into the
        # same environment that imports it; a bare "python" on PATH may
        # point at a different installation.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", name],
            check=True,
        )
        return spacy.load(name)


# Load the spaCy model for POS tagging and similarity
nlp = _load_spacy_model(SPACY_MODEL)
def identify_nouns_verbs(text):
    """Collect the nouns and verbs found in *text*.

    The text is run through the module-level spaCy pipeline ``nlp``.

    Returns:
        A dict with the keys ``"Nouns"`` and ``"Verbs"``. Each value is a
        list, in document order, of ``{"text": ..., "begin_offset": ...}``
        dicts built from the token's ``text`` and ``idx`` attributes.
    """
    # One bucket per part-of-speech tag we care about; tokens with any
    # other tag are ignored.
    buckets = {"NOUN": [], "VERB": []}
    for token in nlp(text):
        bucket = buckets.get(token.pos_)
        if bucket is not None:
            bucket.append({"text": token.text, "begin_offset": token.idx})
    return {"Nouns": buckets["NOUN"], "Verbs": buckets["VERB"]}
def calculate_similarity(nouns_verbs, input_list, threshold=0.7):
    """Match extracted nouns/verbs against reference words by similarity.

    Each extracted word is compared, via the module-level spaCy pipeline
    ``nlp`` and its ``similarity`` method, with every reference word of the
    same category. Pairs scoring strictly above *threshold* are kept.

    Args:
        nouns_verbs: Dict with optional ``"Nouns"`` / ``"Verbs"`` keys, each
            a list of dicts carrying a ``"text"`` entry (the shape returned
            by ``identify_nouns_verbs``).
        input_list: Dict with optional ``"Nouns"`` / ``"Verbs"`` keys, each
            a list of reference words. A missing or empty category is
            treated as having no reference words.
        threshold: Minimum similarity (exclusive) for a pair to be reported.
            Defaults to 0.7.

    Returns:
        ``{"Nouns": {...}, "Verbs": {...}}`` where each inner dict maps an
        extracted word to a list of ``(reference_word, score)`` tuples.
        Words without any match above the threshold are omitted.
    """
    similarities = {"Nouns": {}, "Verbs": {}}
    for pos, matches in similarities.items():
        # dict.fromkeys drops duplicates while keeping first-seen order, so
        # a word repeated in the text (or in the list) is scored only once.
        texts = list(
            dict.fromkeys(entry["text"] for entry in nouns_verbs.get(pos) or [])
        )
        candidates = list(dict.fromkeys(input_list.get(pos) or []))
        if not texts or not candidates:
            continue

        # Parse every reference word once per category instead of once per
        # (extracted word, reference word) pair.
        candidate_docs = [(word, nlp(word)) for word in candidates]

        for text in texts:
            text_doc = nlp(text)
            scored = (
                (word, text_doc.similarity(word_doc))
                for word, word_doc in candidate_docs
            )
            hits = [(word, score) for word, score in scored if score > threshold]
            if hits:
                matches[text] = hits
    return similarities
def process_inputs(text, json_file):
    """Gradio callback: tag *text* and compare it with an uploaded word list.

    Args:
        text: The document text entered by the user.
        json_file: The uploaded file as handed over by the ``gr.File``
            component: either a path string or an object exposing the path
            through a ``name`` attribute. ``None`` when nothing was uploaded.

    Returns:
        A dict with ``"Nouns and Verbs"`` (result of
        ``identify_nouns_verbs``) and ``"Similarities"`` (result of
        ``calculate_similarity``).

    Raises:
        gr.Error: if no file was uploaded, the file is not valid JSON, or
            its top-level value is not a JSON object.
    """
    if json_file is None:
        raise gr.Error("Please upload a JSON file containing the word lists.")

    # Accept both a plain path string and a file-like wrapper with `.name`.
    json_path = getattr(json_file, "name", json_file)

    try:
        # "utf-8-sig" reads ordinary UTF-8 and also strips a leading BOM,
        # which json.load would otherwise reject.
        with open(json_path, "r", encoding="utf-8-sig") as f:
            input_list = json.load(f)
    except json.JSONDecodeError as err:
        raise gr.Error(f"The uploaded file is not valid JSON: {err}") from err

    if not isinstance(input_list, dict):
        raise gr.Error(
            'The uploaded JSON must be an object with "Nouns" and "Verbs" lists.'
        )

    nouns_verbs = identify_nouns_verbs(text)
    similarities = calculate_similarity(nouns_verbs, input_list)
    return {"Nouns and Verbs": nouns_verbs, "Similarities": similarities}
# Assemble the web UI: a multi-line text box and a file upload feed
# process_inputs, whose returned dict is rendered by a JSON component.
_input_components = [
    gr.Textbox(placeholder="Enter your text here...", lines=10),
    gr.File(label="Upload JSON File"),
]

iface = gr.Interface(
    process_inputs,
    _input_components,
    gr.JSON(),
    description="Enter a document or text to identify the nouns and verbs, and check for similarities with a given list of words.",
    title="Noun and Verb Identifier with Similarity Check",
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    iface.launch(share=True)