File size: 2,686 Bytes
a789703
 
522d644
 
a789703
dfa043f
32b5c2e
 
a789703
522d644
a789703
 
a433d47
 
522d644
 
 
a789703
 
 
522d644
 
 
a789703
 
addbb21
a789703
 
 
 
 
 
 
 
 
 
 
 
 
 
a433d47
a789703
 
 
 
 
 
 
 
a433d47
a789703
0b19d7f
a789703
 
 
 
 
 
 
 
addbb21
522d644
 
a789703
 
 
 
 
3117cb7
a789703
 
522d644
 
 
09d71fa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# app.py

import json
import subprocess
import sys

import gradio as gr
import spacy

# Load the spaCy model used for POS tagging and similarity.  Download it only
# when it is not already installed -- the original unconditional download ran
# on every startup, which is slow and requires network access each launch.
try:
    nlp = spacy.load("en_core_web_md")
except OSError:
    # spacy.load raises OSError when the model package is missing.
    # Use sys.executable so the download targets the running interpreter,
    # and check=True so a failed download surfaces instead of being ignored.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_md"],
        check=True,
    )
    nlp = spacy.load("en_core_web_md")

def identify_nouns_verbs(text):
    """Return the nouns and verbs in *text*, each with its character offset.

    The result is ``{"Nouns": [...], "Verbs": [...]}`` where every entry is a
    dict of the token's surface form (``"text"``) and its start index in the
    input string (``"begin_offset"``).
    """
    doc = nlp(text)

    def collect(pos_tag):
        # One entry per token whose coarse POS tag matches pos_tag.
        return [
            {"text": tok.text, "begin_offset": tok.idx}
            for tok in doc
            if tok.pos_ == pos_tag
        ]

    return {"Nouns": collect("NOUN"), "Verbs": collect("VERB")}

def calculate_similarity(nouns_verbs, input_list):
    """Match extracted nouns/verbs against a user-supplied word list.

    Args:
        nouns_verbs: output of ``identify_nouns_verbs`` -- a dict with
            ``"Nouns"`` and ``"Verbs"`` lists of ``{"text", "begin_offset"}``.
        input_list: dict with ``"Nouns"`` and ``"Verbs"`` lists of candidate
            words to compare against (missing keys are treated as empty).

    Returns:
        ``{"Nouns": {word: [(match, score), ...]}, "Verbs": {...}}`` keeping
        only pairs whose spaCy vector similarity exceeds 0.7, with each
        (word, match) pair recorded at most once.
    """
    similarities = {"Nouns": {}, "Verbs": {}}

    # One loop handles both POS categories -- the original duplicated the
    # noun and verb bodies verbatim.
    for pos in ("Nouns", "Verbs"):
        # Hoist: run the pipeline once per candidate word, not once per
        # (token, word) pair as the original nested loops did.
        candidates = [(word, nlp(word)) for word in input_list.get(pos, [])]
        for entry in nouns_verbs[pos]:
            text = entry["text"]
            token_doc = nlp(text)
            for word, word_doc in candidates:
                score = token_doc.similarity(word_doc)
                if score > 0.7:  # Adjust threshold as needed
                    matches = similarities[pos].setdefault(text, [])
                    # Keep only the first score seen for a given match word,
                    # mirroring the original add_similarity behavior.
                    if word not in [m for m, _ in matches]:
                        matches.append((word, score))

    return similarities

def process_inputs(text, json_file):
    """Run the full analysis for the Gradio UI.

    Args:
        text: the document to analyze.
        json_file: the uploaded word list -- either a file wrapper exposing a
            ``.name`` path (older Gradio) or a plain filepath string (newer
            Gradio versions pass the path directly).

    Returns:
        A dict with the extracted ``"Nouns and Verbs"`` and the
        ``"Similarities"`` computed against the uploaded list.
    """
    # Accept both a tempfile-like object with .name and a bare path string.
    path = getattr(json_file, "name", json_file)
    with open(path, "r", encoding="utf-8") as f:
        input_list = json.load(f)

    nouns_verbs = identify_nouns_verbs(text)
    similarities = calculate_similarity(nouns_verbs, input_list)
    return {"Nouns and Verbs": nouns_verbs, "Similarities": similarities}

# Create the Gradio interface: a multi-line text box for the document plus a
# JSON file upload for the candidate word list; the combined analysis is
# rendered as JSON in the output pane.
iface = gr.Interface(
    fn=process_inputs,
    inputs=[
        gr.Textbox(lines=10, placeholder="Enter your text here..."),
        gr.File(label="Upload JSON File")
    ],
    outputs=gr.JSON(),
    title="Noun and Verb Identifier with Similarity Check",
    description="Enter a document or text to identify the nouns and verbs, and check for similarities with a given list of words."
)

if __name__ == "__main__":
    # share=True additionally exposes a temporary public Gradio URL.
    iface.launch(share=True)