Spaces:
Running
Running
Commit
•
680965d
1
Parent(s):
a6ce1bd
Update app.py
Browse files
app.py
CHANGED
@@ -5,21 +5,43 @@ import gradio as gr
|
|
5 |
import re
|
6 |
import nltk
|
7 |
from nltk.stem import WordNetLemmatizer
|
|
|
|
|
8 |
|
9 |
-
|
10 |
nltk.download('wordnet')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
def get_lemma(word):
|
13 |
-
nltk.download('wordnet')
|
14 |
lemmatizer = WordNetLemmatizer()
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
def parse_string(string, dataset):
|
25 |
parsed_list = []
|
@@ -74,6 +96,7 @@ for cu_video in myList:
|
|
74 |
print(VideosNames)
|
75 |
|
76 |
def texttoSign(text):
|
|
|
77 |
text=text+" "
|
78 |
text=text.lower()
|
79 |
text=re.sub('[^a-z]+', ' ', text)
|
|
|
5 |
import re
|
6 |
import nltk
|
7 |
from nltk.stem import WordNetLemmatizer
|
8 |
+
from nltk.tokenize import word_tokenize
|
9 |
+
from nltk.corpus import wordnet
|
10 |
|
11 |
+
nltk.download('punkt')
|
12 |
nltk.download('wordnet')
|
13 |
+
nltk.download('averaged_perceptron_tagger')
|
14 |
+
|
15 |
+
def get_wordnet_pos(tag):
    """Map a Penn Treebank POS tag to the matching WordNet POS constant.

    Only the tag's first letter is examined; any tag that does not start
    with J/V/N/R (including an empty tag) defaults to noun.
    """
    # Dispatch on the first character instead of an elif chain.  An empty
    # tag yields '' from tag[:1], which also falls through to the noun
    # default — identical to the original startswith() cascade.
    tag_map = {
        'J': wordnet.ADJ,
        'V': wordnet.VERB,
        'N': wordnet.NOUN,
        'R': wordnet.ADV,
    }
    return tag_map.get(tag[:1], wordnet.NOUN)
|
26 |
|
27 |
def get_lemma(word):
    """Lemmatize every token in *word* using POS-aware lemmatization.

    Tokenizes the input, POS-tags the tokens with nltk.pos_tag, converts
    each Penn Treebank tag to a WordNet POS via get_wordnet_pos, and
    lemmatizes each token with that POS.

    Returns the lemmas joined by single spaces.
    """
    lemmatizer = WordNetLemmatizer()
    tokens = word_tokenize(word)
    # Unpack (token, tag) pairs directly instead of indexing tagged_word[0]
    # / tagged_word[1], and avoid shadowing the *word* parameter inside the
    # loop as the original did.
    lemmas = [
        lemmatizer.lemmatize(token, pos=get_wordnet_pos(tag))
        for token, tag in nltk.pos_tag(tokens)
    ]
    return ' '.join(lemmas)
|
40 |
+
|
41 |
+
def apply_lemma_to_string(sentence):
    """Lemmatize each token of *sentence* and rejoin the results with spaces."""
    # Feed each token through get_lemma lazily rather than building an
    # intermediate list first.
    tokens = word_tokenize(sentence)
    return ' '.join(get_lemma(token) for token in tokens)
|
45 |
|
46 |
def parse_string(string, dataset):
|
47 |
parsed_list = []
|
|
|
96 |
print(VideosNames)
|
97 |
|
98 |
def texttoSign(text):
|
99 |
+
text=apply_lemma_to_string(text)
|
100 |
text=text+" "
|
101 |
text=text.lower()
|
102 |
text=re.sub('[^a-z]+', ' ', text)
|