Spaces:
Sleeping
Sleeping
BilalSardar
committed on
Commit
•
b7437c9
1
Parent(s):
6f50c8c
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,46 @@ import os
|
|
3 |
from moviepy.editor import *
|
4 |
import gradio as gr
|
5 |
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
def parse_string(string, dataset):
|
8 |
parsed_list = []
|
@@ -59,6 +99,7 @@ print(VideosNames)
|
|
59 |
def texttoSign(text):
|
60 |
text=text+" "
|
61 |
text=text.lower()
|
|
|
62 |
text=re.sub('[^a-z]+', ' ', text)
|
63 |
framescount=0
|
64 |
listofwords=parse_string(text,VideosNames)
|
|
|
3 |
from moviepy.editor import *
|
4 |
import gradio as gr
|
5 |
import re
|
6 |
+
import nltk
|
7 |
+
from nltk.stem import WordNetLemmatizer
|
8 |
+
from nltk.tokenize import word_tokenize
|
9 |
+
from nltk.corpus import wordnet
|
10 |
+
|
11 |
+
# Fetch the NLTK data needed by word_tokenize, WordNetLemmatizer and pos_tag.
# NLTK >= 3.9 replaced the pickled 'punkt' and 'averaged_perceptron_tagger'
# models with 'punkt_tab' and 'averaged_perceptron_tagger_eng'; requesting both
# generations keeps tokenizing and tagging working whichever NLTK version is
# installed. With its default arguments nltk.download() reports a failed or
# unknown package and returns False instead of raising, so the extra requests
# are harmless.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')
|
14 |
+
|
15 |
+
def get_wordnet_pos(tag):
    """Translate a POS tag into the matching WordNet part-of-speech constant.

    The decision is made on the tag's leading letter: 'J' -> adjective,
    'V' -> verb, 'N' -> noun, 'R' -> adverb. Any other tag falls back to noun.
    """
    prefix_table = (
        ('J', wordnet.ADJ),
        ('V', wordnet.VERB),
        ('R', wordnet.ADV),
    )
    for prefix, wordnet_pos in prefix_table:
        if tag.startswith(prefix):
            return wordnet_pos
    # 'N' tags and every unrecognised tag both resolve to noun.
    return wordnet.NOUN
|
26 |
+
|
27 |
+
def get_lemma(word):
    """Return the lemma(s) of *word* as one space-joined string.

    The input is tokenized and POS-tagged on its own, and each resulting token
    is lemmatized using the WordNet POS derived from its tag. If the input
    splits into several tokens, their lemmas are joined with single spaces.
    """
    lemmatizer = WordNetLemmatizer()
    tagged_tokens = nltk.pos_tag(word_tokenize(word))
    return ' '.join(
        lemmatizer.lemmatize(token, pos=get_wordnet_pos(tag))
        for token, tag in tagged_tokens
    )
|
40 |
+
|
41 |
+
def apply_lemma_to_string(sentence):
    """Lemmatize every token of *sentence* and return the lemmas space-joined.

    The sentence is tokenized once and POS-tagged as a whole sequence, so the
    tagger sees each word together with its neighbours. Each token is then
    lemmatized with the WordNet POS derived from its tag.

    Args:
        sentence: The text to lemmatize.

    Returns:
        The lemmas of all tokens, in their original order, joined by single
        spaces. An input that yields no tokens gives an empty string.
    """
    tokens = word_tokenize(sentence)
    # Tag the full token list in a single call: tagging each word in isolation
    # discards the neighbouring-word context that nltk.pos_tag makes use of.
    tagged_tokens = nltk.pos_tag(tokens)
    # One lemmatizer serves the whole sentence.
    lemmatizer = WordNetLemmatizer()
    return ' '.join(
        lemmatizer.lemmatize(token, pos=get_wordnet_pos(tag))
        for token, tag in tagged_tokens
    )
|
45 |
+
|
46 |
|
47 |
def parse_string(string, dataset):
|
48 |
parsed_list = []
|
|
|
99 |
def texttoSign(text):
|
100 |
text=text+" "
|
101 |
text=text.lower()
|
102 |
+
text=apply_lemma_to_string(text)
|
103 |
text=re.sub('[^a-z]+', ' ', text)
|
104 |
framescount=0
|
105 |
listofwords=parse_string(text,VideosNames)
|