Files changed (1)
  1. app.py +41 -0
app.py CHANGED
@@ -3,6 +3,46 @@ import os
 from moviepy.editor import *
 import gradio as gr
 import re
+import nltk
+from nltk.stem import WordNetLemmatizer
+from nltk.tokenize import word_tokenize
+from nltk.corpus import wordnet
+
+nltk.download('punkt')
+nltk.download('wordnet')
+nltk.download('averaged_perceptron_tagger')
+
+def get_wordnet_pos(tag):
+    if tag.startswith('J'):
+        return wordnet.ADJ
+    elif tag.startswith('V'):
+        return wordnet.VERB
+    elif tag.startswith('N'):
+        return wordnet.NOUN
+    elif tag.startswith('R'):
+        return wordnet.ADV
+    else:
+        return wordnet.NOUN  # Default to noun if the POS tag is not found
+
+def get_lemma(word):
+    lemmatizer = WordNetLemmatizer()
+
+    tokens = word_tokenize(word)
+    tagged_words = nltk.pos_tag(tokens)
+    lemmas = []
+    for tagged_word in tagged_words:
+        word = tagged_word[0]
+        pos = tagged_word[1]
+        wordnet_pos = get_wordnet_pos(pos)
+        lemma = lemmatizer.lemmatize(word, pos=wordnet_pos)
+        lemmas.append(lemma)
+    return ' '.join(lemmas)
+
+def apply_lemma_to_string(sentence):
+    words = word_tokenize(sentence)
+    lemmas = [get_lemma(word) for word in words]
+    return ' '.join(lemmas)
+
 
 def parse_string(string, dataset):
     parsed_list = []
@@ -59,6 +99,7 @@ print(VideosNames)
 def texttoSign(text):
     text=text+" "
     text=text.lower()
+    text=apply_lemma_to_string(text)
    text=re.sub('[^a-z]+', ' ', text)
     framescount=0
     listofwords=parse_string(text,VideosNames)
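
For anyone who wants to exercise the new preprocessing on its own, here is a minimal sketch. The module name app in the import is an assumption (the diff only shows app.py), and the sample sentence and printed output are illustrative; the nltk.download() calls at module import fetch the corpora the tokenizer, tagger, and lemmatizer need.

import re
from app import apply_lemma_to_string  # hypothetical import path

text = "the children are going to the stores "
text = text.lower()
text = apply_lemma_to_string(text)   # step added in this change
text = re.sub('[^a-z]+', ' ', text)  # existing cleanup from texttoSign
print(text)  # roughly: "the child be go to the store"

Running the lemmatizer before the character filter means inflected forms such as "stores" or "going" collapse to the base forms under which the sign videos are presumably keyed in VideosNames, so more input words find a matching clip.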