BilalSardar committed
Commit 680965d
1 Parent(s): a6ce1bd

Update app.py

Files changed (1)
  1. app.py +33 -10
app.py CHANGED
@@ -5,21 +5,43 @@ import gradio as gr
 import re
 import nltk
 from nltk.stem import WordNetLemmatizer
+from nltk.tokenize import word_tokenize
+from nltk.corpus import wordnet
 
-
+nltk.download('punkt')
 nltk.download('wordnet')
+nltk.download('averaged_perceptron_tagger')
+
+def get_wordnet_pos(tag):
+    if tag.startswith('J'):
+        return wordnet.ADJ
+    elif tag.startswith('V'):
+        return wordnet.VERB
+    elif tag.startswith('N'):
+        return wordnet.NOUN
+    elif tag.startswith('R'):
+        return wordnet.ADV
+    else:
+        return wordnet.NOUN  # Default to noun if the POS tag is not found
 
 def get_lemma(word):
-    nltk.download('wordnet')
     lemmatizer = WordNetLemmatizer()
-    lemma = lemmatizer.lemmatize(word)
-    return lemma
-
-def apply_lemma_to_list(word_list):
-    updated_list = []
-    for word in word_list:
-        updated_list.append(get_lemma(word))
-    return updated_list
+
+    tokens = word_tokenize(word)
+    tagged_words = nltk.pos_tag(tokens)
+    lemmas = []
+    for tagged_word in tagged_words:
+        word = tagged_word[0]
+        pos = tagged_word[1]
+        wordnet_pos = get_wordnet_pos(pos)
+        lemma = lemmatizer.lemmatize(word, pos=wordnet_pos)
+        lemmas.append(lemma)
+    return ' '.join(lemmas)
+
+def apply_lemma_to_string(sentence):
+    words = word_tokenize(sentence)
+    lemmas = [get_lemma(word) for word in words]
+    return ' '.join(lemmas)
 
 def parse_string(string, dataset):
     parsed_list = []
@@ -74,6 +96,7 @@ for cu_video in myList:
 print(VideosNames)
 
 def texttoSign(text):
+    text=apply_lemma_to_string(text)
     text=text+" "
     text=text.lower()
     text=re.sub('[^a-z]+', ' ', text)
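
For reference, below is a minimal standalone sketch of the POS-aware lemmatization flow this commit introduces: tokenize the text, POS-tag it, map each Penn Treebank tag to a WordNet POS, and lemmatize with that POS before the text reaches texttoSign. The helper name penn_to_wordnet, the sample sentence, and the printed output are illustrative and not part of app.py; it assumes the same NLTK data packages the commit downloads are available.

import nltk
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Same data packages the commit downloads (newer NLTK releases may also
# need 'punkt_tab' and 'averaged_perceptron_tagger_eng').
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')

def penn_to_wordnet(tag):
    # Mirrors get_wordnet_pos() in app.py: map Penn Treebank tags to WordNet POS.
    if tag.startswith('J'):
        return wordnet.ADJ
    if tag.startswith('V'):
        return wordnet.VERB
    if tag.startswith('R'):
        return wordnet.ADV
    return wordnet.NOUN  # default to noun, as in the commit

lemmatizer = WordNetLemmatizer()
sentence = "the children are running faster"  # illustrative input
lemmas = [lemmatizer.lemmatize(word, pos=penn_to_wordnet(tag))
          for word, tag in nltk.pos_tag(word_tokenize(sentence))]
print(' '.join(lemmas))  # likely prints: the child be run faster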