import cv2
import os
from moviepy.editor import VideoFileClip, concatenate_videoclips
import gradio as gr
import re
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet
# Download the NLTK resources required for tokenization, POS tagging, and lemmatization
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
def get_wordnet_pos(tag):
    # Map a Penn Treebank POS tag to the corresponding WordNet POS constant
    if tag.startswith('J'):
        return wordnet.ADJ
    elif tag.startswith('V'):
        return wordnet.VERB
    elif tag.startswith('N'):
        return wordnet.NOUN
    elif tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN  # Default to noun if the POS tag is not recognized
def get_lemma(word):
    # Lemmatize each token of the input, using its POS tag for better accuracy
    lemmatizer = WordNetLemmatizer()
    tokens = word_tokenize(word)
    tagged_words = nltk.pos_tag(tokens)
    lemmas = []
    for token, tag in tagged_words:
        wordnet_pos = get_wordnet_pos(tag)
        lemmas.append(lemmatizer.lemmatize(token, pos=wordnet_pos))
    return ' '.join(lemmas)
def apply_lemma_to_string(sentence):
    # Lemmatize every word of a sentence and rejoin it into a single string
    words = word_tokenize(sentence)
    lemmas = [get_lemma(word) for word in words]
    return ' '.join(lemmas)
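# Illustrative example (not executed here); the exact result depends on how the
# tagger labels each word in isolation, but a typical tagging gives:
#   apply_lemma_to_string("the boys are running")  ->  "the boy be run"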
def parse_string(string, dataset):
    # Greedily match the longest dataset entry at each position of the string.
    # Single-character matches (letter clips) are collected into `otherword`,
    # which is flushed whenever an unmatched character (normally a space) is hit.
    parsed_list = []
    start = 0
    otherword = ""
    end = len(string)
    while start < end:
        max_chunk = ""
        max_length = 0
        for chunk in dataset:
            if string.startswith(chunk.lower(), start) and len(chunk) > max_length:
                max_chunk = chunk
                max_length = len(chunk)
        if max_chunk:
            if len(max_chunk) > 1:
                parsed_list.append(max_chunk)
                print(max_chunk)
            else:
                otherword += max_chunk
            start += len(max_chunk)
        else:
            parsed_list.append(otherword)
            otherword = ""
            start += 1
    return parsed_list
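# Illustrative walkthrough, assuming the dataset contains "good boy" plus the
# single letters a-z (as the fingerspelling fallback below suggests):
#   parse_string("good boy runs ", names)
#   -> "good boy" is matched as one chunk; the letters of "runs" accumulate in
#      `otherword` and are flushed at the trailing space
#   -> ["good boy", "", "runs"]   (the empty entry is dropped later by
#      remove_empty_values)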
def remove_empty_values(lst):
    # Drop falsy entries such as the empty strings left behind by parse_string
    return [x for x in lst if x]
def flatten_lists(lst):
    # Recursively flatten nested lists into a single flat list
    flat_list = []
    for i in lst:
        if isinstance(i, list):
            flat_list.extend(flatten_lists(i))
        else:
            flat_list.append(i)
    return flat_list
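# Example: flatten_lists(["good boy", ["r", "u", "n", "s"]])
#          -> ["good boy", "r", "u", "n", "s"]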
# Build the list of available sign names from the video files in the dataset;
# a file such as "good-boy.mp4" becomes the entry "good boy".
path = 'Dataset'
VideosNames = []
myList = os.listdir(path)
print(myList)
for cu_video in myList:
    VideosNames.append(os.path.splitext(cu_video)[0].replace("-", " ").lower())
print(VideosNames)
def texttoSign(text):
    # Normalize the input: lowercase, strip non-letters, and append a trailing
    # space so parse_string flushes the final fingerspelled word.
    text = text + " "
    text = text.lower()
    # text = apply_lemma_to_string(text)
    text = re.sub('[^a-z]+', ' ', text)

    # Split the text into known signs and leftover words
    listofwords = parse_string(text, VideosNames)
    listofwords = remove_empty_values(listofwords)

    # Words with no dedicated clip are fingerspelled letter by letter
    for index, word in enumerate(listofwords):
        if word not in VideosNames:
            listofwords[index] = list(word)
    listofwords = flatten_lists(listofwords)

    clips = []
    for i in range(len(listofwords)):
        # Map the sign name back to its file name ("good boy" -> "good-boy.mp4")
        clip_path = "Dataset/" + listofwords[i].replace(" ", "-") + ".mp4"
        # Probe the clip with OpenCV to compute its duration in seconds
        data = cv2.VideoCapture(clip_path)
        framescount = data.get(cv2.CAP_PROP_FRAME_COUNT)
        fps = data.get(cv2.CAP_PROP_FPS)
        data.release()
        seconds = round(framescount / fps)
        # Keep only part of each clip, skipping the first second
        clips.append(VideoFileClip(clip_path).subclip(1, seconds / 2))

    result_clip = concatenate_videoclips(clips, method='compose')
    result_clip.write_videofile("combined.mp4", fps=30)
    return "combined.mp4"
demo = gr.Interface(fn=texttoSign,
                    inputs="text",
                    outputs="video",
                    title="Urdu Text To Sign",
                    description="A small text-to-sign-language demo based on Urdu sign language standards.",
                    examples=[["good boy"]])
demo.launch(debug=True)