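# app.py: Gradio demo that converts input text into an Urdu sign-language video
# by looking up pre-recorded clips in the local Dataset/ folder and concatenating them.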
import cv2
import os
from moviepy.editor import *
import gradio as gr
import re
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
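# The NLTK resources above support the optional lemmatization helpers below
# (the apply_lemma_to_string call is currently commented out in texttoSign).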
def get_wordnet_pos(tag):
    """Map a Penn Treebank POS tag to the corresponding WordNet POS constant."""
    if tag.startswith('J'):
        return wordnet.ADJ
    elif tag.startswith('V'):
        return wordnet.VERB
    elif tag.startswith('N'):
        return wordnet.NOUN
    elif tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN  # Default to noun if the POS tag is not recognised
def get_lemma(word):
    """Lemmatize a word (or short phrase), using its POS tag for better accuracy."""
    lemmatizer = WordNetLemmatizer()
    tokens = word_tokenize(word)
    tagged_words = nltk.pos_tag(tokens)
    lemmas = []
    for token, tag in tagged_words:
        wordnet_pos = get_wordnet_pos(tag)
        lemmas.append(lemmatizer.lemmatize(token, pos=wordnet_pos))
    return ' '.join(lemmas)
def apply_lemma_to_string(sentence):
    """Lemmatize every word in a sentence."""
    words = word_tokenize(sentence)
    lemmas = [get_lemma(word) for word in words]
    return ' '.join(lemmas)
def parse_string(string, dataset):
    """Greedily split `string` into the longest phrases found in `dataset`;
    characters that belong to no known phrase are collected as plain words."""
    parsed_list = []
    start = 0
    otherword = ""
    end = len(string)
    while start < end:
        max_chunk = ""
        max_length = 0
        # Find the longest dataset phrase that starts at the current position.
        for chunk in dataset:
            if string.startswith(chunk.lower(), start) and len(chunk) > max_length:
                max_chunk = chunk
                max_length = len(chunk)
        if max_chunk:
            if len(max_chunk) > 1:
                parsed_list.append(max_chunk)
                print(max_chunk)
            else:
                otherword += max_chunk
            start += len(max_chunk)
        else:
            # No phrase matched: flush any accumulated characters and move on.
            parsed_list.append(otherword)
            otherword = ""
            start += 1
    return parsed_list
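# Illustrative trace (assuming Dataset/ contains clips named "good" and "boy"):
# parse_string("good boy ", VideosNames) matches the phrases "good" and "boy" and
# returns ["good", "", "boy", ""]; remove_empty_values() then drops the "" entries.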
def remove_empty_values(lst):
    # Drop falsy entries (e.g. empty strings) produced during parsing.
    return [x for x in lst if x]
def flatten_lists(lst):
    # Recursively flatten nested lists into a single flat list.
    flat_list = []
    for item in lst:
        if isinstance(item, list):
            flat_list.extend(flatten_lists(item))
        else:
            flat_list.append(item)
    return flat_list
path = 'Dataset'
VideosNames = []
myList = os.listdir(path)
print(myList)
for cu_video in myList:
    # Use each file name (without extension) as the phrase that clip signs.
    VideosNames.append((os.path.splitext(cu_video)[0]).replace("-", " ").lower())
print(VideosNames)
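# texttoSign pipeline: lower-case the input, strip non-letters, greedily match the
# longest phrases that have a clip in Dataset/, spell out any remaining words letter
# by letter, then trim and concatenate the matching clips into combined.mp4.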
def texttoSign(text):
    text = text + " "
    text = text.lower()
    #text = apply_lemma_to_string(text)
    text = re.sub('[^a-z]+', ' ', text)  # keep lowercase letters only
    listofwords = parse_string(text, VideosNames)
    listofwords = remove_empty_values(listofwords)
    # Words without a dedicated clip are split into single letters.
    for index, word in enumerate(listofwords):
        if word not in VideosNames:
            listofwords[index] = list(word)
    listofwords = flatten_lists(listofwords)
    clips = []
    for i in range(len(listofwords)):
        clip_path = "Dataset/" + listofwords[i] + ".mp4"
        data = cv2.VideoCapture(clip_path)
        framescount = data.get(cv2.CAP_PROP_FRAME_COUNT)
        fps = data.get(cv2.CAP_PROP_FPS)
        seconds = round(framescount / fps)
        # Trim each clip before concatenation.
        clips.append(VideoFileClip(clip_path).subclip(1, seconds / 2))
    result_clip = concatenate_videoclips(clips, method='compose')
    result_clip.write_videofile("combined.mp4", fps=30)
    return "combined.mp4"
demo = gr.Interface(fn=texttoSign,
                    inputs="text",
                    outputs="video",
                    title="Urdu Text To Sign",
                    description="This is a small text-to-sign-language model based on Urdu sign language standards.",
                    examples=[["good boy"]])
demo.launch(debug=True)