Spaces:

nfa124
/

text-to-sign

Sleeping

File size: 3,848 Bytes

import cv2
import os
from moviepy.editor import *
import gradio as gr
import re
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet

# Fetch the NLTK data needed by word_tokenize, pos_tag and WordNetLemmatizer.
# 'punkt_tab' and 'averaged_perceptron_tagger_eng' are the resource names that
# recent NLTK releases (3.9+) look up; the legacy names are still downloaded so
# older releases keep working.
for _nltk_resource in (
    'punkt',
    'punkt_tab',
    'wordnet',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(_nltk_resource)

def get_wordnet_pos(tag):
    """Map a POS tag string to the corresponding WordNet POS constant.

    Args:
        tag: POS tag string; only its leading letter is inspected.

    Returns:
        ``wordnet.ADJ`` for tags starting with ``J``, ``wordnet.VERB`` for
        ``V``, ``wordnet.NOUN`` for ``N``, ``wordnet.ADV`` for ``R``, and
        ``wordnet.NOUN`` for anything else.
    """
    prefix_table = (
        ('J', wordnet.ADJ),
        ('V', wordnet.VERB),
        ('N', wordnet.NOUN),
        ('R', wordnet.ADV),
    )
    for prefix, wordnet_pos in prefix_table:
        if tag.startswith(prefix):
            return wordnet_pos
    # Unrecognised tags are treated as nouns.
    return wordnet.NOUN

def get_lemma(word):
    """Lemmatize every token of ``word`` and return them joined by spaces.

    The text is tokenized, POS-tagged, and each token is lemmatized using the
    WordNet POS derived from its tag by ``get_wordnet_pos``.

    Args:
        word: Text to lemmatize (a single word or several).

    Returns:
        The lemmas, in token order, separated by single spaces.
    """
    wnl = WordNetLemmatizer()
    tagged_tokens = nltk.pos_tag(word_tokenize(word))
    return ' '.join(
        wnl.lemmatize(token, pos=get_wordnet_pos(tag))
        for token, tag in tagged_tokens
    )

def apply_lemma_to_string(sentence):
    """Return ``sentence`` with every token replaced by its lemma.

    The whole sentence is passed to ``get_lemma`` in a single call, so it is
    tokenized once and its tokens are POS-tagged together, in sentence
    context, rather than being re-tokenized and tagged one word at a time.

    Args:
        sentence: Text to lemmatize.

    Returns:
        The lemmas separated by single spaces; an empty string when
        ``sentence`` is empty or only whitespace.
    """
    # Nothing to tokenize: skip the tagger entirely.
    if not sentence.strip():
        return ''
    return get_lemma(sentence)
    

def parse_string(string, dataset):
    """Greedily split ``string`` into the longest names found in ``dataset``.

    At each position the longest entry of ``dataset`` (compared lowercased)
    that matches is consumed. Matches longer than one character are emitted
    as-is; consecutive single-character matches are accumulated into one
    string so the caller can spell them out. A character matching no entry is
    skipped and ends the current run of single characters.

    Args:
        string: Text to split; expected to be lowercased already, because
            entries are matched via ``chunk.lower()``.
        dataset: Iterable of known names. It is iterated once per position,
            so it must be re-iterable (e.g. a list).

    Returns:
        List of matched names and accumulated letter runs, in input order.
        An empty string is emitted for a separator that is not preceded by
        pending letters; callers are expected to filter those out.
    """
    parsed_list = []
    pending_letters = ""
    position = 0
    end = len(string)
    while position < end:
        # Longest entry matching at the current position (first one wins ties).
        best = ""
        for chunk in dataset:
            if len(chunk) > len(best) and string.startswith(chunk.lower(), position):
                best = chunk
        if not best:
            # Separator / unknown character: close the current letter run.
            parsed_list.append(pending_letters)
            pending_letters = ""
            position += 1
        elif len(best) > 1:
            # Flush letters collected so far first, so output order follows
            # the input order.
            if pending_letters:
                parsed_list.append(pending_letters)
                pending_letters = ""
            parsed_list.append(best)
            print(best)
            position += len(best)
        else:
            pending_letters += best
            position += 1
    # Letters at the very end of the string would otherwise be dropped.
    if pending_letters:
        parsed_list.append(pending_letters)
    return parsed_list


def remove_empty_values(lst):
    """Return a new list holding only the truthy items of ``lst``.

    Empty strings, empty lists, empty dicts, ``None``, ``0`` and ``False``
    are all dropped; the order of the remaining items is preserved.
    """
    return list(filter(None, lst))


def flatten_lists(lst):
    """Flatten arbitrarily nested lists into a single flat list.

    Only items whose type is exactly ``list`` are expanded; strings, tuples
    and every other value are kept as single elements.
    """
    def _walk(items):
        for item in items:
            if type(item) is list:
                yield from _walk(item)
            else:
                yield item

    return list(_walk(lst))



# Folder containing one sign clip per word/letter.
path = 'dataset'
# Kept only so the module-level name still exists. It used to be filled with
# cv2.imread() results, but imread is an image decoder and the entries here
# are video files; nothing in this file reads the list.
videos = []
myList = os.listdir(path)
print(myList)
# Sign names are the lowercased file stems with '-' turned into a space.
# Only .mp4 entries are kept because texttoSign loads "<name>.mp4".
VideosNames = [
    os.path.splitext(cu_video)[0].replace("-", " ").lower()
    for cu_video in myList
    if os.path.splitext(cu_video)[1].lower() == ".mp4"
]
print(VideosNames)

def _index_sign_videos(folder):
    """Map each normalised sign name to the real path of its .mp4 file in ``folder``."""
    index = {}
    for filename in os.listdir(folder):
        stem, extension = os.path.splitext(filename)
        if extension.lower() == ".mp4":
            # Same normalisation the module applies when building VideosNames.
            index.setdefault(
                stem.replace("-", " ").lower(), os.path.join(folder, filename)
            )
    return index


def _read_whole_seconds(video_path):
    """Return the video length in whole seconds, from its frame count and FPS."""
    capture = cv2.VideoCapture(video_path)
    try:
        frame_count = capture.get(cv2.CAP_PROP_FRAME_COUNT)
        fps = capture.get(cv2.CAP_PROP_FPS)
    finally:
        capture.release()
    # A file that cannot be opened reports no usable FPS; fail with a clear
    # message instead of a ZeroDivisionError.
    if not (fps > 0):
        raise OSError(f"Cannot read sign video: {video_path}")
    return round(frame_count / fps)


def texttoSign(text):
    """Render ``text`` as a sign-language video and return the output path.

    The text is lowercased, lemmatized and reduced to the letters a-z and
    spaces, then split into known sign names; anything that is not a known
    name is spelled out letter by letter. The clip for each sign is trimmed
    to its whole-second length, all clips are concatenated, and the result is
    written to ``combined.mp4``.

    Args:
        text: Input sentence.

    Returns:
        The string ``"combined.mp4"``.

    Raises:
        ValueError: If the text yields no word or letter that has a sign.
        OSError: If a sign video cannot be opened.
    """
    # The trailing space gives the parser a separator after the last word.
    text = text + " "
    text = text.lower()
    text = apply_lemma_to_string(text)
    text = re.sub('[^a-z]+', ' ', text)

    known_names = set(VideosNames)
    signs = []
    for word in remove_empty_values(parse_string(text, VideosNames)):
        if word in known_names:
            signs.append(word)
        else:
            # No clip for the whole word: finger-spell it.
            signs.extend(word)
    if not signs:
        raise ValueError("No signable words or letters were found in the input text.")

    # Sign names are normalised (lowercase, '-' replaced by a space), so look
    # up the real file name instead of rebuilding it from the name.
    video_paths = _index_sign_videos("dataset")
    source_clips = []
    try:
        trimmed_clips = []
        for sign in signs:
            video_path = video_paths.get(sign, "dataset/" + sign + ".mp4")
            seconds = _read_whole_seconds(video_path)
            clip = VideoFileClip(video_path)
            source_clips.append(clip)
            trimmed_clips.append(clip.subclip(0, seconds))
        result_clip = concatenate_videoclips(trimmed_clips, method='compose')
        result_clip.write_videofile("combined.mp4", fps=30)
    finally:
        # Release the file readers even when rendering fails.
        for clip in source_clips:
            clip.close()
    return "combined.mp4"
 # except:
      #  pass


# Gradio UI: a text box in, the rendered sign video out.
demo = gr.Interface(
    fn=texttoSign,
    inputs="text",
    outputs="video",
    title="ASL Text To Sign",
    description="This is a small text to sign language model based on ASL sign language standards",
    examples=[["bad boy"]],
)

# Start the web app.
demo.launch(debug=True)