crobbi committed on
Commit
0474dc3
·
1 Parent(s): 4791e88

Delete app

Browse files
app/animation.gif DELETED
Binary file (445 kB)
 
app/modelutil.py DELETED
@@ -1,34 +0,0 @@
1
- from tensorflow.python.ops.numpy_ops import np_config
2
- np_config.enable_numpy_behavior()
3
- import os
4
- from tensorflow.keras.models import Sequential
5
- from tensorflow.keras.layers import Conv3D, LSTM, Dense, Dropout, Bidirectional, MaxPool3D, Activation, Reshape, SpatialDropout3D, BatchNormalization, TimeDistributed, Flatten
6
-
7
def load_model() -> Sequential:
    """Assemble the Conv3D + bidirectional-LSTM network and load its weights.

    The network takes inputs of shape (75, 46, 140, 1), applies three
    Conv3D -> ReLU -> MaxPool3D stages, flattens every time step, runs two
    bidirectional LSTM stages (each followed by dropout) and ends with a
    41-way softmax per time step.

    Returns:
        The Sequential model with weights restored from the checkpoint at
        ``../models/checkpoint`` (resolved against the current working
        directory).
    """
    net = Sequential()

    # Convolutional front end; only the first layer declares the input shape.
    for stage, n_filters in enumerate((128, 256, 75)):
        if stage == 0:
            conv = Conv3D(n_filters, 3, input_shape=(75, 46, 140, 1), padding='same')
        else:
            conv = Conv3D(n_filters, 3, padding='same')
        net.add(conv)
        net.add(Activation('relu'))
        # Pool size 1 on the first axis leaves the 75-step axis untouched.
        net.add(MaxPool3D((1, 2, 2)))

    # Collapse the per-step feature maps into one vector per time step.
    net.add(TimeDistributed(Flatten()))

    # Recurrent back end: two identical BiLSTM + dropout stages.
    for _ in range(2):
        recurrent = LSTM(128, kernel_initializer='Orthogonal', return_sequences=True)
        net.add(Bidirectional(recurrent))
        net.add(Dropout(.5))

    net.add(Dense(41, kernel_initializer='he_normal', activation='softmax'))

    checkpoint_path = os.path.join('..', 'models', 'checkpoint')
    net.load_weights(checkpoint_path)

    return net
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/streamlitapp.py DELETED
@@ -1,60 +0,0 @@
1
# Import all of the dependencies
import os
import subprocess

import streamlit as st
import imageio
import numpy as np

import tensorflow as tf
from utils import load_data, num_to_char
from modelutil import load_model


# Set the layout to the streamlit app as wide
st.set_page_config(layout='wide')

# Setup the sidebar
with st.sidebar:
    st.image('https://plus.unsplash.com/premium_photo-1682309676673-392c56015c5c?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1000&q=80')
    st.title('Lip Reading')
    st.info('This application is originally developed from the LipNet deep learning model.')

st.title('LipNet using StreamLit ✌🏻')
# Generating a list of options or videos
options = os.listdir(os.path.join('..', 'data', 's1'))
selected_video = st.selectbox('Choose video', options)

# Generate two columns
col1, col2 = st.columns(2)

if options:

    # Rendering the video
    with col1:
        st.info('The video below displays the converted video in mp4 format')
        file_path = os.path.join('..', 'data', 's1', selected_video)
        # Pass the arguments as a list (no shell) so file names containing
        # spaces or shell metacharacters are handled safely.
        try:
            subprocess.run(
                ['ffmpeg', '-i', file_path, '-vcodec', 'libx264', 'test_video.mp4', '-y'],
                check=True,
            )
        except (FileNotFoundError, subprocess.CalledProcessError) as err:
            # Without this a failed conversion would silently display a
            # stale test_video.mp4 left over from a previous run.
            st.error(f'Could not convert the selected video with ffmpeg: {err}')
            st.stop()

        # Rendering inside of the app
        with open('test_video.mp4', 'rb') as video_file:
            video_bytes = video_file.read()
        st.video(video_bytes)

    with col2:
        st.info('👀 This is all the machine learning model sees when making a prediction')
        # Only the frame tensor is needed here; the alignments and the
        # image data returned alongside it are ignored.
        frames, _, _ = load_data(tf.convert_to_tensor(file_path))
        imageio.mimsave('animation.gif', np.squeeze((frames * 50).astype(np.uint8)), duration=100)
        st.image('animation.gif', width=400)

        st.info('This is the output of the machine learning model as tokens')
        model = load_model()
        # The model expects a leading batch axis.
        yhat = model.predict(tf.expand_dims(frames, axis=0))
        decoder = tf.keras.backend.ctc_decode(yhat, [75], greedy=True)[0][0].numpy()
        st.text(decoder)

        # Convert prediction to text
        st.info('Decode the raw tokens into words')
        converted_prediction = tf.strings.reduce_join(num_to_char(decoder)).numpy().decode('utf-8')
        st.text(converted_prediction)
60
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/test_video.mp4 DELETED
Binary file (110 kB)
 
app/utils.py DELETED
@@ -1,52 +0,0 @@
1
- import tensorflow as tf
2
- from typing import List
3
- import numpy as np
4
- import cv2
5
- import os
6
-
7
# One vocabulary entry per character: lowercase letters, apostrophe, '?',
# '!', the digits 1-9 and the space character.
vocab = list("abcdefghijklmnopqrstuvwxyz'?!123456789 ")

# Lookup from characters to integer ids; the empty string is the OOV token.
char_to_num = tf.keras.layers.StringLookup(vocabulary=vocab, oov_token="")

# Inverse lookup (integer ids back to characters), built from the forward
# layer's vocabulary so the two mappings stay in agreement.
num_to_char = tf.keras.layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(),
    oov_token="",
    invert=True,
)
13
-
14
def load_video(path: str) -> tf.Tensor:
    """Read a video file and return its cropped, standardised grayscale frames.

    Each decoded frame is converted to grayscale and cropped to rows 190:236
    and columns 80:220. The stacked frames are then shifted by their mean
    and divided by their standard deviation.

    Args:
        path: Location of the video file, passed to ``cv2.VideoCapture``.

    Returns:
        A float32 tensor holding the normalised frames.

    Raises:
        ValueError: If no frame could be decoded from ``path``.
    """
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        for _ in range(int(cap.get(cv2.CAP_PROP_FRAME_COUNT))):
            ret, frame = cap.read()
            if not ret:
                # The reported frame count can exceed what is actually
                # decodable; stop instead of passing None downstream.
                break
            frame = tf.image.rgb_to_grayscale(frame)
            frames.append(frame[190:236, 80:220, :])
    finally:
        # Release the capture even if decoding or conversion fails.
        cap.release()

    if not frames:
        raise ValueError(f'No frames could be read from video: {path!r}')

    # NOTE(review): the mean is taken on the frames before the float cast
    # while the std is taken after it. This is kept exactly as in the
    # original so the preprocessing presumably still matches what the stored
    # weights were trained with -- confirm before changing.
    mean = tf.math.reduce_mean(frames)
    std = tf.math.reduce_std(tf.cast(frames, tf.float32))
    return tf.cast((frames - mean), tf.float32) / std
27
-
28
def load_alignments(path: str) -> tf.Tensor:
    """Read an alignment file and encode its words as character ids.

    The third whitespace-separated field of every line is taken as the
    token. Tokens equal to ``'sil'`` are dropped; the remaining ones are
    joined with single spaces, split into characters and mapped through
    ``char_to_num``.

    Args:
        path: Location of the alignment file.

    Returns:
        A 1-D tensor of character ids (without the leading separator space).
    """
    tokens = []
    with open(path, 'r') as f:
        for line in f:
            fields = line.split()
            if len(fields) < 3:
                # Blank or malformed line: there is no token field to read.
                continue
            if fields[2] != 'sil':
                # Every word is preceded by a space; the very first one is
                # stripped again by the [1:] slice below.
                tokens.extend((' ', fields[2]))
    return char_to_num(tf.reshape(tf.strings.unicode_split(tokens, input_encoding='UTF-8'), (-1)))[1:]
38
-
39
def load_data(path):
    """Load the frames, alignments and displayable image data for one clip.

    Only the file stem of ``path`` is used: the video is read from
    ``../data/s1/<stem>.mpg`` and the alignments from
    ``../data/alignments/s1/<stem>.align``.

    Args:
        path: The clip path as a string tensor (anything exposing
            ``.numpy()``), a ``bytes`` object or a plain ``str``. Both
            ``/`` and ``\\`` are accepted as directory separators.

    Returns:
        A tuple ``(frames, alignments, image_data)``: the value returned by
        ``load_video``, the value returned by ``load_alignments``, and the
        frames scaled by 255, cast to ``uint8`` and squeezed.
    """
    if hasattr(path, 'numpy'):
        path = path.numpy()
    if isinstance(path, bytes):
        path = path.decode()

    # Normalise Windows separators first. The original overwrote the
    # '/'-split result with a '\\'-split, which on POSIX paths such as
    # '../data/s1/x.mpg' produced an empty file name.
    base_name = path.replace('\\', '/').rsplit('/', 1)[-1]
    file_name = os.path.splitext(base_name)[0]

    video_path = os.path.join('..', 'data', 's1', f'{file_name}.mpg')
    alignment_path = os.path.join('..', 'data', 'alignments', 's1', f'{file_name}.align')
    frames = load_video(video_path)
    print(frames.shape)
    alignments = load_alignments(alignment_path)
    # NOTE(review): `.astype` on the value returned by load_video presumably
    # relies on TensorFlow's NumPy behaviour being enabled elsewhere in the
    # application -- confirm.
    image_data = (frames * 255).astype(np.uint8)
    image_data = np.squeeze(image_data)

    return frames, alignments, image_data