awacke1 committed
Commit 561b096
1 Parent(s): 0841b6b

Update app.py

Files changed (1): app.py (+25 -46)
app.py CHANGED
@@ -1,22 +1,22 @@
 import gradio as gr
-from transformers import pipeline
 import io, base64
-from PIL import Image
 import numpy as np
 import tensorflow as tf
 import mediapy
 import os
 import sys
-from huggingface_hub import snapshot_download
-
 import streamlit as st
 import firebase_admin
-from firebase_admin import credentials
-from firebase_admin import firestore
 import datetime
 
+from transformers import pipeline
+from PIL import Image
+from huggingface_hub import snapshot_download
+from firebase_admin import credentials
+from firebase_admin import firestore
+from eval import interpolator, util
 
-# load cloud firestore client which establishes a connection to dataset where we persist data
+# firestore singleton is a cached multiuser instance to persist shared crowdsource memory
 @st.experimental_singleton
 def get_db_firestore():
     cred = credentials.Certificate('test.json')
@@ -24,16 +24,23 @@ def get_db_firestore():
     db = firestore.client()
     return db
 
-#start it up
+#start firestore singleton
 db = get_db_firestore()
+
+# create ASR ML pipeline
 asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
 
+# create Text Classification pipeline
+classifier = pipeline("text-classification")
+
+# create text generator pipeline
+story_gen = pipeline("text-generation", "pranavpsv/gpt2-genre-story-generator")
+
+# transcribe function
 def transcribe(audio):
     text = asr(audio)["text"]
     return text
 
-classifier = pipeline("text-classification")
-
 def speech_to_text(speech):
     text = asr(speech)["text"]
     return text
@@ -67,50 +74,34 @@ def selectall(text):
         r=(f'{doc.id} => {doc.to_dict()}')
         doclist += r
     return doclist
-
-#demo = gr.Blocks()
 
-
-
-#demo.launch(share=True)
-
-
-# 1. GPT-J: Story Generation Pipeline
-story_gen = pipeline("text-generation", "pranavpsv/gpt2-genre-story-generator")
-
-# 2. LatentDiffusion: Latent Diffusion Interface
+# image generator
 image_gen = gr.Interface.load("spaces/multimodalart/latentdiffusion")
 
-# 3. FILM: Frame Interpolation Model (code re-use from spaces/akhaliq/frame-interpolation/tree/main)
+# video generator
 os.system("git clone https://github.com/google-research/frame-interpolation")
 sys.path.append("frame-interpolation")
-from eval import interpolator, util
-
 ffmpeg_path = util.get_ffmpeg_path()
 mediapy.set_ffmpeg(ffmpeg_path)
-
 model = snapshot_download(repo_id="akhaliq/frame-interpolation-film-style")
 interpolator = interpolator.Interpolator(model, None)
 
+# story gen
 def generate_story(choice, input_text):
     query = "<BOS> <{0}> {1}".format(choice, input_text)
-
-    print(query)
     generated_text = story_gen(query)
     generated_text = generated_text[0]['generated_text']
     generated_text = generated_text.split('> ')[2]
-
     return generated_text
 
-def generate_images(generated_text):
+# images gen
+def generate_images(text):
     steps=50
     width=256
     height=256
     num_images=4
     diversity=6
-    image_bytes = image_gen(generated_text, steps, width, height, num_images, diversity)
-
-    # Algo from spaces/Gradio-Blocks/latent_gpt2_story/blob/main/app.py
+    image_bytes = image_gen(text, steps, width, height, num_images, diversity)
     generated_images = []
     for image in image_bytes[1]:
         image_str = image[0]
@@ -118,56 +109,44 @@ def generate_images(generated_text):
         decoded_bytes = base64.decodebytes(bytes(image_str, "utf-8"))
         img = Image.open(io.BytesIO(decoded_bytes))
         generated_images.append(img)
-
     return generated_images
 
+# reductionism - interpolate 4 images - todo - unhardcode the pattern
 def generate_interpolation(gallery):
     times_to_interpolate = 4
-
     generated_images = []
     for image_str in gallery:
         image_str = image_str.replace("data:image/png;base64,","")
         decoded_bytes = base64.decodebytes(bytes(image_str, "utf-8"))
         img = Image.open(io.BytesIO(decoded_bytes))
         generated_images.append(img)
-
     generated_images[0].save('frame_0.png')
     generated_images[1].save('frame_1.png')
    	generated_images[2].save('frame_2.png')
     generated_images[3].save('frame_3.png')
-
     input_frames = ["frame_0.png", "frame_1.png", "frame_2.png", "frame_3.png"]
-
     frames = list(util.interpolate_recursively_from_files(input_frames, times_to_interpolate, interpolator))
-
     mediapy.write_video("out.mp4", frames, fps=15)
-
     return "out.mp4"
-
-
 
 demo = gr.Blocks()
 
 with demo:
-    #audio_file = gr.Audio(type="filepath")
     audio_file = gr.inputs.Audio(source="microphone", type="filepath")
     text = gr.Textbox()
     label = gr.Label()
     saved = gr.Textbox()
-    savedAll = gr.Textbox()
-
+    savedAll = gr.Textbox()
     b1 = gr.Button("Recognize Speech")
     b2 = gr.Button("Classify Sentiment")
     b3 = gr.Button("Save Speech to Text")
     b4 = gr.Button("Retrieve All")
-
     b1.click(speech_to_text, inputs=audio_file, outputs=text)
     b2.click(text_to_sentiment, inputs=text, outputs=label)
     b3.click(upsert, inputs=text, outputs=saved)
     b4.click(selectall, inputs=text, outputs=savedAll)
 
     with gr.Row():
-
         # Left column (inputs)
         with gr.Column():
            input_story_type = gr.Radio(choices=['superhero', 'action', 'drama', 'horror', 'thriller', 'sci_fi'], value='sci_fi', label="Genre")
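
Read end to end, the reorganized app.py chains three generators: the GPT-2 story model writes genre text, the latentdiffusion Space renders four 256x256 frames from it, and the FILM interpolator stitches those frames into a video. A minimal driver sketch of that chain outside the Gradio UI, assuming app.py imports cleanly (its model downloads run at import time) and that generate_interpolation accepts base64-encoded PNG strings, as the data:image/png;base64 prefix-stripping in the diff suggests:

import io, base64

from app import generate_story, generate_images, generate_interpolation

# 1. A "<BOS> <sci_fi> ..." query goes to the GPT-2 genre story generator
story = generate_story("sci_fi", "The last colony ship left orbit.")

# 2. Four PIL images come back from the latentdiffusion Space
images = generate_images(story)

# 3. Re-encode the PIL images as base64 PNG strings, the shape the gallery
#    widget appears to hand generate_interpolation (assumption)
gallery = []
for img in images:
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    gallery.append(base64.b64encode(buf.getvalue()).decode("utf-8"))

print(generate_interpolation(gallery))  # writes and returns out.mp4 (15 fps)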
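
One follow-up note on the new module-level pipelines: the ASR and story pipelines pin their checkpoints, but classifier = pipeline("text-classification") does not, so it resolves to whatever default the installed transformers release ships (distilbert-base-uncased-finetuned-sst-2-english in releases of this era, stated here as an assumption). A hedged hardening, not part of the commit, that would make rebuilds reproducible:

# Pin the sentiment checkpoint rather than relying on the library default
# (the model id here is assumed to match that default).
classifier = pipeline(
    "text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)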