dipesh1701 committed on
Commit
635cd84
1 Parent(s): 3caf94a
Files changed (1)
  1. app.py +14 -10
app.py CHANGED
@@ -23,15 +23,19 @@ secret_token = ""
 
 # model = whisper.load_model("base")
 
-model_id = "stabilityai/stable-diffusion-2"
-scheduler = EulerDiscreteScheduler.from_pretrained(model_id,
-                                                   subfolder="scheduler")
+from diffusers import DiffusionPipeline
 
-pipe = StableDiffusionPipeline.from_pretrained(model_id,
-                                               scheduler=scheduler,
-                                               revision="fp16",
-                                               torch_dtype=torch.float16)
-pipe = pipe.to("cuda")
+pipeline = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1")
+
+# model_id = "stabilityai/stable-diffusion-2"
+# scheduler = EulerDiscreteScheduler.from_pretrained(model_id,
+#                                                    subfolder="scheduler")
+
+# pipe = StableDiffusionPipeline.from_pretrained(model_id,
+#                                                scheduler=scheduler,
+#                                                revision="fp16",
+#                                                torch_dtype=torch.float16)
+# pipe = pipe.to("cuda")
 
 def transcribe(audio):
 
@@ -40,14 +44,14 @@ def transcribe(audio):
     audio = whisper.pad_or_trim(audio)
 
     # make log-Mel spectrogram and move to the same device as the model
     mel = whisper.log_mel_spectrogram(audio).to(model.device)
 
     # detect the spoken language
     _, probs = model.detect_language(mel)
 
     # decode the audio
     options = whisper.DecodingOptions()
-    result = whisper.decode(model, mel, options)
+    result = model.decode(mel, options)
     result_text = result.text
 
     # Pass the generated text to Audio
@@ -55,7 +59,7 @@ def transcribe(audio):
     resp = chatgpt_api.send_message(result_text)
     out_result = resp['message']
 
-    out_image = pipe(out_result, height=768, width=768).images[0]
+    out_image = pipeline(out_result, height=768, width=768).images[0]
 
     return [result_text, out_result, out_image]
 
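
Note that the new DiffusionPipeline call loads stabilityai/stable-diffusion-2-1 in full precision on the default device, while the commented-out setup ran in fp16 on CUDA. If the Space has a GPU, the same behaviour can be kept with the current diffusers API; a minimal sketch, assuming a CUDA device is available:

import torch
from diffusers import DiffusionPipeline

# Half precision plus GPU placement, mirroring what the commented-out
# StableDiffusionPipeline setup did (assumes CUDA is available).
pipeline = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-1",
    torch_dtype=torch.float16,
)
pipeline = pipeline.to("cuda")

The height=768, width=768 arguments passed in transcribe match stable-diffusion-2-1's native training resolution.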
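
For reference, the transcription path mixes module-level openai-whisper helpers (log_mel_spectrogram, DecodingOptions, which are not attributes of the model object) with methods on the loaded model. A self-contained sketch of that flow, assuming the commented-out whisper.load_model("base") is restored and using a hypothetical input file name:

import whisper

model = whisper.load_model("base")  # assumption: the commented-out load is restored

audio = whisper.load_audio("speech.wav")  # hypothetical example file
audio = whisper.pad_or_trim(audio)

# log_mel_spectrogram and DecodingOptions live on the whisper module
mel = whisper.log_mel_spectrogram(audio).to(model.device)
_, probs = model.detect_language(mel)

options = whisper.DecodingOptions()
result = model.decode(mel, options)  # equivalent to whisper.decode(model, mel, options)
print(result.text)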