dipesh1701 committed
Commit 635cd84 • 1 Parent(s): 3caf94a
fix
app.py
CHANGED
@@ -23,15 +23,19 @@ secret_token = ""
 
 # model = whisper.load_model("base")
 
-scheduler = EulerDiscreteScheduler.from_pretrained(model_id,
-                                                   subfolder="scheduler")
+from diffusers import DiffusionPipeline
+
+pipeline = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1")
+
+# model_id = "stabilityai/stable-diffusion-2"
+# scheduler = EulerDiscreteScheduler.from_pretrained(model_id,
+#                                                    subfolder="scheduler")
+
+# pipe = StableDiffusionPipeline.from_pretrained(model_id,
+#                                                scheduler=scheduler,
+#                                                revision="fp16",
+#                                                torch_dtype=torch.float16)
+# pipe = pipe.to("cuda")
 
 def transcribe(audio):
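The commit swaps the explicit scheduler setup for a plain DiffusionPipeline.from_pretrained call and leaves the old configuration commented out. For reference, a minimal sketch of what that commented-out fp16/GPU variant looks like with the diffusers API, assuming a CUDA device is available and using torch_dtype in place of the older revision="fp16" flag:

```python
# Hedged sketch of the commented-out setup, not what this commit actually runs.
import torch
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler

model_id = "stabilityai/stable-diffusion-2"
scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    scheduler=scheduler,
    torch_dtype=torch.float16,  # half precision so the 768x768 model fits on smaller GPUs
)
pipe = pipe.to("cuda")  # keep on CPU (much slower) if no GPU is present
```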
@@ -40,14 +44,14 @@ def transcribe(audio):
     audio = whisper.pad_or_trim(audio)
 
     # make log-Mel spectrogram and move to the same device as the model
-    mel =
+    mel = model.log_mel_spectrogram(audio).to(model.device)
 
     # detect the spoken language
     _, probs = model.detect_language(mel)
 
     # decode the audio
-    options =
-    result =
+    options = model.DecodingOptions()
+    result = model.decode(model, mel, options)
     result_text = result.text
 
     # Pass the generated text to Audio
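Note that the added lines call log_mel_spectrogram, DecodingOptions and decode as attributes of the loaded model, whereas the openai-whisper package exposes them at module level (and the model's own decode method takes only mel and options). A minimal sketch of the usual decode flow, assuming the audio arrives as a file path and that the model is loaded once at import time as the commented-out whisper.load_model("base") line suggests:

```python
# Sketch of the standard openai-whisper decode flow; the module-level whisper.*
# calls are the documented API, not a claim about how app.py is finally structured.
import whisper

model = whisper.load_model("base")

def transcribe(audio_path):
    # load, then clip or pad the waveform to whisper's 30-second window
    audio = whisper.load_audio(audio_path)
    audio = whisper.pad_or_trim(audio)

    # log-Mel spectrogram on the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # detect the spoken language (probabilities per language code)
    _, probs = model.detect_language(mel)

    # decode the audio
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    return result.text
```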
@@ -55,7 +59,7 @@ def transcribe(audio):
     resp = chatgpt_api.send_message(result_text)
     out_result = resp['message']
 
-    out_image =
+    out_image = pipeline(out_result, height=768, width=768).images[0]
 
     return [result_text, out_result, out_image]
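The ChatGPT reply text is then used directly as the prompt for the Stable Diffusion pipeline, and the first generated image is returned alongside the transcript and the reply. A small usage sketch of that final step, with a hypothetical prompt standing in for out_result:

```python
# Hedged usage sketch of the image-generation step; the prompt is a placeholder
# for out_result (the ChatGPT reply) in app.py.
from diffusers import DiffusionPipeline

pipeline = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1")
prompt = "a watercolor painting of a lighthouse at dawn"  # placeholder for out_result
output = pipeline(prompt, height=768, width=768)  # returns an object with an .images list
out_image = output.images[0]                      # a PIL.Image, ready to hand back to the UI
out_image.save("reply.png")
```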