Spaces:
Sleeping
Sleeping
neuralleap
committed on
Commit
•
b42ce6c
1
Parent(s):
5eae5c5
Update main.py
Browse files
main.py
CHANGED
@@ -5,6 +5,7 @@ from fastapi.middleware.cors import CORSMiddleware
|
|
5 |
import os
|
6 |
import io
|
7 |
import json
|
|
|
8 |
#import httpcore
|
9 |
#setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')
|
10 |
|
@@ -21,6 +22,41 @@ app.add_middleware(
|
|
21 |
)
|
22 |
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
#text_to_speech bock===========================================================================
|
26 |
|
@@ -37,10 +73,6 @@ def translate(text,language):
|
|
37 |
|
38 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
39 |
from TTS.api import TTS
|
40 |
-
|
41 |
-
import torch
|
42 |
-
from TTS.api import TTS
|
43 |
-
|
44 |
# Get device
|
45 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
46 |
|
|
|
5 |
import os
|
6 |
import io
|
7 |
import json
|
8 |
+
import torch
|
9 |
#import httpcore
|
10 |
#setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')
|
11 |
|
|
|
22 |
)
|
23 |
|
24 |
|
25 |
+
#============speech_to_text=======================================================
|
26 |
+
from transformers import pipeline
|
27 |
+
from transformers.pipelines.audio_utils import ffmpeg_read
|
28 |
+
|
29 |
+
import tempfile
|
30 |
+
import os
|
31 |
+
|
32 |
+
MODEL_NAME = "openai/whisper-base"
|
33 |
+
BATCH_SIZE = 8
|
34 |
+
FILE_LIMIT_MB = 1000
|
35 |
+
YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
|
36 |
+
|
37 |
+
device = 0 if torch.cuda.is_available() else "cpu"
|
38 |
+
|
39 |
+
pipe = pipeline(
|
40 |
+
task="automatic-speech-recognition",
|
41 |
+
model=MODEL_NAME,
|
42 |
+
chunk_length_s=30,
|
43 |
+
device=device,
|
44 |
+
)
|
45 |
+
@app.post("/speech_to_text_whispher")
|
46 |
+
async def speech_to_text_whispher(file: UploadFile = File(...)):
|
47 |
+
file_path = "inputvoice.mp3"
|
48 |
+
|
49 |
+
with open(file_path, "wb") as f:
|
50 |
+
f.write(file.file.read())
|
51 |
+
|
52 |
+
#with open(file_path, "rb") as f:
|
53 |
+
#inputs = f.read()
|
54 |
+
|
55 |
+
#inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
|
56 |
+
#inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
|
57 |
+
text = pipe(file_path, batch_size=BATCH_SIZE, generate_kwargs={"task":"transcribe"}, return_timestamps=True)["text"]
|
58 |
+
return {"transcribe":text}
|
59 |
+
|
60 |
|
61 |
#text_to_speech bock===========================================================================
|
62 |
|
|
|
73 |
|
74 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
75 |
from TTS.api import TTS
|
|
|
|
|
|
|
|
|
76 |
# Get device
|
77 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
78 |
|