neuralleap committed on
Commit
b42ce6c
1 Parent(s): 5eae5c5

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +36 -4
main.py CHANGED
@@ -5,6 +5,7 @@ from fastapi.middleware.cors import CORSMiddleware
5
  import os
6
  import io
7
  import json
 
8
  #import httpcore
9
  #setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')
10
 
@@ -21,6 +22,41 @@ app.add_middleware(
21
  )
22
 
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  #text_to_speech block===========================================================================
26
 
@@ -37,10 +73,6 @@ def translate(text,language):
37
 
38
  os.environ["COQUI_TOS_AGREED"] = "1"
39
  from TTS.api import TTS
40
-
41
- import torch
42
- from TTS.api import TTS
43
-
44
  # Get device
45
  device = "cuda" if torch.cuda.is_available() else "cpu"
46
 
 
5
  import os
6
  import io
7
  import json
8
+ import torch
9
  #import httpcore
10
  #setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')
11
 
 
22
  )
23
 
24
 
25
# ============ speech_to_text ====================================================
# Module-level setup for the Whisper automatic-speech-recognition endpoint.
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read

import tempfile
import os

# Inference configuration for the ASR pipeline.
MODEL_NAME = "openai/whisper-base"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000
YT_LENGTH_LIMIT_S = 3600  # limit to 1 hour YouTube files

# transformers pipelines take a CUDA ordinal (0) or the string "cpu".
device = 0 if torch.cuda.is_available() else "cpu"

# Built once at import time so every request reuses the loaded model.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
45
@app.post("/speech_to_text_whispher")
async def speech_to_text_whispher(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the Whisper ASR pipeline.

    Args:
        file: The uploaded audio file (any format ffmpeg can decode).

    Returns:
        dict: ``{"transcribe": <transcribed text>}``.
    """
    # Read asynchronously instead of the blocking file.file.read(), so the
    # event loop is not stalled while the upload is consumed.
    audio_bytes = await file.read()

    # Write to a unique temp file rather than a fixed "inputvoice.mp3":
    # a shared path is clobbered by concurrent requests and never cleaned up.
    fd, file_path = tempfile.mkstemp(suffix=".mp3")
    try:
        with os.fdopen(fd, "wb") as out:
            out.write(audio_bytes)
        # NOTE(review): pipe() itself is blocking; presumably acceptable for
        # this app, but consider run_in_executor for heavy traffic — confirm.
        text = pipe(
            file_path,
            batch_size=BATCH_SIZE,
            generate_kwargs={"task": "transcribe"},
            return_timestamps=True,
        )["text"]
    finally:
        os.remove(file_path)

    return {"transcribe": text}
59
+
60
 
61
  #text_to_speech block===========================================================================
62
 
 
73
 
74
  os.environ["COQUI_TOS_AGREED"] = "1"
75
  from TTS.api import TTS
 
 
 
 
76
  # Get device
77
  device = "cuda" if torch.cuda.is_available() else "cpu"
78