Spaces:
Sleeping
Sleeping
ishworrsubedii
committed on
Commit
•
1a05dd7
1
Parent(s):
b368e21
Integrated speech transcription
Browse files
- src/__init__.py +0 -19
- src/api/speech_api.py +1 -15
src/__init__.py
CHANGED
@@ -1,19 +0,0 @@
|
|
1 |
-
"""
|
2 |
-
Created By: ishwor subedi
|
3 |
-
Date: 2024-07-31
|
4 |
-
"""
|
5 |
-
|
6 |
-
import logging.config
|
7 |
-
import yaml
|
8 |
-
import os
|
9 |
-
|
10 |
-
if os.path.exists("logs"):
|
11 |
-
pass
|
12 |
-
else:
|
13 |
-
os.makedirs("logs")
|
14 |
-
|
15 |
-
log_config_path = os.path.join(os.getcwd(), "logging_config.yaml")
|
16 |
-
with open(log_config_path, 'r') as file:
|
17 |
-
config = yaml.safe_load(file.read())
|
18 |
-
|
19 |
-
logging.config.dictConfig(config)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/api/speech_api.py
CHANGED
@@ -10,7 +10,6 @@ from fastapi import UploadFile, HTTPException, status
|
|
10 |
from src.models.models import TextToSpeechRequest
|
11 |
from fastapi.routing import APIRouter
|
12 |
from src.pipeline.speech_transcription_pipeline import SpeechTranscriptionPipeline
|
13 |
-
from src import logging
|
14 |
|
15 |
speech_translator_router = APIRouter(tags=["SpeechTranscription"])
|
16 |
pipeline = SpeechTranscriptionPipeline()
|
@@ -85,19 +84,14 @@ pipeline = SpeechTranscriptionPipeline()
|
|
85 |
"""
|
86 |
)
|
87 |
async def text_to_speech(request: TextToSpeechRequest):
|
88 |
-
logging.info(f"Text to speech request received")
|
89 |
try:
|
90 |
audio_bytes = pipeline.text_to_speech(request.text, request.lang, request.tld)
|
91 |
if not audio_bytes:
|
92 |
-
logging.error(f"Audio generation failed.")
|
93 |
raise ValueError("Audio generation failed.")
|
94 |
-
logging.info(f"Text to speech request processed successfully")
|
95 |
return JSONResponse(content={"audio": audio_bytes, "status_code": status.HTTP_200_OK}, status_code=200)
|
96 |
except ValueError as ve:
|
97 |
-
logging.error(f"Error processing text to speech request: {str(ve)}")
|
98 |
raise HTTPException(status_code=400, detail=str(ve))
|
99 |
except Exception as e:
|
100 |
-
logging.error(f"Internal Server Error: {str(e)}")
|
101 |
raise HTTPException(status_code=500, detail="Internal Server Error")
|
102 |
|
103 |
|
@@ -147,14 +141,11 @@ async def text_to_speech(request: TextToSpeechRequest):
|
|
147 |
"""
|
148 |
)
|
149 |
async def speech_to_text(audio: UploadFile, lang: str = Form(...)):
|
150 |
-
logging.info(f"Speech to text request received")
|
151 |
try:
|
152 |
audio_bytes = await audio.read()
|
153 |
if not audio_bytes:
|
154 |
-
logging.error(f"Empty audio file")
|
155 |
raise ValueError("Empty audio file")
|
156 |
except Exception as e:
|
157 |
-
logging.error(f"Invalid audio file {e}")
|
158 |
raise HTTPException(
|
159 |
status_code=status.HTTP_400_BAD_REQUEST,
|
160 |
detail="Invalid audio file"
|
@@ -164,32 +155,27 @@ async def speech_to_text(audio: UploadFile, lang: str = Form(...)):
|
|
164 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
|
165 |
temp_audio_file.write(audio_bytes)
|
166 |
temp_audio_file_path = temp_audio_file.name
|
167 |
-
logging.info(f"Temporary audio file created at {temp_audio_file_path}")
|
168 |
except Exception as e:
|
169 |
-
logging.error(f"Could not process audio file{e}")
|
170 |
raise HTTPException(
|
171 |
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
172 |
detail="Could not process audio file"
|
173 |
)
|
174 |
|
175 |
try:
|
176 |
-
logging.info(f"Transcribing audio to text")
|
177 |
transcript = pipeline.speech_to_text(temp_audio_file_path, lang)
|
178 |
except FileNotFoundError as fnfe:
|
179 |
-
logging.error(f"Temporary file not found{fnfel}")
|
180 |
raise HTTPException(
|
181 |
status_code=status.HTTP_404_NOT_FOUND,
|
182 |
detail="Temporary file not found"
|
183 |
)
|
184 |
except Exception as e:
|
185 |
-
logging.error(f"Error processing speech-to-text: {str(e)}")
|
186 |
raise HTTPException(
|
187 |
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
188 |
detail="Error processing speech-to-text"
|
189 |
)
|
190 |
finally:
|
191 |
-
logging.info(f"Cleaning up temporary audio file")
|
192 |
if os.path.exists(temp_audio_file_path):
|
193 |
os.remove(temp_audio_file_path)
|
194 |
|
195 |
return JSONResponse(content={"transcript": transcript, "status_code": status.HTTP_200_OK}, status_code=200)
|
|
|
|
10 |
from src.models.models import TextToSpeechRequest
|
11 |
from fastapi.routing import APIRouter
|
12 |
from src.pipeline.speech_transcription_pipeline import SpeechTranscriptionPipeline
|
|
|
13 |
|
14 |
speech_translator_router = APIRouter(tags=["SpeechTranscription"])
|
15 |
pipeline = SpeechTranscriptionPipeline()
|
|
|
84 |
"""
|
85 |
)
|
86 |
async def text_to_speech(request: TextToSpeechRequest):
|
|
|
87 |
try:
|
88 |
audio_bytes = pipeline.text_to_speech(request.text, request.lang, request.tld)
|
89 |
if not audio_bytes:
|
|
|
90 |
raise ValueError("Audio generation failed.")
|
|
|
91 |
return JSONResponse(content={"audio": audio_bytes, "status_code": status.HTTP_200_OK}, status_code=200)
|
92 |
except ValueError as ve:
|
|
|
93 |
raise HTTPException(status_code=400, detail=str(ve))
|
94 |
except Exception as e:
|
|
|
95 |
raise HTTPException(status_code=500, detail="Internal Server Error")
|
96 |
|
97 |
|
|
|
141 |
"""
|
142 |
)
|
143 |
async def speech_to_text(audio: UploadFile, lang: str = Form(...)):
|
|
|
144 |
try:
|
145 |
audio_bytes = await audio.read()
|
146 |
if not audio_bytes:
|
|
|
147 |
raise ValueError("Empty audio file")
|
148 |
except Exception as e:
|
|
|
149 |
raise HTTPException(
|
150 |
status_code=status.HTTP_400_BAD_REQUEST,
|
151 |
detail="Invalid audio file"
|
|
|
155 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
|
156 |
temp_audio_file.write(audio_bytes)
|
157 |
temp_audio_file_path = temp_audio_file.name
|
|
|
158 |
except Exception as e:
|
|
|
159 |
raise HTTPException(
|
160 |
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
161 |
detail="Could not process audio file"
|
162 |
)
|
163 |
|
164 |
try:
|
|
|
165 |
transcript = pipeline.speech_to_text(temp_audio_file_path, lang)
|
166 |
except FileNotFoundError as fnfe:
|
|
|
167 |
raise HTTPException(
|
168 |
status_code=status.HTTP_404_NOT_FOUND,
|
169 |
detail="Temporary file not found"
|
170 |
)
|
171 |
except Exception as e:
|
|
|
172 |
raise HTTPException(
|
173 |
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
174 |
detail="Error processing speech-to-text"
|
175 |
)
|
176 |
finally:
|
|
|
177 |
if os.path.exists(temp_audio_file_path):
|
178 |
os.remove(temp_audio_file_path)
|
179 |
|
180 |
return JSONResponse(content={"transcript": transcript, "status_code": status.HTTP_200_OK}, status_code=200)
|
181 |
+
|