ishworrsubedii committed on
Commit
1a05dd7
1 Parent(s): b368e21

Integrated speech transcription

Browse files
Files changed (2) hide show
  1. src/__init__.py +0 -19
  2. src/api/speech_api.py +1 -15
src/__init__.py CHANGED
@@ -1,19 +0,0 @@
1
- """
2
- Created By: ishwor subedi
3
- Date: 2024-07-31
4
- """
5
-
6
- import logging.config
7
- import yaml
8
- import os
9
-
10
- if os.path.exists("logs"):
11
- pass
12
- else:
13
- os.makedirs("logs")
14
-
15
- log_config_path = os.path.join(os.getcwd(), "logging_config.yaml")
16
- with open(log_config_path, 'r') as file:
17
- config = yaml.safe_load(file.read())
18
-
19
- logging.config.dictConfig(config)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/api/speech_api.py CHANGED
@@ -10,7 +10,6 @@ from fastapi import UploadFile, HTTPException, status
10
  from src.models.models import TextToSpeechRequest
11
  from fastapi.routing import APIRouter
12
  from src.pipeline.speech_transcription_pipeline import SpeechTranscriptionPipeline
13
- from src import logging
14
 
15
  speech_translator_router = APIRouter(tags=["SpeechTranscription"])
16
  pipeline = SpeechTranscriptionPipeline()
@@ -85,19 +84,14 @@ pipeline = SpeechTranscriptionPipeline()
85
  """
86
  )
87
  async def text_to_speech(request: TextToSpeechRequest):
88
- logging.info(f"Text to speech request received")
89
  try:
90
  audio_bytes = pipeline.text_to_speech(request.text, request.lang, request.tld)
91
  if not audio_bytes:
92
- logging.error(f"Audio generation failed.")
93
  raise ValueError("Audio generation failed.")
94
- logging.info(f"Text to speech request processed successfully")
95
  return JSONResponse(content={"audio": audio_bytes, "status_code": status.HTTP_200_OK}, status_code=200)
96
  except ValueError as ve:
97
- logging.error(f"Error processing text to speech request: {str(ve)}")
98
  raise HTTPException(status_code=400, detail=str(ve))
99
  except Exception as e:
100
- logging.error(f"Internal Server Error: {str(e)}")
101
  raise HTTPException(status_code=500, detail="Internal Server Error")
102
 
103
 
@@ -147,14 +141,11 @@ async def text_to_speech(request: TextToSpeechRequest):
147
  """
148
  )
149
  async def speech_to_text(audio: UploadFile, lang: str = Form(...)):
150
- logging.info(f"Speech to text request received")
151
  try:
152
  audio_bytes = await audio.read()
153
  if not audio_bytes:
154
- logging.error(f"Empty audio file")
155
  raise ValueError("Empty audio file")
156
  except Exception as e:
157
- logging.error(f"Invalid audio file {e}")
158
  raise HTTPException(
159
  status_code=status.HTTP_400_BAD_REQUEST,
160
  detail="Invalid audio file"
@@ -164,32 +155,27 @@ async def speech_to_text(audio: UploadFile, lang: str = Form(...)):
164
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
165
  temp_audio_file.write(audio_bytes)
166
  temp_audio_file_path = temp_audio_file.name
167
- logging.info(f"Temporary audio file created at {temp_audio_file_path}")
168
  except Exception as e:
169
- logging.error(f"Could not process audio file{e}")
170
  raise HTTPException(
171
  status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
172
  detail="Could not process audio file"
173
  )
174
 
175
  try:
176
- logging.info(f"Transcribing audio to text")
177
  transcript = pipeline.speech_to_text(temp_audio_file_path, lang)
178
  except FileNotFoundError as fnfe:
179
- logging.error(f"Temporary file not found{fnfel}")
180
  raise HTTPException(
181
  status_code=status.HTTP_404_NOT_FOUND,
182
  detail="Temporary file not found"
183
  )
184
  except Exception as e:
185
- logging.error(f"Error processing speech-to-text: {str(e)}")
186
  raise HTTPException(
187
  status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
188
  detail="Error processing speech-to-text"
189
  )
190
  finally:
191
- logging.info(f"Cleaning up temporary audio file")
192
  if os.path.exists(temp_audio_file_path):
193
  os.remove(temp_audio_file_path)
194
 
195
  return JSONResponse(content={"transcript": transcript, "status_code": status.HTTP_200_OK}, status_code=200)
 
 
10
  from src.models.models import TextToSpeechRequest
11
  from fastapi.routing import APIRouter
12
  from src.pipeline.speech_transcription_pipeline import SpeechTranscriptionPipeline
 
13
 
14
  speech_translator_router = APIRouter(tags=["SpeechTranscription"])
15
  pipeline = SpeechTranscriptionPipeline()
 
84
  """
85
  )
86
  async def text_to_speech(request: TextToSpeechRequest):
 
87
  try:
88
  audio_bytes = pipeline.text_to_speech(request.text, request.lang, request.tld)
89
  if not audio_bytes:
 
90
  raise ValueError("Audio generation failed.")
 
91
  return JSONResponse(content={"audio": audio_bytes, "status_code": status.HTTP_200_OK}, status_code=200)
92
  except ValueError as ve:
 
93
  raise HTTPException(status_code=400, detail=str(ve))
94
  except Exception as e:
 
95
  raise HTTPException(status_code=500, detail="Internal Server Error")
96
 
97
 
 
141
  """
142
  )
143
  async def speech_to_text(audio: UploadFile, lang: str = Form(...)):
 
144
  try:
145
  audio_bytes = await audio.read()
146
  if not audio_bytes:
 
147
  raise ValueError("Empty audio file")
148
  except Exception as e:
 
149
  raise HTTPException(
150
  status_code=status.HTTP_400_BAD_REQUEST,
151
  detail="Invalid audio file"
 
155
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
156
  temp_audio_file.write(audio_bytes)
157
  temp_audio_file_path = temp_audio_file.name
 
158
  except Exception as e:
 
159
  raise HTTPException(
160
  status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
161
  detail="Could not process audio file"
162
  )
163
 
164
  try:
 
165
  transcript = pipeline.speech_to_text(temp_audio_file_path, lang)
166
  except FileNotFoundError as fnfe:
 
167
  raise HTTPException(
168
  status_code=status.HTTP_404_NOT_FOUND,
169
  detail="Temporary file not found"
170
  )
171
  except Exception as e:
 
172
  raise HTTPException(
173
  status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
174
  detail="Error processing speech-to-text"
175
  )
176
  finally:
 
177
  if os.path.exists(temp_audio_file_path):
178
  os.remove(temp_audio_file_path)
179
 
180
  return JSONResponse(content={"transcript": transcript, "status_code": status.HTTP_200_OK}, status_code=200)
181
+