geethareddy committed on
Commit
fb84286
·
verified ·
1 Parent(s): 7ee25bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -13
app.py CHANGED
@@ -9,16 +9,13 @@ from waitress import serve
9
 
10
  app = Flask(__name__)
11
 
12
- # Use whisper-small for faster processing and better speed
13
  device = "cuda" if torch.cuda.is_available() else "cpu"
14
  asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-small", device=0 if device == "cuda" else -1)
15
 
16
- # Function to generate audio prompts
17
  def generate_audio_prompt(text, filename):
18
  tts = gTTS(text=text, lang="en")
19
  tts.save(os.path.join("static", filename))
20
 
21
- # Generate required voice prompts
22
  prompts = {
23
  "welcome": "Welcome to Biryani Hub.",
24
  "ask_name": "Tell me your name.",
@@ -29,7 +26,6 @@ prompts = {
29
  for key, text in prompts.items():
30
  generate_audio_prompt(text, f"{key}.mp3")
31
 
32
- # Symbol mapping for proper recognition
33
  SYMBOL_MAPPING = {
34
  "at the rate": "@",
35
  "at": "@",
@@ -42,20 +38,18 @@ SYMBOL_MAPPING = {
42
  "space": " "
43
  }
44
 
45
- # Function to convert audio to WAV format
46
  def convert_to_wav(input_path, output_path):
47
  try:
48
  audio = AudioSegment.from_file(input_path)
49
- audio = audio.set_frame_rate(16000).set_channels(1) # Convert to 16kHz, mono
50
  audio.export(output_path, format="wav")
51
  except Exception as e:
52
  raise Exception(f"Audio conversion failed: {str(e)}")
53
 
54
- # Function to check if audio contains actual speech
55
  def is_silent_audio(audio_path):
56
  audio = AudioSegment.from_wav(audio_path)
57
- nonsilent_parts = detect_nonsilent(audio, min_silence_len=500, silence_thresh=audio.dBFS-16) # Reduced silence duration
58
- return len(nonsilent_parts) == 0 # If no speech detected
59
 
60
  @app.route("/")
61
  def index():
@@ -72,14 +66,11 @@ def transcribe():
72
  audio_file.save(input_audio_path)
73
 
74
  try:
75
- # Convert to WAV
76
  convert_to_wav(input_audio_path, output_audio_path)
77
 
78
- # Check for silence
79
  if is_silent_audio(output_audio_path):
80
  return jsonify({"error": "No speech detected. Please try again."}), 400
81
 
82
- # Use Whisper ASR model for transcription
83
  result = asr_model(output_audio_path, generate_kwargs={"language": "en"})
84
  transcribed_text = result["text"].strip().capitalize()
85
 
@@ -87,6 +78,5 @@ def transcribe():
87
  except Exception as e:
88
  return jsonify({"error": f"Speech recognition error: {str(e)}"}), 500
89
 
90
- # Start Production Server
91
  if __name__ == "__main__":
92
  serve(app, host="0.0.0.0", port=7860)
 
9
 
10
  app = Flask(__name__)
11
 
 
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
13
  asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-small", device=0 if device == "cuda" else -1)
14
 
 
15
  def generate_audio_prompt(text, filename):
16
  tts = gTTS(text=text, lang="en")
17
  tts.save(os.path.join("static", filename))
18
 
 
19
  prompts = {
20
  "welcome": "Welcome to Biryani Hub.",
21
  "ask_name": "Tell me your name.",
 
26
  for key, text in prompts.items():
27
  generate_audio_prompt(text, f"{key}.mp3")
28
 
 
29
  SYMBOL_MAPPING = {
30
  "at the rate": "@",
31
  "at": "@",
 
38
  "space": " "
39
  }
40
 
 
41
  def convert_to_wav(input_path, output_path):
42
  try:
43
  audio = AudioSegment.from_file(input_path)
44
+ audio = audio.set_frame_rate(16000).set_channels(1)
45
  audio.export(output_path, format="wav")
46
  except Exception as e:
47
  raise Exception(f"Audio conversion failed: {str(e)}")
48
 
 
49
  def is_silent_audio(audio_path):
50
  audio = AudioSegment.from_wav(audio_path)
51
+ nonsilent_parts = detect_nonsilent(audio, min_silence_len=500, silence_thresh=audio.dBFS-16)
52
+ return len(nonsilent_parts) == 0
53
 
54
  @app.route("/")
55
  def index():
 
66
  audio_file.save(input_audio_path)
67
 
68
  try:
 
69
  convert_to_wav(input_audio_path, output_audio_path)
70
 
 
71
  if is_silent_audio(output_audio_path):
72
  return jsonify({"error": "No speech detected. Please try again."}), 400
73
 
 
74
  result = asr_model(output_audio_path, generate_kwargs={"language": "en"})
75
  transcribed_text = result["text"].strip().capitalize()
76
 
 
78
  except Exception as e:
79
  return jsonify({"error": f"Speech recognition error: {str(e)}"}), 500
80
 
 
81
  if __name__ == "__main__":
82
  serve(app, host="0.0.0.0", port=7860)