JustNikunj committed on
Commit
ddc433e
·
verified ·
1 Parent(s): 6be7a07

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -32
app.py CHANGED
@@ -6,6 +6,7 @@ import numpy as np
6
  import re
7
  from scipy import signal
8
  import warnings
 
9
  warnings.filterwarnings('ignore')
10
 
11
  print("🚀 Starting Enhanced Hindi Speech Sentiment Analysis App...")
@@ -34,17 +35,24 @@ except Exception as e:
34
  # Load IndicWhisper for Hindi ASR (Best for Indian languages)
35
  print("🎤 Loading IndicWhisper Hindi ASR model...")
36
  try:
 
 
 
 
 
 
37
  asr_pipeline = pipeline(
38
  "automatic-speech-recognition",
39
- model="vasista22/whisper-hindi-medium", # IndicWhisper variant
 
 
40
  device="cpu",
41
  chunk_length_s=30
42
  )
43
  print("✅ IndicWhisper Hindi ASR model loaded successfully")
44
  except Exception as e:
45
- print(f"⚠️ Error loading IndicWhisper, trying fallback: {e}")
46
  try:
47
- # Fallback to OpenAI Whisper with Hindi optimization
48
  asr_pipeline = pipeline(
49
  "automatic-speech-recognition",
50
  model="openai/whisper-small",
@@ -195,15 +203,6 @@ def validate_hindi_text(text):
195
 
196
  return True, "Valid Hindi/Hinglish", hindi_ratio
197
 
198
- def transliterate_to_hindi(text):
199
- """
200
- If text is in Roman script, attempt to keep Hindi words
201
- This is a placeholder - in production, use proper transliteration library
202
- """
203
- # For now, just return original text
204
- # In production, use: indic-transliteration or aksharamukha library
205
- return text
206
-
207
  # ============================================
208
  # 5. ENHANCED SENTIMENT ANALYSIS
209
  # ============================================
@@ -375,18 +374,11 @@ def predict(audio_filepath):
375
  # ============================================
376
  # STEP 2: Speech-to-Text (ASR)
377
  # ============================================
378
- print("🔄 Transcribing audio with IndicWhisper...")
379
  try:
380
- # Save preprocessed audio temporarily
381
- import tempfile
382
- with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_audio:
383
- import soundfile as sf
384
- sf.write(temp_audio.name, audio_processed, sr)
385
- temp_audio_path = temp_audio.name
386
-
387
  # Transcribe with Hindi language setting
388
  result = asr_pipeline(
389
- temp_audio_path,
390
  generate_kwargs={
391
  "language": "hindi",
392
  "task": "transcribe"
@@ -396,10 +388,6 @@ def predict(audio_filepath):
396
  transcription = result["text"].strip()
397
  print(f"📝 Raw transcription: '{transcription}'")
398
 
399
- # Clean up temp file
400
- import os
401
- os.unlink(temp_audio_path)
402
-
403
  except Exception as asr_error:
404
  print(f"❌ ASR Error: {asr_error}")
405
  return {
@@ -491,7 +479,7 @@ def predict(audio_filepath):
491
  # ============================================
492
 
493
  demo = gr.Interface(
494
- fn=predict, # Removed async - not needed for this implementation
495
  inputs=gr.Audio(
496
  type="filepath",
497
  label="🎤 Record or Upload Hindi Audio",
@@ -506,7 +494,7 @@ demo = gr.Interface(
506
  ## 🇮🇳 Professional-grade Hindi/Hinglish Speech Emotion Analysis
507
 
508
  ### ✨ Advanced Features:
509
- - **🎙️ IndicWhisper ASR** - Best-in-class Hindi transcription
510
  - **🧠 XLM-RoBERTa** - Multilingual sentiment analysis
511
  - **🎵 Prosodic Analysis** - Voice tone, pitch, energy detection
512
  - **🔄 Mixed Emotion Detection** - Handles complex feelings
@@ -554,9 +542,5 @@ demo = gr.Interface(
554
 
555
  if __name__ == "__main__":
556
  print("🌐 Starting server...")
557
- demo.launch(
558
- server_name="0.0.0.0",
559
- server_port=7860,
560
- show_error=True
561
- )
562
  print("🎉 Enhanced Hindi Sentiment Analysis App is ready!")
 
6
  import re
7
  from scipy import signal
8
  import warnings
9
+ import os
10
  warnings.filterwarnings('ignore')
11
 
12
  print("🚀 Starting Enhanced Hindi Speech Sentiment Analysis App...")
 
35
  # Load IndicWhisper for Hindi ASR (Best for Indian languages)
36
  print("🎤 Loading IndicWhisper Hindi ASR model...")
37
  try:
38
+ from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
39
+
40
+ asr_processor = AutoProcessor.from_pretrained("vasista22/whisper-hindi-medium")
41
+ asr_model = AutoModelForSpeechSeq2Seq.from_pretrained("vasista22/whisper-hindi-medium")
42
+
43
+ # Create pipeline with the loaded model
44
  asr_pipeline = pipeline(
45
  "automatic-speech-recognition",
46
+ model=asr_model,
47
+ tokenizer=asr_processor.tokenizer,
48
+ feature_extractor=asr_processor.feature_extractor,
49
  device="cpu",
50
  chunk_length_s=30
51
  )
52
  print("✅ IndicWhisper Hindi ASR model loaded successfully")
53
  except Exception as e:
54
+ print(f"❌ Error loading IndicWhisper, trying fallback: {e}")
55
  try:
 
56
  asr_pipeline = pipeline(
57
  "automatic-speech-recognition",
58
  model="openai/whisper-small",
 
203
 
204
  return True, "Valid Hindi/Hinglish", hindi_ratio
205
 
 
 
 
 
 
 
 
 
 
206
  # ============================================
207
  # 5. ENHANCED SENTIMENT ANALYSIS
208
  # ============================================
 
374
  # ============================================
375
  # STEP 2: Speech-to-Text (ASR)
376
  # ============================================
377
+ print("🔄 Transcribing audio with Whisper...")
378
  try:
 
 
 
 
 
 
 
379
  # Transcribe with Hindi language setting
380
  result = asr_pipeline(
381
+ audio_filepath,
382
  generate_kwargs={
383
  "language": "hindi",
384
  "task": "transcribe"
 
388
  transcription = result["text"].strip()
389
  print(f"📝 Raw transcription: '{transcription}'")
390
 
 
 
 
 
391
  except Exception as asr_error:
392
  print(f"❌ ASR Error: {asr_error}")
393
  return {
 
479
  # ============================================
480
 
481
  demo = gr.Interface(
482
+ fn=predict,
483
  inputs=gr.Audio(
484
  type="filepath",
485
  label="🎤 Record or Upload Hindi Audio",
 
494
  ## 🇮🇳 Professional-grade Hindi/Hinglish Speech Emotion Analysis
495
 
496
  ### ✨ Advanced Features:
497
+ - **🎙️ IndicWhisper ASR** - Specialized Hindi transcription model
498
  - **🧠 XLM-RoBERTa** - Multilingual sentiment analysis
499
  - **🎵 Prosodic Analysis** - Voice tone, pitch, energy detection
500
  - **🔄 Mixed Emotion Detection** - Handles complex feelings
 
542
 
543
  if __name__ == "__main__":
544
  print("🌐 Starting server...")
545
+ demo.launch()
 
 
 
 
546
  print("🎉 Enhanced Hindi Sentiment Analysis App is ready!")